From 9b7a7f4fd35d9c86cc4cf05598d6cc6001342744 Mon Sep 17 00:00:00 2001 From: Iago <6860957+igonro@users.noreply.github.com> Date: Tue, 8 Mar 2022 10:19:09 +0100 Subject: [PATCH 1/2] Add failing tests for issue #412 --- .../tests/core/generic.test.js | 60 ++++++++++++++++++- .../tests/core/indexing.test.js | 16 +++++ src/danfojs-node/test/core/generic.test.ts | 60 ++++++++++++++++++- src/danfojs-node/test/core/indexing.test.ts | 17 ++++++ 4 files changed, 151 insertions(+), 2 deletions(-) diff --git a/src/danfojs-browser/tests/core/generic.test.js b/src/danfojs-browser/tests/core/generic.test.js index 44683e5a..78ccac0f 100644 --- a/src/danfojs-browser/tests/core/generic.test.js +++ b/src/danfojs-browser/tests/core/generic.test.js @@ -407,7 +407,12 @@ describe("Generic (NDFrame)", function () { describe("Empty NDFrame", function () { it("Can successfully create an empty NDframe from empty array", function () { let data = []; - let sf = new dfd.NDframe({ data, isSeries: false }); + let df = new dfd.NDframe({ data, isSeries: false }); + assert.deepEqual(df.shape, [ 0, 0 ]); + assert.deepEqual(df.columns, []); + assert.deepEqual(df.dtypes, []); + assert.deepEqual(df.values, []); + let sf = new dfd.NDframe({ data, isSeries: true }); assert.deepEqual(sf.shape, [ 0, 0 ]); assert.deepEqual(sf.columns, []); assert.deepEqual(sf.dtypes, []); @@ -415,6 +420,11 @@ describe("Generic (NDFrame)", function () { }); it("Can successfully create an empty NDframe from undefined data", function () { let data = undefined; + let df = new dfd.NDframe({ data, isSeries: false }); + assert.deepEqual(df.shape, [ 0, 0 ]); + assert.deepEqual(df.columns, []); + assert.deepEqual(df.dtypes, []); + assert.deepEqual(df.values, []); let sf = new dfd.NDframe({ data, isSeries: true }); assert.deepEqual(sf.shape, [ 0, 0 ]); assert.deepEqual(sf.columns, []); @@ -423,12 +433,60 @@ describe("Generic (NDFrame)", function () { }); it("Can successfully create an empty NDframe", function () { + let df = new dfd.NDframe({ isSeries: false }); + assert.deepEqual(df.shape, [ 0, 0 ]); + assert.deepEqual(df.columns, []); + assert.deepEqual(df.dtypes, []); + assert.deepEqual(df.values, []); let sf = new dfd.NDframe({ isSeries: true }); assert.deepEqual(sf.shape, [ 0, 0 ]); assert.deepEqual(sf.columns, []); assert.deepEqual(sf.dtypes, []); assert.deepEqual(sf.values, []); }); + + it("Can successfully create an empty NDframe with columns names", function () { + let data = []; + let df = new dfd.NDframe({ data, columns: [ "A", "B", "C" ], isSeries: false }); + assert.deepEqual(df.shape, [ 0, 3 ]); + assert.deepEqual(df.columns, [ "A", "B", "C" ]); + assert.deepEqual(df.dtypes, []); + assert.deepEqual(df.values, []); + let sf = new dfd.NDframe({ data, columns: [ "A" ], isSeries: true }); + assert.deepEqual(sf.shape, [ 0, 1 ]); + assert.deepEqual(sf.columns, [ "A" ]); + assert.deepEqual(sf.dtypes, []); + assert.deepEqual(sf.values, []); + }); + + it("Can successfully create an empty NDframe with columns names and dtypes", function () { + let data = []; + let df = new dfd.NDframe({ data, columns: [ "A", "B", "C" ], dtypes: [ "string", "string", "int32" ], isSeries: false }); + assert.deepEqual(df.shape, [ 0, 3 ]); + assert.deepEqual(df.columns, [ "A", "B", "C" ]); + assert.deepEqual(df.dtypes, [ "string", "string", "int32" ]); + assert.deepEqual(df.values, []); + let sf = new dfd.NDframe({ data, columns: [ "A" ], dtypes: [ "string" ], isSeries: true }); + assert.deepEqual(sf.shape, [ 0, 1 ]); + assert.deepEqual(sf.columns, [ "A" ]); + assert.deepEqual(sf.dtypes, [ "string" ]); + assert.deepEqual(sf.values, []); + }); + + it("Cannot successfully create an empty NDframe with only columns dtypes", function () { + let data = []; + assert.throws( + () => new dfd.NDframe({ data, dtypes: [ "string", "string", "int32" ], isSeries: false }), + Error, + "DtypeError: columns parameter must be provided when dtypes parameter is provided" + ); + assert.throws( + () => new dfd.NDframe({ data, dtypes: [ "string" ], isSeries: true }), + Error, + "DtypeError: columns parameter must be provided when dtypes parameter is provided" + ); + }); + }); }); diff --git a/src/danfojs-browser/tests/core/indexing.test.js b/src/danfojs-browser/tests/core/indexing.test.js index 48602c6c..535759ab 100644 --- a/src/danfojs-browser/tests/core/indexing.test.js +++ b/src/danfojs-browser/tests/core/indexing.test.js @@ -408,6 +408,22 @@ describe("Iloc and Loc based Indexing", function () { assert.deepEqual(subDf.values, result); }); + + it("loc with no matches create a Empty DataFrame conserving columns information", function () { + const data = { + "Name": [ "Apples", "Mango", "Banana", "Pear" ], + "Count": [ 21, 5, 30, 10 ], + "Price": [ 200, 300, 40, 250 ] + }; + const df = new dfd.DataFrame(data); + const subDf = df.loc({ rows: df["Count"].gt(50) }); + + assert.deepEqual(subDf.values, []); + assert.deepEqual(subDf.shape, [ 0, 3 ]); + assert.deepEqual(subDf.columns, [ "Name", "Count", "Price" ]); + assert.deepEqual(subDf.dtypes, [ "string", "int32", "int32" ]); + + }); }); }); diff --git a/src/danfojs-node/test/core/generic.test.ts b/src/danfojs-node/test/core/generic.test.ts index a7c390a0..fe5b0710 100644 --- a/src/danfojs-node/test/core/generic.test.ts +++ b/src/danfojs-node/test/core/generic.test.ts @@ -344,7 +344,12 @@ describe("Generic (NDFrame)", function () { describe("Empty NDFrame", function () { it("Can successfully create an empty NDframe from empty array", function () { let data: any = []; - let sf = new NDframe({ data, isSeries: false }); + let df = new NDframe({ data, isSeries: false }); + assert.deepEqual(df.shape, [0, 0]); + assert.deepEqual(df.columns, []); + assert.deepEqual(df.dtypes, []); + assert.deepEqual(df.values, []); + let sf = new NDframe({ data, isSeries: true }); assert.deepEqual(sf.shape, [0, 0]); assert.deepEqual(sf.columns, []); assert.deepEqual(sf.dtypes, []); @@ -352,6 +357,11 @@ describe("Generic (NDFrame)", function () { }); it("Can successfully create an empty NDframe from undefined data", function () { let data = undefined; + let df = new NDframe({ data, isSeries: false }); + assert.deepEqual(df.shape, [0, 0]); + assert.deepEqual(df.columns, []); + assert.deepEqual(df.dtypes, []); + assert.deepEqual(df.values, []); let sf = new NDframe({ data, isSeries: true }); assert.deepEqual(sf.shape, [0, 0]); assert.deepEqual(sf.columns, []); @@ -360,11 +370,59 @@ describe("Generic (NDFrame)", function () { }); it("Can successfully create an empty NDframe", function () { + let df = new NDframe({ data: [], isSeries: false }); + assert.deepEqual(df.shape, [0, 0]); + assert.deepEqual(df.columns, []); + assert.deepEqual(df.dtypes, []); + assert.deepEqual(df.values, []); let sf = new NDframe({ data: [], isSeries: true }); assert.deepEqual(sf.shape, [0, 0]); assert.deepEqual(sf.columns, []); assert.deepEqual(sf.dtypes, []); assert.deepEqual(sf.values, []); }); + + it("Can successfully create an empty NDframe with columns names", function () { + let data: any = []; + let df = new NDframe({ data, columns: [ "A", "B", "C" ], isSeries: false }); + assert.deepEqual(df.shape, [ 0, 3 ]); + assert.deepEqual(df.columns, [ "A", "B", "C" ]); + assert.deepEqual(df.dtypes, []); + assert.deepEqual(df.values, []); + let sf = new NDframe({ data, columns: [ "A" ], isSeries: true }); + assert.deepEqual(sf.shape, [ 0, 1 ]); + assert.deepEqual(sf.columns, [ "A" ]); + assert.deepEqual(sf.dtypes, []); + assert.deepEqual(sf.values, []); + }); + + it("Can successfully create an empty NDframe with columns names and dtypes", function () { + let data: any = []; + let df = new NDframe({ data, columns: [ "A", "B", "C" ], dtypes: [ "string", "string", "int32" ], isSeries: false }); + assert.deepEqual(df.shape, [ 0, 3 ]); + assert.deepEqual(df.columns, [ "A", "B", "C" ]); + assert.deepEqual(df.dtypes, [ "string", "string", "int32" ]); + assert.deepEqual(df.values, []); + let sf = new NDframe({ data, columns: [ "A" ], dtypes: [ "string" ], isSeries: true }); + assert.deepEqual(sf.shape, [ 0, 1 ]); + assert.deepEqual(sf.columns, [ "A" ]); + assert.deepEqual(sf.dtypes, [ "string" ]); + assert.deepEqual(sf.values, []); + }); + + it("Cannot successfully create an empty NDframe with only columns dtypes", function () { + let data: any = []; + assert.throws( + () => new NDframe({ data, dtypes: [ "string", "string", "int32" ], isSeries: false }), + Error, + "DtypeError: columns parameter must be provided when dtypes parameter is provided" + ); + assert.throws( + () => new NDframe({ data, dtypes: [ "string" ], isSeries: true }), + Error, + "DtypeError: columns parameter must be provided when dtypes parameter is provided" + ); + }); + }); }); diff --git a/src/danfojs-node/test/core/indexing.test.ts b/src/danfojs-node/test/core/indexing.test.ts index 70327449..1164cf14 100644 --- a/src/danfojs-node/test/core/indexing.test.ts +++ b/src/danfojs-node/test/core/indexing.test.ts @@ -411,6 +411,23 @@ describe("Iloc and Loc based Indexing", function () { assert.deepEqual(subDf.values, result); }); + + it("loc with no matches create a Empty DataFrame conserving columns information", function () { + const data = { + "Name": [ "Apples", "Mango", "Banana", "Pear" ], + "Count": [ 21, 5, 30, 10 ], + "Price": [ 200, 300, 40, 250 ] + }; + const df = new DataFrame(data); + const subDf = df.loc({ rows: df["Count"].gt(50) }); + + assert.deepEqual(subDf.values, []); + assert.deepEqual(subDf.shape, [ 0, 3 ]); + assert.deepEqual(subDf.columns, [ "Name", "Count", "Price" ]); + assert.deepEqual(subDf.dtypes, [ "string", "int32", "int32" ]); + + }); + }) }); \ No newline at end of file From b97bc7da8b7f757f030b193a71439213c583388c Mon Sep 17 00:00:00 2001 From: Iago <6860957+igonro@users.noreply.github.com> Date: Tue, 8 Mar 2022 10:19:56 +0100 Subject: [PATCH 2/2] Add feature for allowing empty dataframes with column information (issue #412) --- src/danfojs-base/core/generic.ts | 13 ++++++++++--- src/danfojs-base/shared/errors.ts | 5 +++++ 2 files changed, 15 insertions(+), 3 deletions(-) diff --git a/src/danfojs-base/core/generic.ts b/src/danfojs-base/core/generic.ts index 3f58fd0f..d5665d1b 100644 --- a/src/danfojs-base/core/generic.ts +++ b/src/danfojs-base/core/generic.ts @@ -72,7 +72,10 @@ export default class NDframe implements NDframeInterface { } if (data === undefined || (Array.isArray(data) && data.length === 0)) { - this.loadArrayIntoNdframe({ data: [], index: [], columns: [], dtypes: [] }); + if (columns === undefined) columns = []; + if (dtypes === undefined) dtypes = []; + if (columns.length === 0 && dtypes.length !== 0) ErrorThrower.throwDtypeWithoutColumnError(); + this.loadArrayIntoNdframe({ data: [], index: [], columns: columns, dtypes: dtypes }); } else if (utils.is1DArray(data)) { this.loadArrayIntoNdframe({ data, index, columns, dtypes }); } else { @@ -306,6 +309,7 @@ export default class NDframe implements NDframeInterface { */ $setColumnNames(columns?: string[]) { + // console.log(columns); if (this.$isSeries) { if (columns) { if (this.$data.length != 0 && columns.length != 1 && typeof columns != 'string') { @@ -322,7 +326,7 @@ export default class NDframe implements NDframeInterface { ErrorThrower.throwColumnNamesLengthError(this, columns) } - if (Array.from(new Set(columns)).length !== this.shape[1]) { + if (Array.from(new Set(columns)).length !== columns.length) { ErrorThrower.throwColumnDuplicateError() } @@ -337,7 +341,10 @@ export default class NDframe implements NDframeInterface { * Returns the shape of the NDFrame. Shape is determined by [row length, column length] */ get shape(): Array { - if (this.$data.length === 0) return [0, 0] + if (this.$data.length === 0) { + if (this.$columns.length === 0) return [0, 0]; + else return [0, this.$columns.length]; + } if (this.$isSeries) { return [this.$data.length, 1]; } else { diff --git a/src/danfojs-base/shared/errors.ts b/src/danfojs-base/shared/errors.ts index cdd256d6..92a324cb 100644 --- a/src/danfojs-base/shared/errors.ts +++ b/src/danfojs-base/shared/errors.ts @@ -51,6 +51,11 @@ class ErrorThrower { throw new Error(msg) } + throwDtypeWithoutColumnError = (): void => { + const msg = `DtypeError: columns parameter must be provided when dtypes parameter is provided` + throw new Error(msg) + } + throwColumnLengthError = (ndframe: NDframe | DataFrame, arrLen: number): void => { const msg = `ParamError: Column data length mismatch. You provided data with length ${arrLen} but Ndframe has column of length ${ndframe.shape[1]}` throw new Error(msg)