From 7fbea07431dbd194259e44e868df74617814f6de Mon Sep 17 00:00:00 2001 From: Dan Crescimanno Date: Tue, 3 May 2022 21:51:56 -0700 Subject: [PATCH 1/4] feat: not complete serialization --- src/cluster/KMeans.test.ts | 16 +- src/cluster/KMeans.ts | 2 +- src/compose/ColumnTransformer.ts | 7 +- src/dummy/DummyClassifier.test.ts | 14 +- src/dummy/DummyRegressor.test.ts | 12 +- src/impute/SimpleImputer.test.ts | 21 +- src/index.ts | 8 + src/linear_model/LinearRegression.test.ts | 8 +- src/linear_model/LogisticRegression.test.ts | 6 +- src/linear_model/SgdClassifier.ts | 9 + src/linear_model/SgdRegressor.ts | 10 + src/mixins.ts | 3 +- src/naive_bayes/GaussianNB.test.ts | 5 +- src/pipeline/Pipeline.test.ts | 6 +- src/preprocessing/MinMaxScaler.test.ts | 8 +- src/simpleSerializer.ts | 223 ++++++++++++++++++++ src/tree/Criterion.test.ts | 24 ++- src/tree/Criterion.ts | 88 ++++---- src/tree/DecisionTree.test.ts | 5 +- src/tree/DecisionTree.ts | 18 +- src/tree/Splitter.test.ts | 90 ++++++-- src/tree/Splitter.ts | 46 ++-- 22 files changed, 489 insertions(+), 140 deletions(-) create mode 100644 src/simpleSerializer.ts diff --git a/src/cluster/KMeans.test.ts b/src/cluster/KMeans.test.ts index 6aa15ea9..ab7f65ca 100644 --- a/src/cluster/KMeans.test.ts +++ b/src/cluster/KMeans.test.ts @@ -1,5 +1,5 @@ import { KMeans } from './KMeans' - +import { fromObject } from '../index' // Next steps: Improve on kmeans cluster testing describe('KMeans', () => { const X = [ @@ -38,7 +38,7 @@ describe('KMeans', () => { ) }) - it('should save kmeans model', () => { + it('should save kmeans model', async () => { const expectedResult = { name: 'KMeans', nClusters: 2, @@ -48,7 +48,7 @@ describe('KMeans', () => { randomState: 0, nInit: 10, clusterCenters: { - type: 'Tensor', + name: 'Tensor', value: [ [2.5, 1], [2.5, 4] @@ -57,20 +57,20 @@ describe('KMeans', () => { } const kmean = new KMeans({ nClusters: 2, randomState: 0 }) kmean.fit(X) - const ksave = kmean.toJson() as string + const ksave = await kmean.toObject() - expect(expectedResult).toEqual(JSON.parse(ksave)) + expect(expectedResult).toEqual(ksave) }) - it('should load serialized kmeans model', () => { + it('should load serialized kmeans model', async () => { const centroids = [ [2.5, 1], [2.5, 4] ] const kmean = new KMeans({ nClusters: 2, randomState: 0 }) kmean.fit(X) - const ksave = kmean.toJson() as string - const ksaveModel = new KMeans().fromJson(ksave) + const ksave = await kmean.toObject() + const ksaveModel = await fromObject(ksave) expect(centroids).toEqual(ksaveModel.clusterCenters.arraySync()) }) diff --git a/src/cluster/KMeans.ts b/src/cluster/KMeans.ts index d5810cd1..f154084e 100644 --- a/src/cluster/KMeans.ts +++ b/src/cluster/KMeans.ts @@ -1,6 +1,6 @@ import { Scikit2D } from '../types' import { convertToNumericTensor2D, sampleWithoutReplacement } from '../utils' -import Serialize from '../serialize' +import { Serialize } from '../simpleSerializer' import { tf } from '../shared/globals' /* diff --git a/src/compose/ColumnTransformer.ts b/src/compose/ColumnTransformer.ts index 0f89059a..7109b65b 100644 --- a/src/compose/ColumnTransformer.ts +++ b/src/compose/ColumnTransformer.ts @@ -1,5 +1,6 @@ -import { DataFrameInterface, Scikit1D, Scikit2D, Transformer } from '../types' -import { isDataFrameInterface, isScikitLike2D } from '../typesUtils' +import { DataFrameInterface, Scikit1D, Transformer } from '../types' +import { isDataFrameInterface } from '../typesUtils' +import { Serialize } from '../simpleSerializer' import { tf } from '../shared/globals' /* Next steps: @@ -64,7 +65,7 @@ export interface ColumnTransformerParams { ] * ``` */ -export class ColumnTransformer { +export class ColumnTransformer extends Serialize { transformers: TransformerTriple remainder: Transformer | 'drop' | 'passthrough' diff --git a/src/dummy/DummyClassifier.test.ts b/src/dummy/DummyClassifier.test.ts index 858c5adc..419a986b 100644 --- a/src/dummy/DummyClassifier.test.ts +++ b/src/dummy/DummyClassifier.test.ts @@ -1,5 +1,5 @@ import { DummyClassifier } from './DummyClassifier' - +import { fromObject } from '../simpleSerializer' describe('DummyClassifier', function () { it('Use DummyClassifier on simple example (mostFrequent)', function () { const clf = new DummyClassifier() @@ -51,7 +51,7 @@ describe('DummyClassifier', function () { expect(scaler.classes).toEqual([1, 2, 3]) }) - it('should serialize DummyClassifier', function () { + it('should serialize DummyClassifier', async function () { const clf = new DummyClassifier() const X = [ @@ -70,10 +70,10 @@ describe('DummyClassifier', function () { } clf.fit(X, y) - const clfSave = clf.toJson() as string - expect(expectedResult).toEqual(JSON.parse(clfSave)) + const clfSave = await clf.toObject() + expect(expectedResult).toEqual(clfSave) }) - it('should load DummyClassifier', function () { + it('should load DummyClassifier', async function () { const clf = new DummyClassifier() const X = [ @@ -85,8 +85,8 @@ describe('DummyClassifier', function () { const y = [10, 20, 20, 30] clf.fit(X, y) - const clfSave = clf.toJson() as string - const newClf = new DummyClassifier().fromJson(clfSave) + const clfSave = await clf.toObject() + const newClf = await fromObject(clfSave) expect(clf).toEqual(newClf) }) }) diff --git a/src/dummy/DummyRegressor.test.ts b/src/dummy/DummyRegressor.test.ts index 04299b7e..ebbe652d 100644 --- a/src/dummy/DummyRegressor.test.ts +++ b/src/dummy/DummyRegressor.test.ts @@ -1,5 +1,5 @@ import { DummyRegressor } from './DummyRegressor' - +import { toObject, fromObject } from '../simpleSerializer' describe('DummyRegressor', function () { it('Use DummyRegressor on simple example (mean)', function () { const reg = new DummyRegressor() @@ -55,7 +55,7 @@ describe('DummyRegressor', function () { reg.fit(X, y) expect(reg.predict(predictX).arraySync()).toEqual([10, 10, 10]) }) - it('Should save DummyRegressor', function () { + it('Should save DummyRegressor', async function () { const reg = new DummyRegressor({ strategy: 'constant', constant: 10 }) const X = [ @@ -73,10 +73,10 @@ describe('DummyRegressor', function () { reg.fit(X, y) - expect(saveResult).toEqual(JSON.parse(reg.toJson() as string)) + expect(saveResult).toEqual(await toObject(reg)) }) - it('Should load serialized DummyRegressor', function () { + it('Should load serialized DummyRegressor', async function () { const reg = new DummyRegressor({ strategy: 'constant', constant: 10 }) const X = [ @@ -92,8 +92,8 @@ describe('DummyRegressor', function () { ] reg.fit(X, y) - const saveReg = reg.toJson() as string - const newReg = new DummyRegressor().fromJson(saveReg) + const saveReg = await toObject(reg) + const newReg = await fromObject(saveReg) expect(newReg.predict(predictX).arraySync()).toEqual([10, 10, 10]) }) diff --git a/src/impute/SimpleImputer.test.ts b/src/impute/SimpleImputer.test.ts index fb58af50..c0969a80 100644 --- a/src/impute/SimpleImputer.test.ts +++ b/src/impute/SimpleImputer.test.ts @@ -1,6 +1,6 @@ import { tf } from '../shared/globals' import { SimpleImputer } from './SimpleImputer' - +import { toObject, fromObject } from '../simpleSerializer' describe('SimpleImputer', function () { it('Imputes with "constant" strategy 2D one column. In this strategy, we give the fill value', function () { const imputer = new SimpleImputer({ strategy: 'constant', fillValue: 3 }) @@ -119,7 +119,7 @@ describe('SimpleImputer', function () { expect(returned.arraySync()).toEqual(expected) expect(imputer.transform([[NaN, NaN]]).arraySync()).toEqual([[4, 3]]) }) - it('Should serialized Imputer', function () { + it('Should serialized Imputer', async function () { const imputer = new SimpleImputer({ strategy: 'mostFrequent' }) const data = [ @@ -129,21 +129,21 @@ describe('SimpleImputer', function () { [4, 2], [6, NaN] ] - const expected = { name: 'SimpleImputer', - missingValues: null, + missingValues: NaN, + fillValue: undefined, strategy: 'mostFrequent', statistics: { - type: 'Tensor', + name: 'Tensor', value: [4, 3] } } - const returned = imputer.fitTransform(data) - expect(JSON.parse(imputer.toJson() as string)).toEqual(expected) + imputer.fitTransform(data) + expect(await toObject(imputer)).toEqual(expected) }) - it('Should load serialized Imputer', function () { + it('Should load serialized Imputer', async function () { const imputer = new SimpleImputer({ strategy: 'mostFrequent' }) const data = [ @@ -162,8 +162,9 @@ describe('SimpleImputer', function () { [6, 3] ] - const returned = imputer.fitTransform(data) - const newImputer = new SimpleImputer().fromJson(imputer.toJson() as string) + imputer.fitTransform(data) + const thing = await toObject(imputer) + const newImputer = await fromObject(thing) const newReturned = newImputer.transform(data) expect(newReturned.arraySync()).toEqual(expected) expect(newImputer.transform([[NaN, NaN]]).arraySync()).toEqual([[4, 3]]) diff --git a/src/index.ts b/src/index.ts index 6ab8fb07..22917ec5 100644 --- a/src/index.ts +++ b/src/index.ts @@ -1,3 +1,5 @@ +import Serialize from './serialize' + /** * @license * Copyright 2021, JsData. All rights reserved. @@ -83,3 +85,9 @@ export { DecisionTreeRegressor, DecisionTreeRegressorParams } from './tree/DecisionTree' + +export { fromObject, Serialize } from './simpleSerializer' + +export { ClassificationCriterion, RegressionCriterion } from './tree/Criterion' +export { Splitter } from './tree/Splitter' +export { DecisionTreeBase, DecisionTree } from './tree/DecisionTree' diff --git a/src/linear_model/LinearRegression.test.ts b/src/linear_model/LinearRegression.test.ts index a4d67295..1753fa4c 100644 --- a/src/linear_model/LinearRegression.test.ts +++ b/src/linear_model/LinearRegression.test.ts @@ -1,7 +1,7 @@ import { LinearRegression } from './LinearRegression' import { tensorEqual } from '../utils' import { tf } from '../shared/globals' - +import { toObject, fromObject } from '../simpleSerializer' function roughlyEqual(a: number, b: number, tol = 0.1) { return Math.abs(a - b) < tol } @@ -148,8 +148,10 @@ describe('LinearRegression', function () { const lr = new LinearRegression({ fitIntercept: false }) await lr.fit(mediumX, yPlusJitter) - const serialized = await lr.toJson() - const newModel = new LinearRegression({}).fromJson(serialized) + const serialized = await lr.toObject() + console.log({ serialized }) + const newModel = await fromObject(serialized) + console.log(newModel) expect(tensorEqual(newModel.coef, tf.tensor1d([2.5, 1]), 0.1)).toBe(true) expect(roughlyEqual(newModel.intercept as number, 0)).toBe(true) diff --git a/src/linear_model/LogisticRegression.test.ts b/src/linear_model/LogisticRegression.test.ts index c00db0fa..3dde416f 100644 --- a/src/linear_model/LogisticRegression.test.ts +++ b/src/linear_model/LogisticRegression.test.ts @@ -1,6 +1,6 @@ import { LogisticRegression } from './LogisticRegression' import { tf } from '../shared/globals' - +import { fromObject } from '../simpleSerializer' describe('LogisticRegression', function () { it('Works on arrays (small example)', async function () { const lr = new LogisticRegression() @@ -133,8 +133,8 @@ describe('LogisticRegression', function () { let logreg = new LogisticRegression({ penalty: 'l2' }) await logreg.fit(X, y) - const serializeModel = await logreg.toJson() - const newModel = logreg.fromJson(serializeModel) + const serializeModel = await logreg.toObject() + const newModel = await fromObject(serializeModel) const newModelResult = newModel.predict(Xtest) expect(newModelResult.arraySync()).toEqual([0, 0, 0, 0, 0, 0, 2, 2, 2]) diff --git a/src/linear_model/SgdClassifier.ts b/src/linear_model/SgdClassifier.ts index b5900a39..e3579da6 100644 --- a/src/linear_model/SgdClassifier.ts +++ b/src/linear_model/SgdClassifier.ts @@ -413,4 +413,13 @@ export class SGDClassifier extends ClassifierMixin { public fromJson(model: string) { return fromJson(this, model) as this } + + // public async toObject(): Promise { + // let { toObject } = await import('../simpleSerializer') + // return await toObject(this, [ + // 'modelCompileArgs', + // 'modelFitArgs', + // 'denseLayerArgs' + // ]) + // } } diff --git a/src/linear_model/SgdRegressor.ts b/src/linear_model/SgdRegressor.ts index 917ac73c..d3bad4e4 100644 --- a/src/linear_model/SgdRegressor.ts +++ b/src/linear_model/SgdRegressor.ts @@ -21,6 +21,7 @@ import { import { Scikit2D, Scikit1D, OptimizerTypes, LossTypes } from '../types' import { RegressorMixin } from '../mixins' import { fromJson, toJSON } from './modelSerializer' + /** * SGD is a thin Wrapper around Tensorflow's model api with a single dense layer. * With this base class and different error functions / regularizers we can @@ -208,6 +209,15 @@ export class SGDRegressor extends RegressorMixin { return this } + public async toObject(): Promise { + let { toObject } = await import('../simpleSerializer') + return await toObject(this, [ + 'modelCompileArgs', + 'modelFitArgs', + 'denseLayerArgs' + ]) + } + /** * Similar to scikit-learn, this returns the object of configuration params for SGD * @returns {SGDRegressorParams} Returns an object of configuration params. diff --git a/src/mixins.ts b/src/mixins.ts index d826e455..38d9de8e 100644 --- a/src/mixins.ts +++ b/src/mixins.ts @@ -1,6 +1,7 @@ import { Scikit2D, Scikit1D } from './types' import { r2Score, accuracyScore } from './metrics/metrics' -import Serialize from './serialize' +// import Serialize from './serialize' +import { Serialize } from './simpleSerializer' import { tf } from './shared/globals' export class TransformerMixin extends Serialize { // We assume that fit and transform exist diff --git a/src/naive_bayes/GaussianNB.test.ts b/src/naive_bayes/GaussianNB.test.ts index 627018e8..abce7a22 100644 --- a/src/naive_bayes/GaussianNB.test.ts +++ b/src/naive_bayes/GaussianNB.test.ts @@ -13,6 +13,7 @@ * ========================================================================== */ import { GaussianNB } from './GaussianNB' +import { toObject, fromObject } from '../simpleSerializer' describe('GaussianNB', function () { it('without priors', async () => { @@ -101,8 +102,8 @@ describe('GaussianNB', function () { await model.fit(X, y) const labels = model.predict(X) - const serializeModel = model.toJson() - const newModel = new GaussianNB().fromJson(serializeModel) + const serializeModel = await toObject(model) + const newModel = await fromObject(serializeModel) expect(newModel.predict(X).arraySync()).toEqual([0, 0, 1, 1, 1]) }) }) diff --git a/src/pipeline/Pipeline.test.ts b/src/pipeline/Pipeline.test.ts index 52ead335..7c10306b 100644 --- a/src/pipeline/Pipeline.test.ts +++ b/src/pipeline/Pipeline.test.ts @@ -4,6 +4,7 @@ import { tensorEqual } from '../utils' import { LinearRegression } from '../linear_model/LinearRegression' import { SimpleImputer } from '../impute/SimpleImputer' import { MinMaxScaler } from '../preprocessing/MinMaxScaler' +import { toObject, fromObject } from '../simpleSerializer' describe('Pipeline', function () { it('Use a Pipeline (min-max scaler, and linear regression)', async function () { @@ -96,8 +97,9 @@ describe('Pipeline', function () { await pipeline.fit(X, y) - const saveModel = (await pipeline.toJson()) as string - const newPipeLine = new Pipeline().fromJson(saveModel) + const saveModel = await toObject(pipeline) + console.log(saveModel) + const newPipeLine = await fromObject(saveModel) expect(newPipeLine.steps[1][1].min.arraySync()).toEqual([0, 0]) expect( diff --git a/src/preprocessing/MinMaxScaler.test.ts b/src/preprocessing/MinMaxScaler.test.ts index 7b15cc47..6e995b47 100644 --- a/src/preprocessing/MinMaxScaler.test.ts +++ b/src/preprocessing/MinMaxScaler.test.ts @@ -3,7 +3,7 @@ import * as dfd from 'danfojs-node' import { isDataFrameInterface, isSeriesInterface } from '../typesUtils' import { ScikitVecOrMatrix } from '../types' import { tf } from '../shared/globals' - +import { toObject, fromObject } from '../simpleSerializer' export function convertTensorToInputType( tensor: tf.Tensor, inputData: ScikitVecOrMatrix @@ -161,12 +161,12 @@ describe('MinMaxscaler', function () { 0 ]) }) - it('Serialize and unserialize MinMaxScaler', function () { + it('Serialize and unserialize MinMaxScaler', async function () { const data = tf.tensor2d([4, 4, 'whoops', 3, 3] as any, [5, 1]) const scaler = new MinMaxScaler() scaler.fit(data) - const serial = scaler.toJson() as string - const newModel = new MinMaxScaler().fromJson(serial) + const serial = (await toObject(scaler)) as string + const newModel = await fromObject(serial) expect(newModel.transform(data).arraySync().flat()).toEqual([ 1, 1, diff --git a/src/simpleSerializer.ts b/src/simpleSerializer.ts new file mode 100644 index 00000000..ca8ee217 --- /dev/null +++ b/src/simpleSerializer.ts @@ -0,0 +1,223 @@ +import { tf } from './shared/globals' + +const EstimatorList = [ + 'KNeighborsRegressor', + 'LinearRegression', + 'LassoRegression', + 'RidgeRegression', + 'ElasticNet', + 'LogisticRegression', + 'DummyRegressor', + 'DummyClassifier', + 'MinMaxScaler', + 'StandardScaler', + 'MaxAbsScaler', + 'SimpleImputer', + 'OneHotEncoder', + 'LabelEncoder', + 'OrdinalEncoder', + 'Normalizer', + 'Pipeline', + 'ColumnTransformer', + 'RobustScaler', + 'KMeans', + 'VotingRegressor', + 'VotingClassifier', + 'LinearSVC', + 'LinearSVR', + 'GaussianNB', + 'DecisionTreeClassifier', + 'DecisionTreeRegressor', + 'ClassificationCriterion', + 'RegressionCriterion', + 'Splitter', + 'DecisionTreeBase', + 'DecisionTree' +] + +/** + * 1. Make a list called EstimatorList + * 2. Do a dynamic import here + */ + +class JSONHandler { + savedArtifacts: any + constructor(artifacts?: any) { + this.savedArtifacts = artifacts || null + } + + async save(artifacts: any) { + // Base 64 encoding + this.savedArtifacts = artifacts + return { + modelArtifactsInfo: { + dateSaved: new Date(), + modelTopologyType: 'JSON', + modelTopologyBytes: JSON.stringify(artifacts.modelTopology).length, + weightSpecsBytes: JSON.stringify(artifacts.weightSpecs).length, + weightDataBytes: artifacts.weightData.byteLength + } + } + } + + async load() { + // Base64 decode + return this.savedArtifacts + } +} + +export async function toObjectInner( + val: any, + ignoreKeys: string[] = [] +): Promise { + // console.log(val) + if (['number', 'string', 'undefined', 'boolean'].includes(typeof val)) { + return val + } + + if (typeof val === 'function') { + console.warn( + `warning: Serializing function ${val}. Not going to be able to deserialize this later.` + ) + if (val.name) { + return val.name + } + } + + if (typeof val === 'object') { + // Null case + if (val === null) { + return null + } + // Array case + if (Array.isArray(val)) { + return await Promise.all(val.map(async (el) => await toObjectInner(el))) + } + + // Serialize a Tensor + if (val instanceof tf.Tensor) { + return { + name: 'Tensor', + value: val.arraySync() + } + } + + // Int32Array serialization. Used for DecisionTrees + if (val instanceof Int32Array) { + return { + name: 'Int32Array', + value: Array.from(val) + } + } + + // The tf object + if (val.ENV && val.AdadeltaOptimizer && val.version) { + return { + name: 'TF', + version: val.version.tfjs + } + } + + // tf.layers model + if (val instanceof tf.Sequential) { + let mem = new JSONHandler() + await val.save(mem as any) + return { + name: 'Sequential', + artifacts: mem.savedArtifacts + } + } + + if (EstimatorList.includes(val.name)) { + if (val.toObject) { + return val.toObject() + } + } + + // Generic object case / class case + let response: any = {} + for (let key of Object.keys(val)) { + // Ignore all the keys that we choose to + if (ignoreKeys.includes(key)) { + continue + } + // Ignore any function when we serialize + // if (typeof val[key] === 'function') { + // continue + // } + response[key] = await toObjectInner(val[key]) + } + return response + } +} + +export async function fromObjectInner(val: any): Promise { + // Ignores all types that aren't objects + if (typeof val !== 'object') { + return val + } + + // Null case + if (val === null) { + return null + } + + // Make a Tensor + if (val.name === 'Tensor') { + return tf.tensor(val.value) + } + + if (val.name === 'Sequential') { + let newMem = new JSONHandler(val.artifacts) + return await tf.loadLayersModel(newMem as any) + } + + if (val.name === 'Int32Array') { + return new Int32Array(val.value) + } + + // Array case + if (Array.isArray(val)) { + return await Promise.all(val.map(async (el) => await fromObjectInner(el))) + } + + // Generic object case + for (let key of Object.keys(val)) { + val[key] = await fromObjectInner(val[key]) + } + + // Make a model + if (EstimatorList.includes(val.name)) { + // Do dynamic import to avoid circular dependency tree + // Every class extends this class and therefor it + // can't import those classes in here + let module = await import('./index') + let model = (module as any)[val.name] + + let resultObj = new model(val) + for (let key of Object.keys(val)) { + resultObj[key] = val[key] + } + return resultObj + } + + return val +} + +export async function fromObject(val: any): Promise { + try { + return await fromObjectInner(val) + } catch (e) { + console.error(e) + } +} + +export class Serialize { + async toObject(ignoreKeys: string[] = []): Promise { + try { + return await toObjectInner(this, ignoreKeys) + } catch (e) { + console.error(e) + } + } +} diff --git a/src/tree/Criterion.test.ts b/src/tree/Criterion.test.ts index c58617fc..e2ed5a32 100644 --- a/src/tree/Criterion.test.ts +++ b/src/tree/Criterion.test.ts @@ -1,5 +1,5 @@ import { ClassificationCriterion, giniCoefficient, entropy } from './Criterion' - +import { toObject, fromObject } from '../simpleSerializer' describe('Criterion', function () { let X = [ [-2, -1], @@ -15,7 +15,7 @@ describe('Criterion', function () { sampleMap[i] = i } it('Use the criterion (init)', async function () { - let criterion = new ClassificationCriterion('gini', y) + let criterion = new ClassificationCriterion({ impurityMeasure: 'gini', y }) criterion.init(0, 6, sampleMap) expect(criterion.start).toEqual(0) @@ -29,7 +29,7 @@ describe('Criterion', function () { expect(criterion.labelFreqsRight[1]).toEqual(0) }, 1000) it('Use the criterion (update)', async function () { - let criterion = new ClassificationCriterion('gini', y) + let criterion = new ClassificationCriterion({ impurityMeasure: 'gini', y }) criterion.init(0, 6, sampleMap) criterion.update(3, sampleMap) @@ -40,20 +40,23 @@ describe('Criterion', function () { expect(criterion.labelFreqsRight[1]).toEqual(3) }, 1000) it('Use the criterion (gini)', async function () { - let criterion = new ClassificationCriterion('gini', y) + let criterion = new ClassificationCriterion({ impurityMeasure: 'gini', y }) criterion.init(0, 6, sampleMap) expect(criterion.nodeImpurity()).toEqual(0.5) }, 1000) it('Use the criterion (entropy)', async function () { - let criterion = new ClassificationCriterion('entropy', y) + let criterion = new ClassificationCriterion({ + impurityMeasure: 'entropy', + y + }) criterion.init(0, 6, sampleMap) expect(criterion.nodeImpurity()).toEqual(1) }, 1000) it('Use the criterion (gini update)', async function () { - let criterion = new ClassificationCriterion('gini', y) + let criterion = new ClassificationCriterion({ impurityMeasure: 'gini', y }) criterion.init(0, 6, sampleMap) criterion.update(4, sampleMap) @@ -75,10 +78,13 @@ describe('Criterion', function () { expect(entropy(labelFreqs, nSamples)).toEqual(0.7219280948873623) }, 1000) it('Use the criterion (entropy)', async function () { - let criterion = new ClassificationCriterion('entropy', y) + let criterion = new ClassificationCriterion({ + impurityMeasure: 'entropy', + y + }) criterion.init(0, 6, sampleMap) - const serial = criterion.toJson() as string - const newCriterion = ClassificationCriterion.fromJson(serial) + const serial = await toObject(criterion) + const newCriterion = await fromObject(serial) expect(newCriterion.nodeImpurity()).toEqual(1) }, 1000) }) diff --git a/src/tree/Criterion.ts b/src/tree/Criterion.ts index d7fd6cae..3f57c2de 100644 --- a/src/tree/Criterion.ts +++ b/src/tree/Criterion.ts @@ -43,7 +43,7 @@ function arrayMax(labels: int[]) { export class ClassificationCriterion extends Serialize { y: int[] impurityMeasure: ImpurityMeasure - impurityFunc: (labelFreqs: int[], nSamples: int) => number + // impurityFunc: (labelFreqs: int[], nSamples: int) => number start: int = 0 end: int = 0 pos: int = 0 @@ -54,21 +54,19 @@ export class ClassificationCriterion extends Serialize { nSamples: int = 0 nSamplesLeft: int = 0 nSamplesRight: int = 0 - name = 'classificationCriterion' - - constructor(impurityMeasure: ImpurityMeasure, y: number[]) { + name = 'ClassificationCriterion' + + constructor({ + impurityMeasure, + y + }: { + impurityMeasure: ImpurityMeasure + y: number[] + }) { super() - assert( - ['gini', 'entropy'].includes(impurityMeasure), - 'Unkown impurity measure. Only supports gini, and entropy' - ) this.impurityMeasure = impurityMeasure - if (this.impurityMeasure === 'gini') { - this.impurityFunc = giniCoefficient - } else { - this.impurityFunc = entropy - } + // This assumes that the labels are 0,1,2,...,(n-1) this.nLabels = arrayMax(y) + 1 this.y = y @@ -116,12 +114,12 @@ export class ClassificationCriterion extends Serialize { } childrenImpurities() { + let impurityFunc = + this.impurityMeasure === 'gini' ? giniCoefficient : entropy + return { - impurityLeft: this.impurityFunc(this.labelFreqsLeft, this.nSamplesLeft), - impurityRight: this.impurityFunc( - this.labelFreqsRight, - this.nSamplesRight - ) + impurityLeft: impurityFunc(this.labelFreqsLeft, this.nSamplesLeft), + impurityRight: impurityFunc(this.labelFreqsRight, this.nSamplesRight) } } @@ -134,7 +132,10 @@ export class ClassificationCriterion extends Serialize { } nodeImpurity() { - return this.impurityFunc(this.labelFreqsTotal, this.nSamples) + let impurityFunc = + this.impurityMeasure === 'gini' ? giniCoefficient : entropy + + return impurityFunc(this.labelFreqsTotal, this.nSamples) } nodeValue() { @@ -143,10 +144,10 @@ export class ClassificationCriterion extends Serialize { static fromJson(model: string) { const jsonClass = JSON.parse(model) - const newModel = new ClassificationCriterion( - jsonClass.impurityMeasure, - jsonClass.y - ) + const newModel = new ClassificationCriterion({ + impurityMeasure: jsonClass.impurityMeasure, + y: jsonClass.y + }) return Object.assign(newModel, jsonClass) } } @@ -154,7 +155,7 @@ export class ClassificationCriterion extends Serialize { export class RegressionCriterion extends Serialize { y: number[] impurityMeasure: 'squared_error' - impurityFunc: (ySquaredSum: number, ySum: number, nSamples: int) => number + // impurityFunc: (ySquaredSum: number, ySum: number, nSamples: int) => number start: int = 0 end: int = 0 pos: int = 0 @@ -167,18 +168,23 @@ export class RegressionCriterion extends Serialize { nSamples: int = 0 nSamplesLeft: int = 0 nSamplesRight: int = 0 - name = 'regressionCriterion' - - constructor(impurityMeasure: 'squared_error', y: number[]) { + name = 'RegressionCriterion' + + constructor({ + impurityMeasure, + y + }: { + impurityMeasure: 'squared_error' + y: number[] + }) { super() - assert( - ['squared_error'].includes(impurityMeasure), - 'Unkown impurity measure. Only supports squared_error' - ) + + // We don't assert in the constructor, we assert in fit in accordance with the sklearn docs // Support MAE one day this.impurityMeasure = impurityMeasure - this.impurityFunc = mse + // We don't set the impurityFunc here because we need it to be serializable as an object + // this.impurityFunc = mse this.y = y } @@ -224,13 +230,15 @@ export class RegressionCriterion extends Serialize { } childrenImpurities() { + // once we get another impurity function we can do a ternary here + let impurityFunc = mse return { - impurityLeft: this.impurityFunc( + impurityLeft: impurityFunc( this.squaredSumLeft, this.sumTotalLeft, this.nSamplesLeft ), - impurityRight: this.impurityFunc( + impurityRight: impurityFunc( this.squaredSumRight, this.sumTotalRight, this.nSamplesRight @@ -247,7 +255,9 @@ export class RegressionCriterion extends Serialize { } nodeImpurity() { - return this.impurityFunc(this.squaredSum, this.sumTotal, this.nSamples) + // once we get another impurity function we can do a ternary here + let impurityFunc = mse + return impurityFunc(this.squaredSum, this.sumTotal, this.nSamples) } nodeValue() { @@ -256,10 +266,10 @@ export class RegressionCriterion extends Serialize { static fromJson(model: string) { const jsonClass = JSON.parse(model) - const newModel = new RegressionCriterion( - jsonClass.impurityMeasure, - jsonClass.y - ) + const newModel = new RegressionCriterion({ + impurityMeasure: jsonClass.impurityMeasure, + y: jsonClass.y + }) return Object.assign(newModel, jsonClass) } } diff --git a/src/tree/DecisionTree.test.ts b/src/tree/DecisionTree.test.ts index 7e1a5207..d2fc7728 100644 --- a/src/tree/DecisionTree.test.ts +++ b/src/tree/DecisionTree.test.ts @@ -1,6 +1,7 @@ import { DecisionTreeClassifier, DecisionTreeRegressor } from './DecisionTree' import { dataUrls } from '../datasets/datasets' import * as dfd from 'danfojs-node' +import { toObject, fromObject } from '../simpleSerializer' describe('DecisionTree', function () { it('Use the DecisionTree (toy)', async function () { @@ -620,8 +621,8 @@ describe('DecisionTree', function () { let tree_classifier = new DecisionTreeClassifier() tree_classifier.fit(X, y) - const serial = tree_classifier.toJson() - const newTree = new DecisionTreeClassifier().fromJson(serial) + const serial = await toObject(tree_classifier) + const newTree = await fromObject(serial) expect(newTree.predict(T)).toEqual(true_result) }, 1000) }) diff --git a/src/tree/DecisionTree.ts b/src/tree/DecisionTree.ts index 292dca08..a4659451 100644 --- a/src/tree/DecisionTree.ts +++ b/src/tree/DecisionTree.ts @@ -48,9 +48,10 @@ function argMax(array: number[]) { return array.map((x, i) => [x, i]).reduce((r, a) => (a[0] > r[0] ? a : r))[1] } -class DecisionTree { +export class DecisionTree { nodes: Node[] = [] isBuilt = false + name = 'DecisionTree' getLeafNodes(X: number[][]): int[] { let leafNodeIds: int[] = [] @@ -140,7 +141,7 @@ interface DecisionTreeBaseParams { minImpurityDecrease?: number } -class DecisionTreeBase extends Serialize { +export class DecisionTreeBase extends Serialize { splitter!: Splitter stack: NodeRecord[] = [] minSamplesLeaf: int @@ -173,6 +174,7 @@ class DecisionTreeBase extends Serialize { this.minImpurityDecrease = minImpurityDecrease this.maxFeaturesNumb = 0 this.tree = new DecisionTree() + this.name = 'DecisionTreeBase' } calcMaxFeatures( nFeatures: int, @@ -203,14 +205,14 @@ class DecisionTreeBase extends Serialize { // CheckNegativeLabels(yptr); this.maxFeaturesNumb = this.calcMaxFeatures(X[0].length, this.maxFeatures) - this.splitter = new Splitter( + this.splitter = new Splitter({ X, y, - this.minSamplesLeaf, - this.criterion, - this.maxFeaturesNumb, - newSamplesSubset - ) + minSamplesLeaf: this.minSamplesLeaf, + impurityMeasure: this.criterion, + maxFeatures: this.maxFeaturesNumb, + samplesSubset: newSamplesSubset + }) // put root node on stack let rootNode: NodeRecord = { diff --git a/src/tree/Splitter.test.ts b/src/tree/Splitter.test.ts index ff7d7a6c..a8b164f1 100644 --- a/src/tree/Splitter.test.ts +++ b/src/tree/Splitter.test.ts @@ -1,5 +1,6 @@ import { ImpurityMeasure } from './Criterion' import { Splitter } from './Splitter' +import { toObject, fromObject } from '../simpleSerializer' describe('Splitter', function () { let types = ['gini', 'entropy', 'squared_error'] @@ -8,7 +9,14 @@ describe('Splitter', function () { let y = [0, 0, 0, 1, 1, 1] types.forEach((type) => { - let splitter = new Splitter(X, y, 1, type as ImpurityMeasure, 1, []) + let splitter = new Splitter({ + X, + y, + minSamplesLeaf: 1, + impurityMeasure: type as ImpurityMeasure, + maxFeatures: 1, + samplesSubset: [] + }) let bestSplit = splitter.splitNode() expect(bestSplit.threshold).toEqual(0) @@ -21,7 +29,14 @@ describe('Splitter', function () { let y = [1, 1, 0, 1, 1, 1] types.forEach((type) => { - let splitter = new Splitter(X, y, 1, type as ImpurityMeasure, 1, []) + let splitter = new Splitter({ + X, + y, + minSamplesLeaf: 1, + impurityMeasure: type as ImpurityMeasure, + maxFeatures: 1, + samplesSubset: [] + }) let bestSplit = splitter.splitNode() expect(bestSplit.threshold).toEqual(0) @@ -34,7 +49,14 @@ describe('Splitter', function () { let y = [1, 0, 1, 1, 1, 1] types.forEach((type) => { - let splitter = new Splitter(X, y, 1, type as ImpurityMeasure, 1, []) + let splitter = new Splitter({ + X, + y, + minSamplesLeaf: 1, + impurityMeasure: type as ImpurityMeasure, + maxFeatures: 1, + samplesSubset: [] + }) let bestSplit = splitter.splitNode() expect(bestSplit.threshold).toEqual(-0.5) expect(bestSplit.feature).toEqual(0) @@ -47,7 +69,14 @@ describe('Splitter', function () { let y = [1, 1, 1, 1, 2, 2, 2, 2] types.forEach((type) => { - let splitter = new Splitter(X, y, 1, type as ImpurityMeasure, 1, []) + let splitter = new Splitter({ + X, + y, + minSamplesLeaf: 1, + impurityMeasure: type as ImpurityMeasure, + maxFeatures: 1, + samplesSubset: [] + }) let bestSplit = splitter.splitNode() expect(bestSplit.foundSplit).toEqual(false) expect(bestSplit.threshold).toEqual(0) @@ -60,7 +89,14 @@ describe('Splitter', function () { let y = [1, 1, 1, 2, 2, 2, 2, 2] types.forEach((type) => { - let splitter = new Splitter(X, y, 4, type as ImpurityMeasure, 1, []) + let splitter = new Splitter({ + X, + y, + minSamplesLeaf: 4, + impurityMeasure: type as ImpurityMeasure, + maxFeatures: 1, + samplesSubset: [] + }) let bestSplit = splitter.splitNode() expect(bestSplit.foundSplit).toEqual(true) expect(bestSplit.feature).toEqual(0) @@ -73,7 +109,14 @@ describe('Splitter', function () { let y = [1, 1, 1, 2, 2, 2, 2, 2] types.forEach((type) => { - let splitter = new Splitter(X, y, 4, type as ImpurityMeasure, 1, []) + let splitter = new Splitter({ + X, + y, + minSamplesLeaf: 4, + impurityMeasure: type as ImpurityMeasure, + maxFeatures: 1, + samplesSubset: [] + }) let bestSplit = splitter.splitNode() expect(bestSplit.foundSplit).toEqual(true) expect(bestSplit.feature).toEqual(0) @@ -95,7 +138,14 @@ describe('Splitter', function () { let y = [1, 1, 1, 1, 2, 2, 2, 2] types.forEach((type) => { - let splitter = new Splitter(X, y, 1, type as ImpurityMeasure, 20, []) + let splitter = new Splitter({ + X, + y, + minSamplesLeaf: 1, + impurityMeasure: type as ImpurityMeasure, + maxFeatures: 20, + samplesSubset: [] + }) let bestSplit = splitter.splitNode() expect(bestSplit.foundSplit).toEqual(true) expect(bestSplit.feature).toEqual(1) @@ -117,7 +167,14 @@ describe('Splitter', function () { let y = [2, 1, 1, 2, 1, 2, 2, 1] types.forEach((type) => { - let splitter = new Splitter(X, y, 1, type as ImpurityMeasure, 20, []) + let splitter = new Splitter({ + X, + y, + minSamplesLeaf: 1, + impurityMeasure: type as ImpurityMeasure, + maxFeatures: 20, + samplesSubset: [] + }) let bestSplit = splitter.splitNode() expect(bestSplit.foundSplit).toEqual(true) expect(bestSplit.feature).toEqual(1) @@ -137,11 +194,18 @@ describe('Splitter', function () { [0, 1] ] let y = [2, 1, 1, 2, 1, 2, 2, 1] - let splitter = new Splitter(X, y, 1, 'gini', 20, []) - let bestSplit = splitter.splitNode() - const serial = splitter.toJson() - const newSpliter = Splitter.fromJson(serial) - const newBestSplitter = newSpliter.splitNode() + let splitter = new Splitter({ + X, + y, + minSamplesLeaf: 1, + impurityMeasure: 'gini', + maxFeatures: 20, + samplesSubset: [] + }) + splitter.splitNode() + const serial = await toObject(splitter) + const newSplitter = await fromObject(serial) + const newBestSplitter = newSplitter.splitNode() expect(newBestSplitter.foundSplit).toEqual(true) expect(newBestSplitter.feature).toEqual(1) expect(newBestSplitter.threshold).toEqual(2.5) diff --git a/src/tree/Splitter.ts b/src/tree/Splitter.ts index ce408d2a..c53750a9 100644 --- a/src/tree/Splitter.ts +++ b/src/tree/Splitter.ts @@ -41,16 +41,23 @@ export class Splitter extends Serialize { sampleMap: Int32Array nSamplesTotal: int nFeatures: int - name = 'splitter' - - constructor( - X: number[][], - y: int[], - minSamplesLeaf: int, - impurityMeasure: ImpurityMeasure, - maxFeatures: int, - samplesSubset: int[] = [] - ) { + name = 'Splitter' + + constructor({ + X, + y, + minSamplesLeaf, + impurityMeasure, + maxFeatures, + samplesSubset = [] + }: { + X: number[][] + y: int[] + minSamplesLeaf: int + impurityMeasure: ImpurityMeasure + maxFeatures: int + samplesSubset: int[] + }) { super() this.X = X this.y = y @@ -74,9 +81,9 @@ export class Splitter extends Serialize { } } if (impurityMeasure === 'squared_error') { - this.criterion = new RegressionCriterion(impurityMeasure, y) + this.criterion = new RegressionCriterion({ impurityMeasure, y }) } else { - this.criterion = new ClassificationCriterion(impurityMeasure, y) + this.criterion = new ClassificationCriterion({ impurityMeasure, y }) } this.featureOrder = [] for (let i = 0; i < this.nFeatures; i++) { @@ -237,13 +244,14 @@ export class Splitter extends Serialize { jsonClass.sampleMap = new Int32Array(jsonClass.sampleMap) } - const splitter = new Splitter( - jsonClass.X, - jsonClass.y, - jsonClass.minSamplesLeaf, - 'squared_error', - jsonClass.samplesSubset - ) + const splitter = new Splitter({ + X: jsonClass.X, + y: jsonClass.y, + minSamplesLeaf: jsonClass.minSamplesLeaf, + impurityMeasure: 'squared_error', + maxFeatures: jsonClass.maxFeatures, + samplesSubset: jsonClass.samplesSubset + }) return Object.assign(splitter, jsonClass) as Splitter } From ec713230efbf07ac228c0291c21f5fcc0b1bd995 Mon Sep 17 00:00:00 2001 From: Dan Crescimanno Date: Sat, 7 May 2022 20:59:06 -0700 Subject: [PATCH 2/4] feat: updated serialization --- src/cluster/KMeans.test.ts | 3 +- src/compose/ColumnTransformer.test.ts | 31 ++++++- src/compose/ColumnTransformer.ts | 1 + src/dummy/DummyClassifier.test.ts | 3 +- src/dummy/DummyRegressor.test.ts | 11 +-- src/ensemble/VotingClassifier.test.ts | 15 ++-- src/ensemble/VotingClassifier.ts | 10 --- src/ensemble/VotingRegressor.test.ts | 14 ++-- src/ensemble/VotingRegressor.ts | 10 --- src/ensemble/serializeEnsemble.ts | 90 -------------------- src/impute/SimpleImputer.test.ts | 8 +- src/index.ts | 7 +- src/linear_model/LinearRegression.test.ts | 5 +- src/linear_model/LogisticRegression.test.ts | 3 +- src/linear_model/SgdClassifier.ts | 20 +---- src/linear_model/SgdRegressor.ts | 19 ----- src/linear_model/modelSerializer.ts | 91 --------------------- src/mixins.ts | 1 - src/naive_bayes/BaseNaiveBayes.ts | 35 +------- src/naive_bayes/GaussianNB.test.ts | 7 +- src/neighbors/KNeighborsBase.ts | 2 +- src/neighbors/KNeighborsRegressor.test.ts | 4 +- src/pipeline/Pipeline.test.ts | 16 ++-- src/pipeline/Pipeline.ts | 12 +-- src/preprocessing/LabelEncoder.test.ts | 2 +- src/preprocessing/LabelEncoder.ts | 2 +- src/preprocessing/MaxAbsScaler.test.ts | 16 +++- src/preprocessing/MinMaxScaler.test.ts | 5 +- src/preprocessing/Normalizer.test.ts | 2 +- src/preprocessing/OneHotEncoder.test.ts | 2 +- src/preprocessing/OrdinalEncoder.test.ts | 2 +- src/preprocessing/RobustScaler.test.ts | 2 +- src/preprocessing/StandardScaler.test.ts | 2 +- src/serialize.ts | 50 ----------- src/simpleSerializer.ts | 22 ++--- src/tree/Criterion.test.ts | 4 +- src/tree/Criterion.ts | 3 +- src/tree/DecisionTree.test.ts | 4 +- src/tree/DecisionTree.ts | 34 +------- src/tree/Splitter.test.ts | 4 +- src/tree/Splitter.ts | 42 +--------- 41 files changed, 128 insertions(+), 488 deletions(-) delete mode 100644 src/ensemble/serializeEnsemble.ts delete mode 100644 src/linear_model/modelSerializer.ts delete mode 100644 src/serialize.ts diff --git a/src/cluster/KMeans.test.ts b/src/cluster/KMeans.test.ts index ab7f65ca..ac737fc5 100644 --- a/src/cluster/KMeans.test.ts +++ b/src/cluster/KMeans.test.ts @@ -1,5 +1,4 @@ -import { KMeans } from './KMeans' -import { fromObject } from '../index' +import { fromObject, KMeans } from '../index' // Next steps: Improve on kmeans cluster testing describe('KMeans', () => { const X = [ diff --git a/src/compose/ColumnTransformer.test.ts b/src/compose/ColumnTransformer.test.ts index 063da473..9dd79e1b 100644 --- a/src/compose/ColumnTransformer.test.ts +++ b/src/compose/ColumnTransformer.test.ts @@ -1,6 +1,9 @@ -import { ColumnTransformer } from './ColumnTransformer' -import { MinMaxScaler } from '../preprocessing/MinMaxScaler' -import { SimpleImputer } from '../impute/SimpleImputer' +import { + fromObject, + SimpleImputer, + MinMaxScaler, + ColumnTransformer +} from '../index' import * as dfd from 'danfojs-node' describe('ColumnTransformer', function () { @@ -30,4 +33,26 @@ describe('ColumnTransformer', function () { expect(result.arraySync()).toEqual(expected) }) + it('ColumnTransformer serialize/deserialize test', async function () { + const X = [ + [2, 2], // [1, .5] + [2, 3], // [1, .75] + [0, NaN], // [0, 1] + [2, 0] // [.5, 0] + ] + let newDf = new dfd.DataFrame(X) + + const transformer = new ColumnTransformer({ + transformers: [ + ['minmax', new MinMaxScaler(), [0]], + ['simpleImpute', new SimpleImputer({ strategy: 'median' }), [1]] + ] + }) + + transformer.fitTransform(newDf) + let obj = await transformer.toObject() + let myResult = await fromObject(obj) + + expect(myResult.transformers.length).toEqual(2) + }) }) diff --git a/src/compose/ColumnTransformer.ts b/src/compose/ColumnTransformer.ts index 7109b65b..096337c1 100644 --- a/src/compose/ColumnTransformer.ts +++ b/src/compose/ColumnTransformer.ts @@ -76,6 +76,7 @@ export class ColumnTransformer extends Serialize { transformers = [], remainder = 'drop' }: ColumnTransformerParams = {}) { + super() this.transformers = transformers this.remainder = remainder } diff --git a/src/dummy/DummyClassifier.test.ts b/src/dummy/DummyClassifier.test.ts index 419a986b..9905da94 100644 --- a/src/dummy/DummyClassifier.test.ts +++ b/src/dummy/DummyClassifier.test.ts @@ -1,5 +1,4 @@ -import { DummyClassifier } from './DummyClassifier' -import { fromObject } from '../simpleSerializer' +import { DummyClassifier, fromObject } from '../index' describe('DummyClassifier', function () { it('Use DummyClassifier on simple example (mostFrequent)', function () { const clf = new DummyClassifier() diff --git a/src/dummy/DummyRegressor.test.ts b/src/dummy/DummyRegressor.test.ts index ebbe652d..6cc01a50 100644 --- a/src/dummy/DummyRegressor.test.ts +++ b/src/dummy/DummyRegressor.test.ts @@ -1,5 +1,5 @@ -import { DummyRegressor } from './DummyRegressor' -import { toObject, fromObject } from '../simpleSerializer' +import { DummyRegressor, fromObject } from '../index' + describe('DummyRegressor', function () { it('Use DummyRegressor on simple example (mean)', function () { const reg = new DummyRegressor() @@ -68,12 +68,13 @@ describe('DummyRegressor', function () { name: 'DummyRegressor', EstimatorType: 'regressor', strategy: 'constant', - constant: 10 + constant: 10, + quantile: undefined } reg.fit(X, y) - expect(saveResult).toEqual(await toObject(reg)) + expect(saveResult).toEqual(await reg.toObject()) }) it('Should load serialized DummyRegressor', async function () { @@ -92,7 +93,7 @@ describe('DummyRegressor', function () { ] reg.fit(X, y) - const saveReg = await toObject(reg) + const saveReg = await reg.toObject() const newReg = await fromObject(saveReg) expect(newReg.predict(predictX).arraySync()).toEqual([10, 10, 10]) diff --git a/src/ensemble/VotingClassifier.test.ts b/src/ensemble/VotingClassifier.test.ts index d741c00a..60f43da7 100644 --- a/src/ensemble/VotingClassifier.test.ts +++ b/src/ensemble/VotingClassifier.test.ts @@ -1,7 +1,10 @@ -import { makeVotingClassifier, VotingClassifier } from './VotingClassifier' -import { DummyClassifier } from '../dummy/DummyClassifier' - -import { LogisticRegression } from '../linear_model/LogisticRegression' +import { + makeVotingClassifier, + VotingClassifier, + DummyClassifier, + LogisticRegression, + fromObject +} from '../index' describe('VotingClassifier', function () { it('Use VotingClassifier on simple example (voting = hard)', async function () { @@ -118,8 +121,8 @@ describe('VotingClassifier', function () { await voter.fit(X, y) - const savedModel = (await voter.toJson()) as string - const newModel = new VotingClassifier({}).fromJson(savedModel) + const savedModel = await voter.toObject() + const newModel = await fromObject(savedModel) expect(newModel.predict(X).arraySync()).toEqual([1, 1, 1, 1, 1]) }, 30000) diff --git a/src/ensemble/VotingClassifier.ts b/src/ensemble/VotingClassifier.ts index 5db1241e..ce93c91a 100644 --- a/src/ensemble/VotingClassifier.ts +++ b/src/ensemble/VotingClassifier.ts @@ -2,7 +2,6 @@ import { Scikit1D, Scikit2D } from '../types' import { tf } from '../shared/globals' import { ClassifierMixin } from '../mixins' import { LabelEncoder } from '../preprocessing/LabelEncoder' -import { fromJson, toJson } from './serializeEnsemble' /* Next steps: @@ -154,15 +153,6 @@ export class VotingClassifier extends ClassifierMixin { ): Promise | Array> { return (await this.fit(X, y)).transform(X) } - - public fromJson(model: string) { - return fromJson(this, model) - } - - public async toJson(): Promise { - const classJson = JSON.parse(super.toJson() as string) - return toJson(this, classJson) - } } export function makeVotingClassifier(...args: any[]) { diff --git a/src/ensemble/VotingRegressor.test.ts b/src/ensemble/VotingRegressor.test.ts index 06a69f97..e0060196 100644 --- a/src/ensemble/VotingRegressor.test.ts +++ b/src/ensemble/VotingRegressor.test.ts @@ -1,6 +1,10 @@ -import { makeVotingRegressor, VotingRegressor } from './VotingRegressor' -import { DummyRegressor } from '../dummy/DummyRegressor' -import { LinearRegression } from '../linear_model/LinearRegression' +import { + makeVotingRegressor, + VotingRegressor, + fromObject, + DummyRegressor, + LinearRegression +} from '../index' describe('VotingRegressor', function () { it('Use VotingRegressor on simple example ', async function () { @@ -51,8 +55,8 @@ describe('VotingRegressor', function () { await voter.fit(X, y) - const savedModel = (await voter.toJson()) as string - const newModel = new VotingRegressor({}).fromJson(savedModel) + const savedModel = await voter.toObject() + const newModel = await fromObject(savedModel) expect(newModel.score(X, y)).toEqual(voter.score(X, y)) }, 30000) }) diff --git a/src/ensemble/VotingRegressor.ts b/src/ensemble/VotingRegressor.ts index db3d4973..41396e4d 100644 --- a/src/ensemble/VotingRegressor.ts +++ b/src/ensemble/VotingRegressor.ts @@ -1,7 +1,6 @@ import { Scikit1D, Scikit2D } from '../types' import { tf } from '../shared/globals' import { RegressorMixin } from '../mixins' -import { fromJson, toJson } from './serializeEnsemble' /* Next steps: 0. Write validation code to check Estimator inputs @@ -95,15 +94,6 @@ export class VotingRegressor extends RegressorMixin { public async fitTransform(X: Scikit2D, y: Scikit1D) { return (await this.fit(X, y)).transform(X) } - - public fromJson(model: string) { - return fromJson(this, model) as this - } - - public async toJson(): Promise { - const classJson = JSON.parse(super.toJson() as string) - return toJson(this, classJson) - } } /** diff --git a/src/ensemble/serializeEnsemble.ts b/src/ensemble/serializeEnsemble.ts deleted file mode 100644 index 245c89df..00000000 --- a/src/ensemble/serializeEnsemble.ts +++ /dev/null @@ -1,90 +0,0 @@ -import { DummyClassifier } from '../dummy/DummyClassifier' -import { DummyRegressor } from '../dummy/DummyRegressor' -import { LogisticRegression } from '../linear_model/LogisticRegression' -import { RidgeRegression } from '../linear_model/RidgeRegression' -import { LinearRegression } from '../linear_model/LinearRegression' -import { LassoRegression } from '../linear_model/LassoRegression' -import { ElasticNet } from '../linear_model/ElasticNet' -import { LabelEncoder } from '../preprocessing/LabelEncoder' -import { SimpleImputer } from '../impute/SimpleImputer' -import { tf } from '../shared/globals' -import { MinMaxScaler } from '../preprocessing/MinMaxScaler' - -function getEstimator(name: string, serialJson: string) { - switch (name) { - case 'DummyClassifier': - return new DummyClassifier().fromJson(serialJson) - case 'DummyRegressor': - return new DummyRegressor().fromJson(serialJson) - case 'LogisticRegression': - return new LogisticRegression().fromJson(serialJson) - case 'RidgeRegression': - return new RidgeRegression().fromJson(serialJson) - case 'LinearRegression': - return new LinearRegression().fromJson(serialJson) - case 'LassoRegression': - return new LassoRegression().fromJson(serialJson) - case 'ElasticNet': - return new ElasticNet().fromJson(serialJson) - case 'SimpleImputer': - return new SimpleImputer().fromJson(serialJson) - case 'MinMaxScaler': - return new MinMaxScaler().fromJson(serialJson) - default: - throw new Error(`${name} estimator not supported`) - } -} - -export function fromJson(classConstructor: any, model: string) { - let jsonClass = JSON.parse(model) - if (jsonClass.name != classConstructor.name) { - throw new Error( - `wrong json values for ${classConstructor.name} constructor` - ) - } - - const copyThis: any = Object.assign({}, classConstructor) - for (let key of Object.keys(classConstructor)) { - let value = copyThis[key] - if (value instanceof tf.Tensor) { - jsonClass[key] = tf.tensor(jsonClass[key]) - } - } - // for ensembles - if (jsonClass.estimators || jsonClass.steps) { - const jsonEstimatorOrStep = jsonClass.estimators || jsonClass.steps - for (let i = 0; i < jsonEstimatorOrStep.length; i++) { - const estimatorName = JSON.parse(jsonEstimatorOrStep[i][1]).name - const estimators = getEstimator(estimatorName, jsonEstimatorOrStep[i][1]) - jsonEstimatorOrStep[i][1] = Object.assign( - estimators, - jsonEstimatorOrStep[i][1] - ) - } - } - - if (jsonClass.le) { - const labelEncode = new LabelEncoder() - jsonClass.le = Object.assign(labelEncode, jsonClass.le) - } - return Object.assign(classConstructor, jsonClass) -} - -export async function toJson(classConstructor: any, classJson: any) { - let i = 0 - if (classConstructor.estimators) { - for (const estimator of classConstructor.estimators) { - classJson.estimators[i][1] = await estimator[1].toJson() - i += 1 - } - } - - if (classConstructor.steps) { - for (const step of classConstructor.steps) { - classJson.steps[i][1] = await step[1].toJson() - i += 1 - } - } - - return JSON.stringify(classJson) -} diff --git a/src/impute/SimpleImputer.test.ts b/src/impute/SimpleImputer.test.ts index c0969a80..2bad5c7b 100644 --- a/src/impute/SimpleImputer.test.ts +++ b/src/impute/SimpleImputer.test.ts @@ -1,6 +1,6 @@ import { tf } from '../shared/globals' -import { SimpleImputer } from './SimpleImputer' -import { toObject, fromObject } from '../simpleSerializer' +import { SimpleImputer, fromObject } from '../index' + describe('SimpleImputer', function () { it('Imputes with "constant" strategy 2D one column. In this strategy, we give the fill value', function () { const imputer = new SimpleImputer({ strategy: 'constant', fillValue: 3 }) @@ -141,7 +141,7 @@ describe('SimpleImputer', function () { } imputer.fitTransform(data) - expect(await toObject(imputer)).toEqual(expected) + expect(await imputer.toObject()).toEqual(expected) }) it('Should load serialized Imputer', async function () { const imputer = new SimpleImputer({ strategy: 'mostFrequent' }) @@ -163,7 +163,7 @@ describe('SimpleImputer', function () { ] imputer.fitTransform(data) - const thing = await toObject(imputer) + const thing = await imputer.toObject() const newImputer = await fromObject(thing) const newReturned = newImputer.transform(data) expect(newReturned.arraySync()).toEqual(expected) diff --git a/src/index.ts b/src/index.ts index 22917ec5..532f8be3 100644 --- a/src/index.ts +++ b/src/index.ts @@ -1,5 +1,3 @@ -import Serialize from './serialize' - /** * @license * Copyright 2021, JsData. All rights reserved. @@ -15,6 +13,7 @@ import Serialize from './serialize' * ========================================================================== */ export { KNeighborsRegressor } from './neighbors/KNeighborsRegressor' +export { KNeighborsClassifier } from './neighbors/KNeighborsClassifier' export { LinearRegression, LinearRegressionParams @@ -85,7 +84,9 @@ export { DecisionTreeRegressor, DecisionTreeRegressorParams } from './tree/DecisionTree' - +export { KFold } from './model_selection/KFold' +export { trainTestSplit } from './model_selection/trainTestSplit' +export { crossValScore } from './model_selection/crossValScore' export { fromObject, Serialize } from './simpleSerializer' export { ClassificationCriterion, RegressionCriterion } from './tree/Criterion' diff --git a/src/linear_model/LinearRegression.test.ts b/src/linear_model/LinearRegression.test.ts index 1753fa4c..d4ca20a6 100644 --- a/src/linear_model/LinearRegression.test.ts +++ b/src/linear_model/LinearRegression.test.ts @@ -1,7 +1,6 @@ -import { LinearRegression } from './LinearRegression' +import { LinearRegression, fromObject } from '../index' import { tensorEqual } from '../utils' import { tf } from '../shared/globals' -import { toObject, fromObject } from '../simpleSerializer' function roughlyEqual(a: number, b: number, tol = 0.1) { return Math.abs(a - b) < tol } @@ -149,9 +148,7 @@ describe('LinearRegression', function () { await lr.fit(mediumX, yPlusJitter) const serialized = await lr.toObject() - console.log({ serialized }) const newModel = await fromObject(serialized) - console.log(newModel) expect(tensorEqual(newModel.coef, tf.tensor1d([2.5, 1]), 0.1)).toBe(true) expect(roughlyEqual(newModel.intercept as number, 0)).toBe(true) diff --git a/src/linear_model/LogisticRegression.test.ts b/src/linear_model/LogisticRegression.test.ts index 3dde416f..03243db3 100644 --- a/src/linear_model/LogisticRegression.test.ts +++ b/src/linear_model/LogisticRegression.test.ts @@ -1,6 +1,5 @@ -import { LogisticRegression } from './LogisticRegression' +import { LogisticRegression, fromObject } from '../index' import { tf } from '../shared/globals' -import { fromObject } from '../simpleSerializer' describe('LogisticRegression', function () { it('Works on arrays (small example)', async function () { const lr = new LogisticRegression() diff --git a/src/linear_model/SgdClassifier.ts b/src/linear_model/SgdClassifier.ts index e3579da6..06f3f539 100644 --- a/src/linear_model/SgdClassifier.ts +++ b/src/linear_model/SgdClassifier.ts @@ -20,7 +20,7 @@ import { Scikit2D, Scikit1D, OptimizerTypes, LossTypes } from '../types' import { OneHotEncoder } from '../preprocessing/OneHotEncoder' import { assert } from '../typesUtils' import { ClassifierMixin } from '../mixins' -import { fromJson, toJSON } from './modelSerializer' + /** * SGD is a thin Wrapper around Tensorflow's model api with a single dense layer. * With this base class and different error functions / regularizers we can @@ -404,22 +404,4 @@ export class SGDClassifier extends ClassifierMixin { private getModelWeight(): Promise> { return Promise.all(this.model.getWeights().map((weight) => weight.array())) } - - public async toJson(): Promise { - const classifierJson = JSON.parse(super.toJson() as string) - return toJSON(this, classifierJson) - } - - public fromJson(model: string) { - return fromJson(this, model) as this - } - - // public async toObject(): Promise { - // let { toObject } = await import('../simpleSerializer') - // return await toObject(this, [ - // 'modelCompileArgs', - // 'modelFitArgs', - // 'denseLayerArgs' - // ]) - // } } diff --git a/src/linear_model/SgdRegressor.ts b/src/linear_model/SgdRegressor.ts index d3bad4e4..e178889f 100644 --- a/src/linear_model/SgdRegressor.ts +++ b/src/linear_model/SgdRegressor.ts @@ -20,7 +20,6 @@ import { } from '../utils' import { Scikit2D, Scikit1D, OptimizerTypes, LossTypes } from '../types' import { RegressorMixin } from '../mixins' -import { fromJson, toJSON } from './modelSerializer' /** * SGD is a thin Wrapper around Tensorflow's model api with a single dense layer. @@ -209,15 +208,6 @@ export class SGDRegressor extends RegressorMixin { return this } - public async toObject(): Promise { - let { toObject } = await import('../simpleSerializer') - return await toObject(this, [ - 'modelCompileArgs', - 'modelFitArgs', - 'denseLayerArgs' - ]) - } - /** * Similar to scikit-learn, this returns the object of configuration params for SGD * @returns {SGDRegressorParams} Returns an object of configuration params. @@ -390,13 +380,4 @@ export class SGDRegressor extends RegressorMixin { return intercept } - - public async toJson(): Promise { - const classifierJson = JSON.parse(super.toJson() as string) - return toJSON(this, classifierJson) - } - - public fromJson(model: string) { - return fromJson(this, model) as this - } } diff --git a/src/linear_model/modelSerializer.ts b/src/linear_model/modelSerializer.ts deleted file mode 100644 index aac0aeba..00000000 --- a/src/linear_model/modelSerializer.ts +++ /dev/null @@ -1,91 +0,0 @@ -import { optimizer, initializer, getLoss } from '../utils' -import { tf } from '../shared/globals' -import { OneHotEncoder } from '../preprocessing/OneHotEncoder' - -function getModelWeight( - model: tf.Sequential -): Promise> { - return Promise.all(model.getWeights().map((weight) => weight.array())) -} - -export async function toJSON( - classConstructor: any, - classifierJson: any -): Promise { - const modelConfig = classConstructor.model.getConfig() - const modelWeight = await getModelWeight(classConstructor.model) - classifierJson.model = { - config: modelConfig, - weight: modelWeight - } - - if (classConstructor.denseLayerArgs.kernelInitializer) { - const initializerName = - classConstructor.denseLayerArgs.kernelInitializer.constructor.name - classifierJson.denseLayerArgs.kernelInitializer = initializerName - } - if (classConstructor.denseLayerArgs.biasInitializer) { - const biasName = - classConstructor.denseLayerArgs.biasInitializer.constructor.name - classifierJson.denseLayerArgs.biasInitializer = biasName - } - // set optimizer - classifierJson.modelCompileArgs.optimizer = - classConstructor.model.optimizer.getConfig() - return JSON.stringify(classifierJson) -} - -export function fromJson(classConstructor: any, model: string) { - let jsonClass = JSON.parse(model) - if (jsonClass.name != classConstructor.name) { - throw new Error( - `wrong json values for ${classConstructor.name} constructor` - ) - } - - const jsonModel = tf.Sequential.fromConfig( - tf.Sequential, - jsonClass.model.config - ) as tf.Sequential - const jsonOpt = optimizer(jsonClass.optimizerType) - const optim = Object.assign(jsonOpt, jsonClass.modelCompileArgs.optimizer) - const loss = getLoss(jsonClass.lossType) - jsonClass.modelCompileArgs = { - ...jsonClass.modelCompileArgs, - optimizer: optim, - loss: loss - } - - jsonModel.compile(jsonClass.modelCompileArgs) - const weights = [] - for (const weight of jsonClass.model.weight) { - weights.push(tf.tensor(weight)) - } - jsonModel.setWeights(weights) - jsonClass.model = jsonModel - - // if call back create callback - // default usecase is set to EarlyStop - // might get complex for custom callback - if (jsonClass.modelFitArgs.callbacks) { - let jsonCallback = tf.callbacks.earlyStopping() - let modelFitArgs = jsonClass.modelFitArgs - jsonCallback = Object.assign(jsonCallback, modelFitArgs.callbacks[0]) - modelFitArgs.callbacks = [jsonCallback] - } - - if (jsonClass.denseLayerArgs.kernelInitializer) { - let initializerName = jsonClass.denseLayerArgs.kernelInitializer - jsonClass.denseLayerArgs.kernelInitializer = initializer(initializerName) - } - if (jsonClass.denseLayerArgs.biasInitializer) { - let biasName = jsonClass.denseLayerArgs.biasInitializer - jsonClass.denseLayerArgs.biasInitializer = initializer(biasName) - } - - if (jsonClass.oneHot) { - let jsonOneHotEncoder = new OneHotEncoder() - jsonClass.oneHot = Object.assign(jsonOneHotEncoder, jsonClass.oneHot) - } - return Object.assign(classConstructor, jsonClass) -} diff --git a/src/mixins.ts b/src/mixins.ts index 38d9de8e..20085282 100644 --- a/src/mixins.ts +++ b/src/mixins.ts @@ -1,6 +1,5 @@ import { Scikit2D, Scikit1D } from './types' import { r2Score, accuracyScore } from './metrics/metrics' -// import Serialize from './serialize' import { Serialize } from './simpleSerializer' import { tf } from './shared/globals' export class TransformerMixin extends Serialize { diff --git a/src/naive_bayes/BaseNaiveBayes.ts b/src/naive_bayes/BaseNaiveBayes.ts index 563c8fa3..fd48b42b 100644 --- a/src/naive_bayes/BaseNaiveBayes.ts +++ b/src/naive_bayes/BaseNaiveBayes.ts @@ -16,7 +16,7 @@ import { polyfillUnique } from '../tfUtils' import { tf } from '../shared/globals' import { Scikit1D, Scikit2D } from '../types' import { convertToNumericTensor2D, convertToTensor1D } from '../utils' -import Serialize from '../serialize' +import { Serialize } from '../simpleSerializer' export interface NaiveBayesParams { /** @@ -152,37 +152,4 @@ export abstract class BaseNaiveBayes extends Serialize { mean: tf.Tensor1D, variance: tf.Tensor1D ): tf.Tensor1D - - public toJson(): string { - const jsonClass = JSON.parse(super.toJson() as string) - - if (this.priors) { - jsonClass.priors = this.priors.arraySync() - } - jsonClass.classes = this.classes.arraySync() - jsonClass.means = this.means.map((t: tf.Tensor1D) => t.arraySync()) - jsonClass.variances = this.variances.map((v: tf.Tensor1D) => v.arraySync()) - return JSON.stringify(jsonClass) - } - - public fromJson(model: string) { - const jsonModel = JSON.parse(model) - - if (jsonModel.priors) { - jsonModel.priors = tf.tensor(jsonModel.priors) - } - jsonModel.classes = tf.tensor(jsonModel.classes) - - const means = [] - for (const wMeans of jsonModel.means) { - means.push(tf.tensor(wMeans)) - } - const variances = [] - for (const variance of jsonModel.variances) { - variances.push(tf.tensor(variance)) - } - jsonModel.means = means - jsonModel.variances = variances - return Object.assign(this, jsonModel) as this - } } diff --git a/src/naive_bayes/GaussianNB.test.ts b/src/naive_bayes/GaussianNB.test.ts index abce7a22..093f2151 100644 --- a/src/naive_bayes/GaussianNB.test.ts +++ b/src/naive_bayes/GaussianNB.test.ts @@ -12,8 +12,7 @@ * limitations under the License. * ========================================================================== */ -import { GaussianNB } from './GaussianNB' -import { toObject, fromObject } from '../simpleSerializer' +import { GaussianNB, fromObject } from '../index' describe('GaussianNB', function () { it('without priors', async () => { @@ -100,9 +99,9 @@ describe('GaussianNB', function () { const model = new GaussianNB({ priors: [0.5, 0.5], varSmoothing: 1.0 }) await model.fit(X, y) - const labels = model.predict(X) + model.predict(X) - const serializeModel = await toObject(model) + const serializeModel = await model.toObject() const newModel = await fromObject(serializeModel) expect(newModel.predict(X).arraySync()).toEqual([0, 0, 1, 1, 1]) }) diff --git a/src/neighbors/KNeighborsBase.ts b/src/neighbors/KNeighborsBase.ts index 3b4dd715..31774e18 100644 --- a/src/neighbors/KNeighborsBase.ts +++ b/src/neighbors/KNeighborsBase.ts @@ -21,7 +21,7 @@ import { convertToNumericTensor1D, convertToNumericTensor2D } from '../utils' import { assert } from '../typesUtils' import { tf } from '../shared/globals' import { KdTree } from './KdTree' -import Serialize from '../serialize' +import { Serialize } from '../simpleSerializer' const WEIGHTS_FUNCTIONS = { uniform(distances: tf.Tensor2D) { diff --git a/src/neighbors/KNeighborsRegressor.test.ts b/src/neighbors/KNeighborsRegressor.test.ts index 50c5c418..751fb8fb 100644 --- a/src/neighbors/KNeighborsRegressor.test.ts +++ b/src/neighbors/KNeighborsRegressor.test.ts @@ -13,12 +13,10 @@ * ========================================================================== */ -import { KNeighborsRegressor } from './KNeighborsRegressor' +import { KNeighborsRegressor, crossValScore, KFold } from '../index' import { KNeighborsParams } from './KNeighborsBase' import { dataUrls } from '../datasets/datasets' import { arrayEqual } from '../utils' -import { crossValScore } from '../model_selection/crossValScore' -import { KFold } from '../model_selection/KFold' import { negMeanSquaredError } from '../model_selection/scorers' import '../jestTensorMatchers' import * as dfd from 'danfojs-node' diff --git a/src/pipeline/Pipeline.test.ts b/src/pipeline/Pipeline.test.ts index 7c10306b..18e77d1c 100644 --- a/src/pipeline/Pipeline.test.ts +++ b/src/pipeline/Pipeline.test.ts @@ -1,10 +1,13 @@ -import { Pipeline, makePipeline } from './Pipeline' +import { + Pipeline, + makePipeline, + LinearRegression, + SimpleImputer, + MinMaxScaler, + fromObject +} from '../index' import { tf } from '../shared/globals' import { tensorEqual } from '../utils' -import { LinearRegression } from '../linear_model/LinearRegression' -import { SimpleImputer } from '../impute/SimpleImputer' -import { MinMaxScaler } from '../preprocessing/MinMaxScaler' -import { toObject, fromObject } from '../simpleSerializer' describe('Pipeline', function () { it('Use a Pipeline (min-max scaler, and linear regression)', async function () { @@ -97,8 +100,7 @@ describe('Pipeline', function () { await pipeline.fit(X, y) - const saveModel = await toObject(pipeline) - console.log(saveModel) + const saveModel = await pipeline.toObject() const newPipeLine = await fromObject(saveModel) expect(newPipeLine.steps[1][1].min.arraySync()).toEqual([0, 0]) diff --git a/src/pipeline/Pipeline.ts b/src/pipeline/Pipeline.ts index 6cf5e6c4..a2c5e923 100644 --- a/src/pipeline/Pipeline.ts +++ b/src/pipeline/Pipeline.ts @@ -1,8 +1,7 @@ /* eslint-disable @typescript-eslint/no-explicit-any */ import { assert } from '../typesUtils' import { Scikit1D, Scikit2D } from '../types' -import Serialize from '../serialize' -import { toJson, fromJson } from '../ensemble/serializeEnsemble' +import { Serialize } from '../simpleSerializer' import { tf } from '../shared/globals' /* @@ -206,15 +205,6 @@ export class Pipeline extends Serialize { let XT = this.fitTransformExceptLast(X) return await lastEstimator.fitPredict(XT, y) } - - public async toJson(): Promise { - const classJson = JSON.parse(super.toJson() as string) - return toJson(this, classJson) - } - - public fromJson(model: string) { - return fromJson(this, model) as this - } } /** diff --git a/src/preprocessing/LabelEncoder.test.ts b/src/preprocessing/LabelEncoder.test.ts index 062929c8..b56ede1a 100644 --- a/src/preprocessing/LabelEncoder.test.ts +++ b/src/preprocessing/LabelEncoder.test.ts @@ -1,4 +1,4 @@ -import { LabelEncoder } from './LabelEncoder' +import { LabelEncoder } from '../index' import * as dfd from 'danfojs-node' describe('LabelEncoder', function () { diff --git a/src/preprocessing/LabelEncoder.ts b/src/preprocessing/LabelEncoder.ts index 9067e707..6173f31b 100644 --- a/src/preprocessing/LabelEncoder.ts +++ b/src/preprocessing/LabelEncoder.ts @@ -16,7 +16,7 @@ import { Scikit1D } from '../types' import { tf } from '../shared/globals' import { isSeriesInterface } from '../typesUtils' -import Serialize from '../serialize' +import { Serialize } from '../simpleSerializer' /* Next steps: diff --git a/src/preprocessing/MaxAbsScaler.test.ts b/src/preprocessing/MaxAbsScaler.test.ts index b69f584a..91781bab 100644 --- a/src/preprocessing/MaxAbsScaler.test.ts +++ b/src/preprocessing/MaxAbsScaler.test.ts @@ -1,4 +1,4 @@ -import { MaxAbsScaler } from './MaxAbsScaler' +import { MaxAbsScaler, fromObject } from '../index' import * as dfd from 'danfojs-node' import { tf } from '../shared/globals' import { arrayEqual } from '../utils' @@ -135,6 +135,20 @@ describe('MaxAbsScaler', function () { expect(arrayEqual(X_trans_new, X_expected_new, 0.01)).toBe(true) }) + it('Serialize and unserialize MaxAbsScaler', async function () { + const data = tf.tensor2d([4, 4, 'whoops', 3, 3] as any, [5, 1]) + const scaler = new MaxAbsScaler() + scaler.fit(data) + const serial = await scaler.toObject() + const newModel = await fromObject(serial) + expect(newModel.transform(data).arraySync().flat()).toEqual([ + 1, + 1, + NaN, + 0.75, + 0.75 + ]) + }) /* Streaming test def test_maxabs_scaler_partial_fit(): # Test if partial_fit run over many batches of size 1 and 50 diff --git a/src/preprocessing/MinMaxScaler.test.ts b/src/preprocessing/MinMaxScaler.test.ts index 6e995b47..b0bf1420 100644 --- a/src/preprocessing/MinMaxScaler.test.ts +++ b/src/preprocessing/MinMaxScaler.test.ts @@ -1,9 +1,8 @@ -import { MinMaxScaler } from './MinMaxScaler' +import { MinMaxScaler, fromObject } from '../index' import * as dfd from 'danfojs-node' import { isDataFrameInterface, isSeriesInterface } from '../typesUtils' import { ScikitVecOrMatrix } from '../types' import { tf } from '../shared/globals' -import { toObject, fromObject } from '../simpleSerializer' export function convertTensorToInputType( tensor: tf.Tensor, inputData: ScikitVecOrMatrix @@ -165,7 +164,7 @@ describe('MinMaxscaler', function () { const data = tf.tensor2d([4, 4, 'whoops', 3, 3] as any, [5, 1]) const scaler = new MinMaxScaler() scaler.fit(data) - const serial = (await toObject(scaler)) as string + const serial = await scaler.toObject() const newModel = await fromObject(serial) expect(newModel.transform(data).arraySync().flat()).toEqual([ 1, diff --git a/src/preprocessing/Normalizer.test.ts b/src/preprocessing/Normalizer.test.ts index 65d20877..b202c377 100644 --- a/src/preprocessing/Normalizer.test.ts +++ b/src/preprocessing/Normalizer.test.ts @@ -1,4 +1,4 @@ -import { Normalizer } from './Normalizer' +import { Normalizer } from '../index' import * as dfd from 'danfojs-node' import { arrayEqual } from '../utils' diff --git a/src/preprocessing/OneHotEncoder.test.ts b/src/preprocessing/OneHotEncoder.test.ts index 35100413..2ddd7fce 100644 --- a/src/preprocessing/OneHotEncoder.test.ts +++ b/src/preprocessing/OneHotEncoder.test.ts @@ -1,5 +1,5 @@ import { tf } from '../shared/globals' -import { OneHotEncoder } from './OneHotEncoder' +import { OneHotEncoder } from '../index' import { arrayTo2DColumn } from '../utils' describe('OneHotEncoder', function () { diff --git a/src/preprocessing/OrdinalEncoder.test.ts b/src/preprocessing/OrdinalEncoder.test.ts index 57666262..53438b53 100644 --- a/src/preprocessing/OrdinalEncoder.test.ts +++ b/src/preprocessing/OrdinalEncoder.test.ts @@ -1,4 +1,4 @@ -import { OrdinalEncoder } from './OrdinalEncoder' +import { OrdinalEncoder } from '../index' import { arrayTo2DColumn } from '../utils' describe('OrdinalEncoder', function () { diff --git a/src/preprocessing/RobustScaler.test.ts b/src/preprocessing/RobustScaler.test.ts index 3a31475e..3653c9d4 100644 --- a/src/preprocessing/RobustScaler.test.ts +++ b/src/preprocessing/RobustScaler.test.ts @@ -1,4 +1,4 @@ -import { RobustScaler } from './RobustScaler' +import { RobustScaler } from '../index' import * as dfd from 'danfojs-node' import { arrayEqual } from '../utils' diff --git a/src/preprocessing/StandardScaler.test.ts b/src/preprocessing/StandardScaler.test.ts index e3830ef7..d80621d6 100644 --- a/src/preprocessing/StandardScaler.test.ts +++ b/src/preprocessing/StandardScaler.test.ts @@ -1,4 +1,4 @@ -import { StandardScaler } from './StandardScaler' +import { StandardScaler } from '../index' import * as dfd from 'danfojs-node' describe('StandardScaler', function () { diff --git a/src/serialize.ts b/src/serialize.ts deleted file mode 100644 index 2ef36c94..00000000 --- a/src/serialize.ts +++ /dev/null @@ -1,50 +0,0 @@ -/** - * A Generic class to serialized and Unserialized classes (models, transformers, - * or any operator) - */ - -import { tf } from './shared/globals' -export default class Serialize { - public name = 'Serialize' // default name for all inherited class - - /** - * Serialize all [inherited] class property into - * a json string - * @returns Json string - */ - public toJson(): string | Promise { - const thisCopy: any = Object.assign({}, this) - for (const key of Object.keys(thisCopy)) { - let value = thisCopy[key] - if (value instanceof tf.Tensor) { - thisCopy[key] = { - type: 'Tensor', - value: value.arraySync() - } - } - } - return JSON.stringify(thisCopy) - } - - /** - * Initialize [inherited] class from serialized - * json string - * @param model string - * @returns [Inherited] Class - */ - public fromJson(model: string) { - let jsonClass = JSON.parse(model) - if (jsonClass.name != this.name) { - throw new Error(`wrong json values for ${this.name} constructor`) - } - - for (let key of Object.keys(jsonClass)) { - let value = jsonClass[key] - if (typeof value === 'object' && value?.type === 'Tensor') { - jsonClass[key] = tf.tensor(jsonClass[key].value) - } - } - - return Object.assign(this, jsonClass) as this - } -} diff --git a/src/simpleSerializer.ts b/src/simpleSerializer.ts index ca8ee217..bcd8d714 100644 --- a/src/simpleSerializer.ts +++ b/src/simpleSerializer.ts @@ -91,7 +91,9 @@ export async function toObjectInner( } // Array case if (Array.isArray(val)) { - return await Promise.all(val.map(async (el) => await toObjectInner(el))) + return await Promise.all( + val.map(async (el) => await toObjectInner(el, ignoreKeys)) + ) } // Serialize a Tensor @@ -128,12 +130,6 @@ export async function toObjectInner( } } - if (EstimatorList.includes(val.name)) { - if (val.toObject) { - return val.toObject() - } - } - // Generic object case / class case let response: any = {} for (let key of Object.keys(val)) { @@ -145,7 +141,7 @@ export async function toObjectInner( // if (typeof val[key] === 'function') { // continue // } - response[key] = await toObjectInner(val[key]) + response[key] = await toObjectInner(val[key], ignoreKeys) } return response } @@ -212,10 +208,16 @@ export async function fromObject(val: any): Promise { } } +let ignoredKeysForSGDRegressor = [ + 'modelCompileArgs', + 'modelFitArgs', + 'denseLayerArgs' +] + export class Serialize { - async toObject(ignoreKeys: string[] = []): Promise { + async toObject(): Promise { try { - return await toObjectInner(this, ignoreKeys) + return await toObjectInner(this, ignoredKeysForSGDRegressor) } catch (e) { console.error(e) } diff --git a/src/tree/Criterion.test.ts b/src/tree/Criterion.test.ts index e2ed5a32..f9ab852c 100644 --- a/src/tree/Criterion.test.ts +++ b/src/tree/Criterion.test.ts @@ -1,5 +1,5 @@ import { ClassificationCriterion, giniCoefficient, entropy } from './Criterion' -import { toObject, fromObject } from '../simpleSerializer' +import { fromObject } from '../simpleSerializer' describe('Criterion', function () { let X = [ [-2, -1], @@ -83,7 +83,7 @@ describe('Criterion', function () { y }) criterion.init(0, 6, sampleMap) - const serial = await toObject(criterion) + const serial = await criterion.toObject() const newCriterion = await fromObject(serial) expect(newCriterion.nodeImpurity()).toEqual(1) }, 1000) diff --git a/src/tree/Criterion.ts b/src/tree/Criterion.ts index 3f57c2de..ef846390 100644 --- a/src/tree/Criterion.ts +++ b/src/tree/Criterion.ts @@ -1,6 +1,5 @@ -import { assert } from '../typesUtils' import { int } from '../randUtils' -import Serialize from '../serialize' +import { Serialize } from '../simpleSerializer' export type ImpurityMeasure = 'gini' | 'entropy' | 'squared_error' diff --git a/src/tree/DecisionTree.test.ts b/src/tree/DecisionTree.test.ts index d2fc7728..c5933bd1 100644 --- a/src/tree/DecisionTree.test.ts +++ b/src/tree/DecisionTree.test.ts @@ -1,7 +1,7 @@ import { DecisionTreeClassifier, DecisionTreeRegressor } from './DecisionTree' import { dataUrls } from '../datasets/datasets' import * as dfd from 'danfojs-node' -import { toObject, fromObject } from '../simpleSerializer' +import { fromObject } from '../simpleSerializer' describe('DecisionTree', function () { it('Use the DecisionTree (toy)', async function () { @@ -621,7 +621,7 @@ describe('DecisionTree', function () { let tree_classifier = new DecisionTreeClassifier() tree_classifier.fit(X, y) - const serial = await toObject(tree_classifier) + const serial = await tree_classifier.toObject() const newTree = await fromObject(serial) expect(newTree.predict(T)).toEqual(true_result) }, 1000) diff --git a/src/tree/DecisionTree.ts b/src/tree/DecisionTree.ts index a4659451..8e654f6a 100644 --- a/src/tree/DecisionTree.ts +++ b/src/tree/DecisionTree.ts @@ -8,7 +8,7 @@ import { validateX, validateY } from './utils' import { Scikit1D, Scikit2D } from '../types' import { convertScikit2DToArray, convertScikit1DToArray } from '../utils' import { LabelEncoder } from '../preprocessing/LabelEncoder' -import Serialize from '../serialize' +import { Serialize } from '../simpleSerializer' /* Next steps: @@ -155,6 +155,7 @@ export class DecisionTreeBase extends Serialize { X: number[][] = [] y: number[] = [] labelEncoder?: LabelEncoder + name: string constructor({ criterion = 'gini', @@ -300,37 +301,6 @@ export class DecisionTreeBase extends Serialize { this.tree.populateChildIds() this.tree.isBuilt = true } - - public toJson(): string { - const jsonClass = JSON.parse(super.toJson() as string) - - if (this.splitter) { - jsonClass.splitter = this.splitter.toJson() as string - } - if (this.labelEncoder) { - jsonClass.labelEncoder = this.labelEncoder.toJson() - } - return JSON.stringify(jsonClass) - } - - public fromJson(model: string) { - const jsonClass = JSON.parse(model) - - if (jsonClass.tree) { - const tree = new DecisionTree() - jsonClass.tree = Object.assign(tree, jsonClass.tree) - } - - if (jsonClass.splitter) { - jsonClass.splitter = Splitter.fromJson(jsonClass.splitter) - } - if (jsonClass.labelEncoder) { - jsonClass.labelEncoder = new LabelEncoder().fromJson( - jsonClass.labelEncoder - ) - } - return Object.assign(this, jsonClass) as this - } } export interface DecisionTreeClassifierParams { diff --git a/src/tree/Splitter.test.ts b/src/tree/Splitter.test.ts index a8b164f1..35481658 100644 --- a/src/tree/Splitter.test.ts +++ b/src/tree/Splitter.test.ts @@ -1,6 +1,6 @@ import { ImpurityMeasure } from './Criterion' import { Splitter } from './Splitter' -import { toObject, fromObject } from '../simpleSerializer' +import { fromObject } from '../simpleSerializer' describe('Splitter', function () { let types = ['gini', 'entropy', 'squared_error'] @@ -203,7 +203,7 @@ describe('Splitter', function () { samplesSubset: [] }) splitter.splitNode() - const serial = await toObject(splitter) + const serial = await splitter.toObject() const newSplitter = await fromObject(serial) const newBestSplitter = newSplitter.splitNode() expect(newBestSplitter.foundSplit).toEqual(true) diff --git a/src/tree/Splitter.ts b/src/tree/Splitter.ts index c53750a9..8a9c9be8 100644 --- a/src/tree/Splitter.ts +++ b/src/tree/Splitter.ts @@ -5,7 +5,7 @@ import { } from './Criterion' import shuffle from 'lodash/shuffle' import { int } from '../randUtils' -import Serialize from '../serialize' +import { Serialize } from '../simpleSerializer' export interface Split { feature: int @@ -215,44 +215,4 @@ export class Splitter extends Serialize { return currentSplit } } - - public toJson(): string { - const jsonClass = JSON.parse(super.toJson() as string) - - if (jsonClass.criterion) { - jsonClass.criterion = this.criterion.toJson() as string - } - if (this.sampleMap) jsonClass.sampleMap = Array.from(this.sampleMap) - return JSON.stringify(jsonClass) - } - - static fromJson(model: string) { - const jsonClass = JSON.parse(model) - - if (jsonClass.criterion) { - const criterionName = JSON.parse(jsonClass.criterion).name - if (criterionName == 'classificationCriterion') { - jsonClass.criterion = ClassificationCriterion.fromJson( - jsonClass.criterion - ) - } else { - jsonClass.criterion = RegressionCriterion.fromJson(jsonClass.criterion) - } - } - - if (jsonClass.sampleMap) { - jsonClass.sampleMap = new Int32Array(jsonClass.sampleMap) - } - - const splitter = new Splitter({ - X: jsonClass.X, - y: jsonClass.y, - minSamplesLeaf: jsonClass.minSamplesLeaf, - impurityMeasure: 'squared_error', - maxFeatures: jsonClass.maxFeatures, - samplesSubset: jsonClass.samplesSubset - }) - - return Object.assign(splitter, jsonClass) as Splitter - } } From 260c1347d7c06c494b24b813c00c11d68c4da354 Mon Sep 17 00:00:00 2001 From: Dan Crescimanno Date: Sun, 8 May 2022 09:17:24 -0700 Subject: [PATCH 3/4] feat: updated test --- docs/convert.js | 1 - package-lock.json | 14 ++++++++++++++ package.json | 1 + src/cluster/KMeans.test.ts | 6 +++--- src/compose/ColumnTransformer.test.ts | 6 +++--- src/dummy/DummyClassifier.test.ts | 6 +++--- src/dummy/DummyRegressor.test.ts | 6 +++--- src/ensemble/VotingClassifier.test.ts | 6 +++--- src/ensemble/VotingRegressor.test.ts | 6 +++--- src/impute/SimpleImputer.test.ts | 6 +++--- src/index.ts | 2 +- src/linear_model/LinearRegression.test.ts | 6 +++--- src/linear_model/LogisticRegression.test.ts | 6 +++--- src/model_selection/KFold.test.ts | 2 +- src/naive_bayes/GaussianNB.test.ts | 6 +++--- src/neighbors/KNeighborsClassifier.test.ts | 2 +- src/pipeline/Pipeline.test.ts | 6 +++--- src/preprocessing/MaxAbsScaler.test.ts | 6 +++--- src/preprocessing/MinMaxScaler.test.ts | 6 +++--- src/simpleSerializer.ts | 13 +++++++++++-- src/tree/Criterion.test.ts | 6 +++--- src/tree/DecisionTree.test.ts | 6 +++--- src/tree/Splitter.test.ts | 6 +++--- 23 files changed, 77 insertions(+), 54 deletions(-) diff --git a/docs/convert.js b/docs/convert.js index ef76d67e..ddd5c88d 100644 --- a/docs/convert.js +++ b/docs/convert.js @@ -189,7 +189,6 @@ function getTypeName(val, bigObj) { } function generateProperties(jsonClass, bigObj) { - // console.log(jsonClass.children) let interface = getInterfaceForClass(jsonClass, bigObj) let allConstructorArgs = [] if (interface && interface.children) { diff --git a/package-lock.json b/package-lock.json index c40cd259..131322a0 100644 --- a/package-lock.json +++ b/package-lock.json @@ -12,6 +12,7 @@ "dependencies": { "@tensorflow/tfjs": "^3.16.0", "@tensorflow/tfjs-node": "^3.16.0", + "base64-arraybuffer": "^1.0.2", "lodash": "^4.17.21", "mathjs": "^10.0.0", "simple-statistics": "^7.7.0" @@ -4757,6 +4758,14 @@ "version": "1.0.2", "license": "MIT" }, + "node_modules/base64-arraybuffer": { + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/base64-arraybuffer/-/base64-arraybuffer-1.0.2.tgz", + "integrity": "sha512-I3yl4r9QB5ZRY3XuJVEPfc2XhZO6YweFPI+UovAzn+8/hb3oJ6lnysaFcjVpkCPfVWFUDvoZ8kmVDP7WyRtYtQ==", + "engines": { + "node": ">= 0.6.0" + } + }, "node_modules/base64id": { "version": "2.0.0", "resolved": "https://registry.npmjs.org/base64id/-/base64id-2.0.0.tgz", @@ -19938,6 +19947,11 @@ "balanced-match": { "version": "1.0.2" }, + "base64-arraybuffer": { + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/base64-arraybuffer/-/base64-arraybuffer-1.0.2.tgz", + "integrity": "sha512-I3yl4r9QB5ZRY3XuJVEPfc2XhZO6YweFPI+UovAzn+8/hb3oJ6lnysaFcjVpkCPfVWFUDvoZ8kmVDP7WyRtYtQ==" + }, "base64id": { "version": "2.0.0", "resolved": "https://registry.npmjs.org/base64id/-/base64id-2.0.0.tgz", diff --git a/package.json b/package.json index 2360944b..071963eb 100644 --- a/package.json +++ b/package.json @@ -51,6 +51,7 @@ "dependencies": { "@tensorflow/tfjs": "^3.16.0", "@tensorflow/tfjs-node": "^3.16.0", + "base64-arraybuffer": "^1.0.2", "lodash": "^4.17.21", "mathjs": "^10.0.0", "simple-statistics": "^7.7.0" diff --git a/src/cluster/KMeans.test.ts b/src/cluster/KMeans.test.ts index ac737fc5..b5f3ce2a 100644 --- a/src/cluster/KMeans.test.ts +++ b/src/cluster/KMeans.test.ts @@ -1,4 +1,4 @@ -import { fromObject, KMeans } from '../index' +import { fromJSON, KMeans } from '../index' // Next steps: Improve on kmeans cluster testing describe('KMeans', () => { const X = [ @@ -68,8 +68,8 @@ describe('KMeans', () => { ] const kmean = new KMeans({ nClusters: 2, randomState: 0 }) kmean.fit(X) - const ksave = await kmean.toObject() - const ksaveModel = await fromObject(ksave) + const ksave = await kmean.toJSON() + const ksaveModel = await fromJSON(ksave) expect(centroids).toEqual(ksaveModel.clusterCenters.arraySync()) }) diff --git a/src/compose/ColumnTransformer.test.ts b/src/compose/ColumnTransformer.test.ts index 9dd79e1b..b31a918e 100644 --- a/src/compose/ColumnTransformer.test.ts +++ b/src/compose/ColumnTransformer.test.ts @@ -1,5 +1,5 @@ import { - fromObject, + fromJSON, SimpleImputer, MinMaxScaler, ColumnTransformer @@ -50,8 +50,8 @@ describe('ColumnTransformer', function () { }) transformer.fitTransform(newDf) - let obj = await transformer.toObject() - let myResult = await fromObject(obj) + let obj = await transformer.toJSON() + let myResult = await fromJSON(obj) expect(myResult.transformers.length).toEqual(2) }) diff --git a/src/dummy/DummyClassifier.test.ts b/src/dummy/DummyClassifier.test.ts index 9905da94..b1f0e920 100644 --- a/src/dummy/DummyClassifier.test.ts +++ b/src/dummy/DummyClassifier.test.ts @@ -1,4 +1,4 @@ -import { DummyClassifier, fromObject } from '../index' +import { DummyClassifier, fromJSON } from '../index' describe('DummyClassifier', function () { it('Use DummyClassifier on simple example (mostFrequent)', function () { const clf = new DummyClassifier() @@ -84,8 +84,8 @@ describe('DummyClassifier', function () { const y = [10, 20, 20, 30] clf.fit(X, y) - const clfSave = await clf.toObject() - const newClf = await fromObject(clfSave) + const clfSave = await clf.toJSON() + const newClf = await fromJSON(clfSave) expect(clf).toEqual(newClf) }) }) diff --git a/src/dummy/DummyRegressor.test.ts b/src/dummy/DummyRegressor.test.ts index 6cc01a50..6c79feb7 100644 --- a/src/dummy/DummyRegressor.test.ts +++ b/src/dummy/DummyRegressor.test.ts @@ -1,4 +1,4 @@ -import { DummyRegressor, fromObject } from '../index' +import { DummyRegressor, fromJSON } from '../index' describe('DummyRegressor', function () { it('Use DummyRegressor on simple example (mean)', function () { @@ -93,8 +93,8 @@ describe('DummyRegressor', function () { ] reg.fit(X, y) - const saveReg = await reg.toObject() - const newReg = await fromObject(saveReg) + const saveReg = await reg.toJSON() + const newReg = await fromJSON(saveReg) expect(newReg.predict(predictX).arraySync()).toEqual([10, 10, 10]) }) diff --git a/src/ensemble/VotingClassifier.test.ts b/src/ensemble/VotingClassifier.test.ts index 60f43da7..cafb9973 100644 --- a/src/ensemble/VotingClassifier.test.ts +++ b/src/ensemble/VotingClassifier.test.ts @@ -3,7 +3,7 @@ import { VotingClassifier, DummyClassifier, LogisticRegression, - fromObject + fromJSON } from '../index' describe('VotingClassifier', function () { @@ -121,8 +121,8 @@ describe('VotingClassifier', function () { await voter.fit(X, y) - const savedModel = await voter.toObject() - const newModel = await fromObject(savedModel) + const savedModel = await voter.toJSON() + const newModel = await fromJSON(savedModel) expect(newModel.predict(X).arraySync()).toEqual([1, 1, 1, 1, 1]) }, 30000) diff --git a/src/ensemble/VotingRegressor.test.ts b/src/ensemble/VotingRegressor.test.ts index e0060196..7782ab86 100644 --- a/src/ensemble/VotingRegressor.test.ts +++ b/src/ensemble/VotingRegressor.test.ts @@ -1,7 +1,7 @@ import { makeVotingRegressor, VotingRegressor, - fromObject, + fromJSON, DummyRegressor, LinearRegression } from '../index' @@ -55,8 +55,8 @@ describe('VotingRegressor', function () { await voter.fit(X, y) - const savedModel = await voter.toObject() - const newModel = await fromObject(savedModel) + const savedModel = await voter.toJSON() + const newModel = await fromJSON(savedModel) expect(newModel.score(X, y)).toEqual(voter.score(X, y)) }, 30000) }) diff --git a/src/impute/SimpleImputer.test.ts b/src/impute/SimpleImputer.test.ts index 2bad5c7b..d9957b5e 100644 --- a/src/impute/SimpleImputer.test.ts +++ b/src/impute/SimpleImputer.test.ts @@ -1,5 +1,5 @@ import { tf } from '../shared/globals' -import { SimpleImputer, fromObject } from '../index' +import { SimpleImputer, fromJSON } from '../index' describe('SimpleImputer', function () { it('Imputes with "constant" strategy 2D one column. In this strategy, we give the fill value', function () { @@ -163,8 +163,8 @@ describe('SimpleImputer', function () { ] imputer.fitTransform(data) - const thing = await imputer.toObject() - const newImputer = await fromObject(thing) + const thing = await imputer.toJSON() + const newImputer = await fromJSON(thing) const newReturned = newImputer.transform(data) expect(newReturned.arraySync()).toEqual(expected) expect(newImputer.transform([[NaN, NaN]]).arraySync()).toEqual([[4, 3]]) diff --git a/src/index.ts b/src/index.ts index 532f8be3..95c1e491 100644 --- a/src/index.ts +++ b/src/index.ts @@ -87,7 +87,7 @@ export { export { KFold } from './model_selection/KFold' export { trainTestSplit } from './model_selection/trainTestSplit' export { crossValScore } from './model_selection/crossValScore' -export { fromObject, Serialize } from './simpleSerializer' +export { fromObject, fromJSON, Serialize } from './simpleSerializer' export { ClassificationCriterion, RegressionCriterion } from './tree/Criterion' export { Splitter } from './tree/Splitter' diff --git a/src/linear_model/LinearRegression.test.ts b/src/linear_model/LinearRegression.test.ts index d4ca20a6..1a85e235 100644 --- a/src/linear_model/LinearRegression.test.ts +++ b/src/linear_model/LinearRegression.test.ts @@ -1,4 +1,4 @@ -import { LinearRegression, fromObject } from '../index' +import { LinearRegression, fromJSON } from '../index' import { tensorEqual } from '../utils' import { tf } from '../shared/globals' function roughlyEqual(a: number, b: number, tol = 0.1) { @@ -147,8 +147,8 @@ describe('LinearRegression', function () { const lr = new LinearRegression({ fitIntercept: false }) await lr.fit(mediumX, yPlusJitter) - const serialized = await lr.toObject() - const newModel = await fromObject(serialized) + const serialized = await lr.toJSON() + const newModel = await fromJSON(serialized) expect(tensorEqual(newModel.coef, tf.tensor1d([2.5, 1]), 0.1)).toBe(true) expect(roughlyEqual(newModel.intercept as number, 0)).toBe(true) diff --git a/src/linear_model/LogisticRegression.test.ts b/src/linear_model/LogisticRegression.test.ts index 03243db3..be3fe8e1 100644 --- a/src/linear_model/LogisticRegression.test.ts +++ b/src/linear_model/LogisticRegression.test.ts @@ -1,4 +1,4 @@ -import { LogisticRegression, fromObject } from '../index' +import { LogisticRegression, fromJSON } from '../index' import { tf } from '../shared/globals' describe('LogisticRegression', function () { it('Works on arrays (small example)', async function () { @@ -132,8 +132,8 @@ describe('LogisticRegression', function () { let logreg = new LogisticRegression({ penalty: 'l2' }) await logreg.fit(X, y) - const serializeModel = await logreg.toObject() - const newModel = await fromObject(serializeModel) + const serializeModel = await logreg.toJSON() + const newModel = await fromJSON(serializeModel) const newModelResult = newModel.predict(Xtest) expect(newModelResult.arraySync()).toEqual([0, 0, 0, 0, 0, 0, 2, 2, 2]) diff --git a/src/model_selection/KFold.test.ts b/src/model_selection/KFold.test.ts index 625e0c0d..3b0678fb 100644 --- a/src/model_selection/KFold.test.ts +++ b/src/model_selection/KFold.test.ts @@ -14,7 +14,7 @@ */ import * as fc from 'fast-check' -import { KFold } from './KFold' +import { KFold } from '../index' import { alea } from '../randUtils' import '../jestTensorMatchers' import { tf } from '../shared/globals' diff --git a/src/naive_bayes/GaussianNB.test.ts b/src/naive_bayes/GaussianNB.test.ts index 093f2151..5cdfc9bb 100644 --- a/src/naive_bayes/GaussianNB.test.ts +++ b/src/naive_bayes/GaussianNB.test.ts @@ -12,7 +12,7 @@ * limitations under the License. * ========================================================================== */ -import { GaussianNB, fromObject } from '../index' +import { GaussianNB, fromJSON } from '../index' describe('GaussianNB', function () { it('without priors', async () => { @@ -101,8 +101,8 @@ describe('GaussianNB', function () { await model.fit(X, y) model.predict(X) - const serializeModel = await model.toObject() - const newModel = await fromObject(serializeModel) + const serializeModel = await model.toJSON() + const newModel = await fromJSON(serializeModel) expect(newModel.predict(X).arraySync()).toEqual([0, 0, 1, 1, 1]) }) }) diff --git a/src/neighbors/KNeighborsClassifier.test.ts b/src/neighbors/KNeighborsClassifier.test.ts index 559a72df..b69464fc 100644 --- a/src/neighbors/KNeighborsClassifier.test.ts +++ b/src/neighbors/KNeighborsClassifier.test.ts @@ -13,7 +13,7 @@ * ========================================================================== */ -import { KNeighborsClassifier } from './KNeighborsClassifier' +import { KNeighborsClassifier } from '../index' import { KNeighborsParams } from './KNeighborsBase' import { dataUrls } from '../datasets/datasets' import { crossValScore } from '../model_selection/crossValScore' diff --git a/src/pipeline/Pipeline.test.ts b/src/pipeline/Pipeline.test.ts index 18e77d1c..8a9ac2ec 100644 --- a/src/pipeline/Pipeline.test.ts +++ b/src/pipeline/Pipeline.test.ts @@ -4,7 +4,7 @@ import { LinearRegression, SimpleImputer, MinMaxScaler, - fromObject + fromJSON } from '../index' import { tf } from '../shared/globals' import { tensorEqual } from '../utils' @@ -100,8 +100,8 @@ describe('Pipeline', function () { await pipeline.fit(X, y) - const saveModel = await pipeline.toObject() - const newPipeLine = await fromObject(saveModel) + const saveModel = await pipeline.toJSON() + const newPipeLine = await fromJSON(saveModel) expect(newPipeLine.steps[1][1].min.arraySync()).toEqual([0, 0]) expect( diff --git a/src/preprocessing/MaxAbsScaler.test.ts b/src/preprocessing/MaxAbsScaler.test.ts index 91781bab..f742bdcc 100644 --- a/src/preprocessing/MaxAbsScaler.test.ts +++ b/src/preprocessing/MaxAbsScaler.test.ts @@ -1,4 +1,4 @@ -import { MaxAbsScaler, fromObject } from '../index' +import { MaxAbsScaler, fromJSON } from '../index' import * as dfd from 'danfojs-node' import { tf } from '../shared/globals' import { arrayEqual } from '../utils' @@ -139,8 +139,8 @@ describe('MaxAbsScaler', function () { const data = tf.tensor2d([4, 4, 'whoops', 3, 3] as any, [5, 1]) const scaler = new MaxAbsScaler() scaler.fit(data) - const serial = await scaler.toObject() - const newModel = await fromObject(serial) + const serial = await scaler.toJSON() + const newModel = await fromJSON(serial) expect(newModel.transform(data).arraySync().flat()).toEqual([ 1, 1, diff --git a/src/preprocessing/MinMaxScaler.test.ts b/src/preprocessing/MinMaxScaler.test.ts index b0bf1420..a4efd446 100644 --- a/src/preprocessing/MinMaxScaler.test.ts +++ b/src/preprocessing/MinMaxScaler.test.ts @@ -1,4 +1,4 @@ -import { MinMaxScaler, fromObject } from '../index' +import { MinMaxScaler, fromJSON } from '../index' import * as dfd from 'danfojs-node' import { isDataFrameInterface, isSeriesInterface } from '../typesUtils' import { ScikitVecOrMatrix } from '../types' @@ -164,8 +164,8 @@ describe('MinMaxscaler', function () { const data = tf.tensor2d([4, 4, 'whoops', 3, 3] as any, [5, 1]) const scaler = new MinMaxScaler() scaler.fit(data) - const serial = await scaler.toObject() - const newModel = await fromObject(serial) + const serial = await scaler.toJSON() + const newModel = await fromJSON(serial) expect(newModel.transform(data).arraySync().flat()).toEqual([ 1, 1, diff --git a/src/simpleSerializer.ts b/src/simpleSerializer.ts index bcd8d714..6b54b764 100644 --- a/src/simpleSerializer.ts +++ b/src/simpleSerializer.ts @@ -1,5 +1,5 @@ import { tf } from './shared/globals' - +import { encode, decode } from 'base64-arraybuffer' const EstimatorList = [ 'KNeighborsRegressor', 'LinearRegression', @@ -48,6 +48,7 @@ class JSONHandler { async save(artifacts: any) { // Base 64 encoding + artifacts.weightData = encode(artifacts.weightData) this.savedArtifacts = artifacts return { modelArtifactsInfo: { @@ -62,6 +63,7 @@ class JSONHandler { async load() { // Base64 decode + this.savedArtifacts.weightData = decode(this.savedArtifacts.weightData) return this.savedArtifacts } } @@ -70,7 +72,6 @@ export async function toObjectInner( val: any, ignoreKeys: string[] = [] ): Promise { - // console.log(val) if (['number', 'string', 'undefined', 'boolean'].includes(typeof val)) { return val } @@ -208,6 +209,10 @@ export async function fromObject(val: any): Promise { } } +export async function fromJSON(val: string): Promise { + return await fromObject(JSON.parse(val)) +} + let ignoredKeysForSGDRegressor = [ 'modelCompileArgs', 'modelFitArgs', @@ -222,4 +227,8 @@ export class Serialize { console.error(e) } } + + async toJSON(): Promise { + return JSON.stringify(await this.toObject()) + } } diff --git a/src/tree/Criterion.test.ts b/src/tree/Criterion.test.ts index f9ab852c..76e88960 100644 --- a/src/tree/Criterion.test.ts +++ b/src/tree/Criterion.test.ts @@ -1,5 +1,5 @@ import { ClassificationCriterion, giniCoefficient, entropy } from './Criterion' -import { fromObject } from '../simpleSerializer' +import { fromJSON } from '../simpleSerializer' describe('Criterion', function () { let X = [ [-2, -1], @@ -83,8 +83,8 @@ describe('Criterion', function () { y }) criterion.init(0, 6, sampleMap) - const serial = await criterion.toObject() - const newCriterion = await fromObject(serial) + const serial = await criterion.toJSON() + const newCriterion = await fromJSON(serial) expect(newCriterion.nodeImpurity()).toEqual(1) }, 1000) }) diff --git a/src/tree/DecisionTree.test.ts b/src/tree/DecisionTree.test.ts index c5933bd1..71652fac 100644 --- a/src/tree/DecisionTree.test.ts +++ b/src/tree/DecisionTree.test.ts @@ -1,7 +1,7 @@ import { DecisionTreeClassifier, DecisionTreeRegressor } from './DecisionTree' import { dataUrls } from '../datasets/datasets' import * as dfd from 'danfojs-node' -import { fromObject } from '../simpleSerializer' +import { fromJSON } from '../simpleSerializer' describe('DecisionTree', function () { it('Use the DecisionTree (toy)', async function () { @@ -621,8 +621,8 @@ describe('DecisionTree', function () { let tree_classifier = new DecisionTreeClassifier() tree_classifier.fit(X, y) - const serial = await tree_classifier.toObject() - const newTree = await fromObject(serial) + const serial = await tree_classifier.toJSON() + const newTree = await fromJSON(serial) expect(newTree.predict(T)).toEqual(true_result) }, 1000) }) diff --git a/src/tree/Splitter.test.ts b/src/tree/Splitter.test.ts index 35481658..620aa37e 100644 --- a/src/tree/Splitter.test.ts +++ b/src/tree/Splitter.test.ts @@ -1,6 +1,6 @@ import { ImpurityMeasure } from './Criterion' import { Splitter } from './Splitter' -import { fromObject } from '../simpleSerializer' +import { fromJSON } from '../simpleSerializer' describe('Splitter', function () { let types = ['gini', 'entropy', 'squared_error'] @@ -203,8 +203,8 @@ describe('Splitter', function () { samplesSubset: [] }) splitter.splitNode() - const serial = await splitter.toObject() - const newSplitter = await fromObject(serial) + const serial = await splitter.toJSON() + const newSplitter = await fromJSON(serial) const newBestSplitter = newSplitter.splitNode() expect(newBestSplitter.foundSplit).toEqual(true) expect(newBestSplitter.feature).toEqual(1) From 973c3fb255875d98b334e1a9c8c53ae3aca422b1 Mon Sep 17 00:00:00 2001 From: semantic-release-bot Date: Sun, 8 May 2022 16:34:04 +0000 Subject: [PATCH 4/4] chore(release): 1.21.0 [skip ci] # [1.21.0](https://github.com/javascriptdata/scikit.js/compare/v1.20.0...v1.21.0) (2022-05-08) ### Features * not complete serialization ([7fbea07](https://github.com/javascriptdata/scikit.js/commit/7fbea07431dbd194259e44e868df74617814f6de)) * updated serialization ([ec71323](https://github.com/javascriptdata/scikit.js/commit/ec713230efbf07ac228c0291c21f5fcc0b1bd995)) * updated test ([260c134](https://github.com/javascriptdata/scikit.js/commit/260c1347d7c06c494b24b813c00c11d68c4da354)) --- CHANGELOG.md | 9 +++++++++ package-lock.json | 4 ++-- package.json | 2 +- 3 files changed, 12 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index ea38e816..c8317c9b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,12 @@ +# [1.21.0](https://github.com/javascriptdata/scikit.js/compare/v1.20.0...v1.21.0) (2022-05-08) + + +### Features + +* not complete serialization ([7fbea07](https://github.com/javascriptdata/scikit.js/commit/7fbea07431dbd194259e44e868df74617814f6de)) +* updated serialization ([ec71323](https://github.com/javascriptdata/scikit.js/commit/ec713230efbf07ac228c0291c21f5fcc0b1bd995)) +* updated test ([260c134](https://github.com/javascriptdata/scikit.js/commit/260c1347d7c06c494b24b813c00c11d68c4da354)) + # [1.20.0](https://github.com/javascriptdata/scikit.js/compare/v1.19.0...v1.20.0) (2022-04-26) diff --git a/package-lock.json b/package-lock.json index 131322a0..83c3e9fd 100644 --- a/package-lock.json +++ b/package-lock.json @@ -1,12 +1,12 @@ { "name": "scikitjs", - "version": "1.20.0", + "version": "1.21.0", "lockfileVersion": 2, "requires": true, "packages": { "": { "name": "scikitjs", - "version": "1.20.0", + "version": "1.21.0", "hasInstallScript": true, "license": "ISC", "dependencies": { diff --git a/package.json b/package.json index 071963eb..339f817f 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "scikitjs", - "version": "1.20.0", + "version": "1.21.0", "description": "Scikit-Learn for JS", "output": { "node": "dist/node/index.js",