From 411010b43b66035059dc42b315fe47e79587004d Mon Sep 17 00:00:00 2001 From: Yaw Joseph Etse Date: Wed, 3 Aug 2022 22:42:18 -0400 Subject: [PATCH] fix: added getModel validation with no inputs/outputs --- src/__test__/mock_automl_data.ts | 233 +++++++++++++++++++++++++++++++ src/automl.test.ts | 83 ++++++++++- src/jsonm.ts | 8 +- 3 files changed, 315 insertions(+), 9 deletions(-) diff --git a/src/__test__/mock_automl_data.ts b/src/__test__/mock_automl_data.ts index 4ed7c8e..55af6dd 100644 --- a/src/__test__/mock_automl_data.ts +++ b/src/__test__/mock_automl_data.ts @@ -1071,4 +1071,237 @@ export const autoMLdata = { 1, 4 ] +}; + +export const autoMLdataSM = { + "outputs": 5, + "rowRange": [ + 1, + 151 + ], + "colRange": [ + 1, + 5 + ], + "data": [ + [ + "sepal_length_cm", + "sepal_width_cm", + "petal_length_cm", + "petal_width_cm", + "plant" + ], + [ + 5.1, + 3.5, + 1.4, + 0.2, + "Iris-setosa" + ], + [ + 4.9, + 3, + 1.4, + 0.2, + "Iris-setosa" + ], + [ + 4.7, + 3.2, + 1.3, + 0.2, + "Iris-setosa" + ], + [ + 4.6, + 3.1, + 1.5, + 0.2, + "Iris-setosa" + ], + [ + 6, + 2.7, + 5.1, + 1.6, + "Iris-versicolor" + ], + [ + 5.4, + 3, + 4.5, + 1.5, + "Iris-versicolor" + ], + [ + 6, + 3.4, + 4.5, + 1.6, + "Iris-versicolor" + ], + [ + 6.7, + 3.1, + 4.7, + 1.5, + "Iris-versicolor" + ], + [ + 6.3, + 2.3, + 4.4, + 1.3, + "Iris-versicolor" + ], + [ + 6.3, + 2.5, + 5, + 1.9, + "Iris-virginica" + ], + [ + 6.5, + 3, + 5.2, + 2, + "Iris-virginica" + ], + [ + 6.2, + 3.4, + 5.4, + 2.3, + "Iris-virginica" + ], + [ + 5.9, + 3, + 5.1, + 1.8, + "Iris-virginica" + ] + ], + "inputs": [ + 1, + 4 + ] +}; + + +export const autoMLdataTNE = { + "outputs": 5, + "rowRange": [ + 1, + 151 + ], + "colRange": [ + 1, + 5 + ], + "data": [ + [ + "sepal_length_cm", + "sepal_width_cm", + "petal_length_cm", + "petal_width_cm", + "plant" + ], + [ + 5.1, + 3.5, + 1.4, + 0.2, + "Iris-setosa" + ], + [ + 4.9, + 3, + 1.4, + 0.2, + "Iris-setosa" + ], + [ + 4.7, + 3.2, + 1.3, + 0.2, + "Iris-setosa" + ], + [ + 4.6, + 3.1, + 1.5, + 0.2, + "" + ], + [ + 6, + 2.7, + 5.1, + 1.6, + "Iris-versicolor" + ], + [ + 5.4, + 3, + 4.5, + 1.5, + "Iris-versicolor" + ], + [ + 6, + 3.4, + 4.5, + 1.6, + "Iris-versicolor" + ], + [ + 6.7, + 3.1, + 4.7, + 1.5, + "Iris-versicolor" + ], + [ + 6.3, + 2.3, + 4.4, + 1.3, + "" + ], + [ + 6.3, + 2.5, + 5, + 1.9, + "Iris-virginica" + ], + [ + 6.5, + 3, + 5.2, + 2, + "Iris-virginica" + ], + [ + 6.2, + 3.4, + 5.4, + 2.3, + "Iris-virginica" + ], + [ + 5.9, + 3, + 5.1, + 1.8, + "" + ] + ], + "inputs": [ + 1, + 4 + ] }; \ No newline at end of file diff --git a/src/automl.test.ts b/src/automl.test.ts index 4784a66..2edfaca 100644 --- a/src/automl.test.ts +++ b/src/automl.test.ts @@ -5,7 +5,7 @@ import * as JSONM from './index'; import { ModelTypes } from './model'; import { toBeWithinRange, } from './jest.test'; expect.extend({ toBeWithinRange }); -import {autoMLdata} from './__test__/mock_automl_data' +import {autoMLdata, autoMLdataSM, autoMLdataTNE} from './__test__/mock_automl_data' import { Data } from '@jsonstack/data/src/DataSet'; import { setBackend } from './tensorflow_singleton'; import * as tf from '@tensorflow/tfjs-node'; @@ -66,8 +66,44 @@ describe('AutoML Sheets Test',()=>{ }); }) describe('mock end to end example',()=>{ - - it('should run a basic test from spreadsheet data',async ()=>{ + it('should run a basic test from spreadsheet with no prediction data',async ()=>{ + const on_progress = ({ + completion_percentage, + loss, + epoch, + logs, + status, + defaultLog, + }:TrainingProgressUpdate)=>{ + 
if(status!=='training') console.log({status,defaultLog}) + } + // const vectors = autoMLdata?.data.concat([]); + // const labels = vectors?.splice(0,1)[0] as string[]; + // const dataset = JSONM.Data.DataSet.reverseColumnMatrix({labels,vectors});\ + //@ts-ignore + const{vectors,labels,dataset}=getSpreadsheetDataset(autoMLdataSM?.data,{on_progress}); + //@ts-ignore + const {columns,inputs,outputs} = JSONM.getInputsOutputsFromDataset({dataset,labels, on_progress}); + const {trainingData,predictionData} = await splitTrainingPredictionData({ + inputs, + outputs, + data: dataset, + }); + try{ + const SpreadsheetModel = await getModel({ + type:'prediction', + inputs, + outputs, + dataset:trainingData, + //@ts-ignore + on_progress, + }); + await SpreadsheetModel.trainModel(); + } catch(e){ + expect(e).toBeInstanceOf(RangeError) + } + },30000) + it('should run a basic test from spreadsheet with small prediction data',async ()=>{ const on_progress = ({ completion_percentage, loss, @@ -82,7 +118,7 @@ describe('AutoML Sheets Test',()=>{ // const labels = vectors?.splice(0,1)[0] as string[]; // const dataset = JSONM.Data.DataSet.reverseColumnMatrix({labels,vectors});\ //@ts-ignore - const{vectors,labels,dataset}=getSpreadsheetDataset(autoMLdata?.data,{on_progress}); + const{vectors,labels,dataset}=getSpreadsheetDataset(autoMLdataTNE?.data,{on_progress}); //@ts-ignore const {columns,inputs,outputs} = JSONM.getInputsOutputsFromDataset({dataset,labels, on_progress}); const {trainingData,predictionData} = await splitTrainingPredictionData({ @@ -90,7 +126,7 @@ describe('AutoML Sheets Test',()=>{ outputs, data: dataset, }); - // console.log({trainingData,predictionData}); + console.log({trainingData,predictionData}) const SpreadsheetModel = await getModel({ type:'prediction', inputs, @@ -100,8 +136,43 @@ describe('AutoML Sheets Test',()=>{ on_progress, }); await SpreadsheetModel.trainModel(); + },30000) + // describe('mock end to end example',()=>{ + // it('should run a basic test from spreadsheet data',async ()=>{ + // const on_progress = ({ + // completion_percentage, + // loss, + // epoch, + // logs, + // status, + // defaultLog, + // }:TrainingProgressUpdate)=>{ + // if(status!=='training') console.log({status,defaultLog}) + // } + // // const vectors = autoMLdata?.data.concat([]); + // // const labels = vectors?.splice(0,1)[0] as string[]; + // // const dataset = JSONM.Data.DataSet.reverseColumnMatrix({labels,vectors});\ + // //@ts-ignore + // const{vectors,labels,dataset}=getSpreadsheetDataset(autoMLdata?.data,{on_progress}); + // //@ts-ignore + // const {columns,inputs,outputs} = JSONM.getInputsOutputsFromDataset({dataset,labels, on_progress}); + // const {trainingData,predictionData} = await splitTrainingPredictionData({ + // inputs, + // outputs, + // data: dataset, + // }); + // // console.log({trainingData,predictionData}); + // const SpreadsheetModel = await getModel({ + // type:'prediction', + // inputs, + // outputs, + // dataset:trainingData, + // //@ts-ignore + // on_progress, + // }); + // await SpreadsheetModel.trainModel(); - },30000) + // },30000) }) }); diff --git a/src/jsonm.ts b/src/jsonm.ts index cbb317c..83dacbd 100644 --- a/src/jsonm.ts +++ b/src/jsonm.ts @@ -69,6 +69,8 @@ export async function getModelFromJSONM(jml?: JML): Promise { const trainingData = Array.isArray(jml.dataset) ? 
     jml.dataset : await getDataSet(jml.dataset);
+  if(jml.outputs.length<1) throw new RangeError('Every model requires at least one output')
+  if(jml.inputs.length<1) throw new RangeError('Every model requires at least one input')
 
   return new ModelX({
     trainingData,
@@ -149,10 +151,10 @@ export function getModelOptions(jml?:JML,datum?:Datum){
     }
   })
   const dataset = await getDataSet(options?.data);
-  const {trainingData, predictionData} = dataset.reduce((result,datum)=>{
+  const {trainingData, predictionData} = dataset.reduce((result,datum,idx)=>{
     if(options?.outputs?.filter((output)=> isEmpty(datum[output])
-    ).length) result.predictionData.push(datum);
-    else result.trainingData.push(datum);
+    ).length) result.predictionData.push({...datum,__original_dataset_index: idx});
+    else result.trainingData.push({...datum,__original_dataset_index: idx});
     return result;
   },{trainingData:[],predictionData:[],})
   return {trainingData,predictionData}
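
Note on the jsonm.ts change above: the two new guards make getModelFromJSONM fail fast with a RangeError when the resolved column mapping has no outputs or no inputs, which is what the new "no prediction data" test asserts. What follows is an illustrative caller-side sketch only, not part of the patch; it assumes nothing beyond the getModelFromJSONM signature and the JML fields (type, inputs, outputs, dataset) that appear in this diff and in the tests, and the empty inputs/outputs/dataset values are placeholders.

// Hypothetical usage sketch (not part of this patch): exercises the new
// RangeError guards added to getModelFromJSONM in src/jsonm.ts.
import { getModelFromJSONM } from './jsonm';

async function demoEmptyColumnMapping() {
  try {
    //@ts-ignore -- mirrors the tests above, which also relax typing on these call sites
    const model = await getModelFromJSONM({
      type: 'prediction', // same model type the spreadsheet tests use
      inputs: [],         // no input columns resolved from the sheet
      outputs: [],        // no output columns resolved from the sheet
      dataset: [],        // placeholder; passing an array skips the getDataSet fetch
    });
    await model.trainModel();
  } catch (e) {
    // With this patch the call rejects immediately with
    // RangeError: Every model requires at least one output
    console.error((e as Error).message);
  }
}

demoEmptyColumnMapping();

Because the outputs guard runs first, a dataset missing both mappings reports the missing output before the missing input.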