Skip to content

Commit

Permalink
Integration Testing for forest model
Browse files Browse the repository at this point in the history
The changes in this iteration are improvements to the tests for the forest model:

1. Following last week's discussion, the regression test (`TestForestModel.py`) was removed, since it won't be useful once model performance improves. Instead, the structure of the predictions is checked. This check was merged into `TestForestModelIntegration.py`.

2. After #944, `predict_labels_with_n` in `run_model.py` expects a list and iterates over it. The forest model and the rest of the tests were updated accordingly.
  • Loading branch information
humbleOldSage committed Feb 5, 2024
1 parent 052cb08 commit 104dd9a
Show file tree
Hide file tree
Showing 5 changed files with 100 additions and 172 deletions.
10 changes: 4 additions & 6 deletions emission/analysis/modelling/trip_model/forest_classifier.py
Original file line number Diff line number Diff line change
Expand Up @@ -103,13 +103,11 @@ def predict(self, trip: List[float]) -> Tuple[List[Dict], int]:
#check if theres no trip to predict
logging.debug(f"forest classifier predict called with {len(trip)} trips")
if len(trip) == 0:
msg = f'model.predict cannot be called with an empty trips'
msg = f'model.predict cannot be called with an empty trip'
raise Exception(msg)
# CONVERT LIST OF TRIPS TO dataFrame
test_df = estb.BuiltinTimeSeries.to_data_df("analysis/confirmed_trip",trip)
labeled_trip_df = esdtq.filter_labeled_trips(test_df)
expanded_labeled_trip_df= esdtq.expand_userinputs(labeled_trip_df)
predcitions_df= self.model.predict(expanded_labeled_trip_df)
# CONVERT TRIP TO dataFrame
test_df = estb.BuiltinTimeSeries.to_data_df("analysis/confirmed_trip",[trip])
predcitions_df= self.model.predict(test_df)

# the predictions_df currently holds the highest probable options
# individually in all three categories. the predictions_df are in the form
Expand Down
151 changes: 0 additions & 151 deletions emission/tests/modellingTests/TestForestModel.py

This file was deleted.

94 changes: 84 additions & 10 deletions emission/tests/modellingTests/TestForestModelIntegration.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,16 +13,87 @@
import emission.pipeline.intake_stage as epi
import logging

class TestLabelInferencePipeline(unittest.TestCase):
# It is important that these functions be deterministic

import emission.analysis.modelling.trip_model.run_model as eamur
import emission.analysis.modelling.trip_model.model_type as eamumt
import emission.analysis.modelling.trip_model.model_storage as eamums
import emission.tests.modellingTests.modellingTestAssets as etmm
import emission.storage.timeseries.abstract_timeseries as esta


class TestForestModelIntegration(unittest.TestCase):
# Test if the forest model for label prediction is smoothly integrated with the inference pipeline.
# In the initial setup, build a dummy forest model. Then run the pipeline on real example data.
# Finally in the test, assert the type of label predictions expected.

def setUp(self):

self.reset_all()
np.random.seed(91)
self.test_algorithms = eacilp.primary_algorithms

forest_model_config= {
"loc_feature" : "coordinates",
"radius": 500,
"size_thresh":1,
"purity_thresh":1.0,
"gamma":0.05,
"C":1,
"n_estimators":100,
"criterion":"gini",
"max_depth":'null',
"min_samples_split":2,
"min_samples_leaf":1,
"max_features":"sqrt",
"bootstrap":True,
"random_state":42,
"use_start_clusters":False,
"use_trip_clusters":True
}
etc.setupRealExample(self, "emission/tests/data/real_examples/shankari_2015-07-22") ##maybe use a different file
ts = esta.TimeSeries.get_time_series(self.testUUID)
label_data = {
"mode_confirm": ['ebike', 'bike'],
"purpose_confirm": ['happy-hour', 'dog-park'],
"replaced_mode": ['walk'],
"mode_weights": [0.9, 0.1],
"purpose_weights": [0.1, 0.9]
}

self.origin = (-105.1705977, 39.7402654,)
self.destination = (-105.1755606, 39.7673075)
self.min_trips = 14
self.total_trips = 100
self.clustered_trips = 33
self.has_label_percent = 0.9
## generate mock trips
train = etmm.generate_mock_trips(
user_id=self.testUUID,
trips=self.total_trips,
origin=self.origin,
destination=self.destination,
trip_part='od',
label_data=label_data,
within_threshold=self.clustered_trips,
threshold=0.004, # ~400m
has_label_p=self.has_label_percent
)
ts.bulk_insert(train)
# confirm data write did not fail
check_data = esda.get_entries(key="analysis/confirmed_trip", user_id=self.testUUID, time_query=None)
if len(check_data) != self.total_trips:
logging.debug(f'test invariant failed after generating test data')
self.fail()
else:
logging.debug(f'found {self.total_trips} trips in database')
## Build an already existing model or a new model
eamur.update_trip_model(
user_id=self.testUUID,
model_type=eamumt.ModelType.RANDOM_FOREST_CLASSIFIER,
model_storage=eamums.ModelStorage.DOCUMENT_DATABASE,
min_trips=4,
model_config=forest_model_config
)
## run inference pipeline
self.run_pipeline(self.test_algorithms)
time_range = estt.TimeQuery("metadata.write_ts", None, time.time())
self.inferred_trips = esda.get_entries(esda.INFERRED_TRIP_KEY, self.testUUID, time_query=time_range)
Expand All @@ -39,16 +110,19 @@ def run_pipeline(self, algorithms):
def reset_all(self):
etc.dropAllCollections(edb._get_current_db())

# Tests that algorithm being tested runs and saves to the database correctly
def testIndividualAlgorithms(self):
logging.debug('TEST1')
# Tests that forest algorithm being tested runs successfully
def testForestAlgorithm(self):
for trip in self.inferred_trips:
entries = esdt.get_sections_for_trip("inference/labels", self.testUUID, trip.get_id())
logging.debug(f"ENTRIES: {entries}")
self.assertEqual(len(entries), len(self.test_algorithms))
# for entry in entries:
# self.assertGreater(len(entry["data"]["prediction"]), 0)

for entry in entries:
self.assertGreater(len(entry["data"]["prediction"]), 0)
for singleprediction in entry["data"]["prediction"]:
self.assertIsInstance(singleprediction, dict, " should be an instance of the dictionary class")
self.assertIsInstance(singleprediction['labels'], dict, " should be an instance of the dictionary class")
self.assertIn('mode_confirm',singleprediction['labels'].keys())
self.assertIn('replaced_mode',singleprediction['labels'].keys())
self.assertIn('purpose_confirm',singleprediction['labels'].keys())

def main():
etc.configLogging()
Expand Down
8 changes: 4 additions & 4 deletions emission/tests/modellingTests/TestForestModelLoadandSave.py
Original file line number Diff line number Diff line change
Expand Up @@ -134,7 +134,7 @@ def testForestModelRoundTrip(self):
# logging.debug(f'Predictions on trips in database')

predictions_list = eamur.predict_labels_with_n(
trip_list = [test],
trip_list = test,
model=model
)

Expand All @@ -151,7 +151,7 @@ def testForestModelRoundTrip(self):

# logging.debug(f'Predictions on trips using deserialised model')
predictions_loaded_model_list = eamur.predict_labels_with_n(
trip_list = [test],
trip_list = test,
model=deserialized_model
)
# logging.debug(f'Assert that both predictions are the same')
Expand Down Expand Up @@ -184,7 +184,7 @@ def testForestModelConsistency(self):
# logging.debug(f' Model Predictions on trips in database')

predictions_list_model1 = eamur.predict_labels_with_n(
trip_list = [test],
trip_list = test,
model=model_iter1
)
# logging.debug(f' Loading Model again')
Expand All @@ -197,7 +197,7 @@ def testForestModelConsistency(self):
)
# logging.debug(f' Model Predictions on trips in database')
predictions_list_model2 = eamur.predict_labels_with_n(
trip_list = [test],
trip_list = test,
model=model_iter2
)

Expand Down
9 changes: 8 additions & 1 deletion emission/tests/modellingTests/TestRunForestModel.py
Original file line number Diff line number Diff line change
Expand Up @@ -183,9 +183,16 @@ def test1RoundPredictForestModel(self):
)

predictions_list = eamur.predict_labels_with_n(
trip_list = [test],
trip_list = test,
model=model
)
for prediction, n in predictions_list:
[logging.debug(p) for p in sorted(prediction, key=lambda r: r['p'], reverse=True)]
self.assertNotEqual(len(prediction), 0, "should have a prediction")
self.assertIn('labels',prediction[0].keys())
self.assertIn('p',prediction[0].keys())
self.assertIsInstance(prediction[0], dict, " should be an instance of the dictionary class")
self.assertIsInstance(prediction[0]['labels'], dict, " should be an instance of the dictionary class")
self.assertIn('mode_confirm',prediction[0]['labels'].keys())
self.assertIn('replaced_mode',prediction[0]['labels'].keys())
self.assertIn('purpose_confirm',prediction[0]['labels'].keys())

0 comments on commit 104dd9a

Please sign in to comment.