From aa8cfd6d559b94a53c5dae2c90a8ee2824701441 Mon Sep 17 00:00:00 2001
From: $aTyam <satyam.saini@rutgers.edu>
Date: Sun, 20 Aug 2023 14:03:06 -0400
Subject: [PATCH 01/10] Moving Dependence from tour_model to trip_model

The following changes support TRB_label_assist in e-mission-server-eval-private, reducing its dependence on a custom branch.
---
 .../analysis/modelling/similarity/similarity_metric.py | 10 ++++++++--
 .../modelling/trip_model/greedy_similarity_binning.py  |  9 ++++++++-
 2 files changed, 16 insertions(+), 3 deletions(-)

diff --git a/emission/analysis/modelling/similarity/similarity_metric.py b/emission/analysis/modelling/similarity/similarity_metric.py
index 6be00216f..7a88f9da7 100644
--- a/emission/analysis/modelling/similarity/similarity_metric.py
+++ b/emission/analysis/modelling/similarity/similarity_metric.py
@@ -27,15 +27,21 @@ def similarity(self, a: List[float], b: List[float]) -> List[float]:
         """
         pass
 
-    def similar(self, a: List[float], b: List[float], thresh: float) -> bool:
+    def similar(self, a: List[float], b: List[float], thresh: float, clusteringWay :str = 'origin-destination') -> bool:
         """compares the features, returning true if they are similar
         within some threshold
 
         :param a: features for a trip
         :param b: features for another trip
         :param thresh: threshold for similarity
+        :clusterinWay: clustering based on origin/destination/origin-destination-pair
         :return: true if the feature similarity is within some threshold
         """
         similarity_values = self.similarity(a, b)
-        is_similar = all(map(lambda sim: sim <= thresh, similarity_values))
+        if clusteringWay == 'origin':
+            is_similar = similarity_values[0] <= thresh
+        elif clusteringWay == 'destination':
+            is_similar = similarity_values[1] <= thresh
+        else:
+            is_similar = all(map(lambda sim: sim <= thresh, similarity_values))
         return is_similar
diff --git a/emission/analysis/modelling/trip_model/greedy_similarity_binning.py b/emission/analysis/modelling/trip_model/greedy_similarity_binning.py
index d750a451e..34157dee3 100644
--- a/emission/analysis/modelling/trip_model/greedy_similarity_binning.py
+++ b/emission/analysis/modelling/trip_model/greedy_similarity_binning.py
@@ -119,6 +119,11 @@ class label to apply:
         self.sim_thresh = config['similarity_threshold_meters']
         self.apply_cutoff = config['apply_cutoff']
         self.is_incremental = config['incremental_evaluation']
+        if config.get('clustering_way') is None:
+            self.clusteringWay='origin-destination'   # previous default
+        else:
+            self.clusteringWay= config['clustering_way'] 
+        self.tripLabels=[]
 
         self.bins: Dict[str, Dict] = {}
         
@@ -184,9 +189,11 @@ def _assign_bins(self, trips: List[ecwc.Confirmedtrip]):
                 logging.debug(f"adding trip to bin {bin_id} with features {trip_features}")
                 self.bins[bin_id]['feature_rows'].append(trip_features)
                 self.bins[bin_id]['labels'].append(trip_labels)
+                self.tripLabels.append(bin_id)
             else:
                 # create new bin
                 new_bin_id = str(len(self.bins))
+                self.tripLabels.append(new_bin_id)
                 new_bin_record = {
                     'feature_rows': [trip_features],
                     'labels': [trip_labels],
@@ -204,7 +211,7 @@ def _find_matching_bin_id(self, trip_features: List[float]) -> Optional[str]:
         :return: the id of a bin if a match was found, otherwise None
         """
         for bin_id, bin_record in self.bins.items():
-                matches_bin = all([self.metric.similar(trip_features, bin_sample, self.sim_thresh)
+                matches_bin = all([self.metric.similar(trip_features, bin_sample, self.sim_thresh,self.clusteringWay)
                     for bin_sample in bin_record['feature_rows']])
                 if matches_bin:
                     return bin_id

From d9b4f7073c03838e01e5ee5d416aa6d462f0441e Mon Sep 17 00:00:00 2001
From: $aTyam <satyam.saini@rutgers.edu>
Date: Thu, 24 Aug 2023 02:07:25 -0400
Subject: [PATCH 02/10] Generalising similarity calculations

Moved the `clusteringWay`-based decision making during binning further upstream, thus generalising the `similar` (in `similarity_metric.py`) and `similarity` (in `od_similarity.py`) functions. They can now be used across modules without the need for a `clusteringWay` parameter.
---
 .../modelling/similarity/od_similarity.py      | 16 +++++++++++++---
 .../modelling/similarity/similarity_metric.py  | 14 +++++---------
 .../trip_model/greedy_similarity_binning.py    | 18 +++++++++++++-----
 3 files changed, 31 insertions(+), 17 deletions(-)

diff --git a/emission/analysis/modelling/similarity/od_similarity.py b/emission/analysis/modelling/similarity/od_similarity.py
index 3b84bd764..162b6f123 100644
--- a/emission/analysis/modelling/similarity/od_similarity.py
+++ b/emission/analysis/modelling/similarity/od_similarity.py
@@ -16,6 +16,16 @@ def extract_features(self, trip: ecwc.Confirmedtrip) -> List[float]:
         return ctfe.od_features(trip)
 
     def similarity(self, a: List[float], b: List[float]) -> List[float]:
-        o_dist = ecc.calDistance([a[0], a[1]], [b[0], b[1]])
-        d_dist = ecc.calDistance([a[2], a[3]], [b[2], b[3]])
-        return [o_dist, d_dist]
\ No newline at end of file
+        """
+        a : a list of point features that can take either of two forms
+                    1. [point1_latitude,point1_longitude]  
+                    2. [point1_latitude,point1_longitude,point2_latitude,point2_longitude] 
+                    
+        b : a list of point features that can take either of two forms
+                    1. [point1_latitude,point1_longitude]  
+                    2. [point1_latitude,point1_longitude,point2_latitude,point2_longitude] 
+        """
+        point_dist = [ecc.calDistance(a[i:i+2], b[i:i+2]) 
+                      for i in range (0,len(a),2)] 
+        
+        return point_dist
\ No newline at end of file
diff --git a/emission/analysis/modelling/similarity/similarity_metric.py b/emission/analysis/modelling/similarity/similarity_metric.py
index 7a88f9da7..e9c645e59 100644
--- a/emission/analysis/modelling/similarity/similarity_metric.py
+++ b/emission/analysis/modelling/similarity/similarity_metric.py
@@ -27,21 +27,17 @@ def similarity(self, a: List[float], b: List[float]) -> List[float]:
         """
         pass
 
-    def similar(self, a: List[float], b: List[float], thresh: float, clusteringWay :str = 'origin-destination') -> bool:
+    def similar(self, a: List[float], b: List[float], thresh: float) -> bool:
         """compares the features, returning true if they are similar
         within some threshold
 
-        :param a: features for a trip
+        :param a: features for a trip , 
         :param b: features for another trip
         :param thresh: threshold for similarity
-        :clusterinWay: clustering based on origin/destination/origin-destination-pair
+        :clusteringWay: clustering based on origin/destination/origin-destination-pair
         :return: true if the feature similarity is within some threshold
         """
         similarity_values = self.similarity(a, b)
-        if clusteringWay == 'origin':
-            is_similar = similarity_values[0] <= thresh
-        elif clusteringWay == 'destination':
-            is_similar = similarity_values[1] <= thresh
-        else:
-            is_similar = all(map(lambda sim: sim <= thresh, similarity_values))
+        is_similar = all(sim <= thresh for sim in similarity_values)
+
         return is_similar
diff --git a/emission/analysis/modelling/trip_model/greedy_similarity_binning.py b/emission/analysis/modelling/trip_model/greedy_similarity_binning.py
index 34157dee3..efcce4f02 100644
--- a/emission/analysis/modelling/trip_model/greedy_similarity_binning.py
+++ b/emission/analysis/modelling/trip_model/greedy_similarity_binning.py
@@ -207,14 +207,22 @@ def _find_matching_bin_id(self, trip_features: List[float]) -> Optional[str]:
         finds an existing bin where all bin features are "similar" to the incoming
         trip features.
 
-        :param trip_features: feature row for the incoming trip
+        :param trip_features: feature row for the incoming trip. 
+                            takes the form [orig_lat, orig_lon, dest_lat, dest_lon]
         :return: the id of a bin if a match was found, otherwise None
         """
         for bin_id, bin_record in self.bins.items():
-                matches_bin = all([self.metric.similar(trip_features, bin_sample, self.sim_thresh,self.clusteringWay)
-                    for bin_sample in bin_record['feature_rows']])
-                if matches_bin:
-                    return bin_id
+            if self.clusteringWay == 'origin':
+                start,end=0,2  #since first two features in trip_features are for origin
+            elif self.clusteringWay == 'destination':
+                start,end=2,4  #third and fourth values intrip_features are for destination
+            elif self.clusteringWay == 'origin-destination':
+                start,end=0,4  #when clusteromgWay is 'origin-destination',we pass all four features
+
+            matches_bin = all([self.metric.similar(trip_features[start:end], bin_sample[start:end], self.sim_thresh)
+                for bin_sample in bin_record['feature_rows']])
+            if matches_bin:
+                return bin_id
         return None
 
     def _nearest_bin(self, trip: ecwc.Confirmedtrip) -> Tuple[Optional[int], Optional[Dict]]:

From b5595c9b409e23a35e7c00792adebe806ad928c4 Mon Sep 17 00:00:00 2001
From: $aTyam <satyam.saini@rutgers.edu>
Date: Thu, 24 Aug 2023 02:31:41 -0400
Subject: [PATCH 03/10] Minor Comment fixes

Comment fixes for better readability.
---
 .../analysis/modelling/similarity/od_similarity.py    | 11 +++++++++--
 .../modelling/similarity/similarity_metric.py         |  3 +--
 2 files changed, 10 insertions(+), 4 deletions(-)

diff --git a/emission/analysis/modelling/similarity/od_similarity.py b/emission/analysis/modelling/similarity/od_similarity.py
index 162b6f123..9a6a49d0d 100644
--- a/emission/analysis/modelling/similarity/od_similarity.py
+++ b/emission/analysis/modelling/similarity/od_similarity.py
@@ -22,9 +22,16 @@ def similarity(self, a: List[float], b: List[float]) -> List[float]:
                     2. [point1_latitude,point1_longitude,point2_latitude,point2_longitude] 
                     
         b : a list of point features that can take either of two forms
-                    1. [point1_latitude,point1_longitude]  
-                    2. [point1_latitude,point1_longitude,point2_latitude,point2_longitude] 
+                    1. [point3_latitude,point3_longitude]  
+                    2. [point3_latitude,point3_longitude,point4_latitude,point4_longitude] 
+            
+            It'll always take the same form as parameter a.
+
+        return: a list of size 1 ([distance between point1-point3]) if a and b take form 1
+                or of size 2 ([distance between point1-point3, distance between point2-point4])
+                if a and b take form 2.
         """
+        
         point_dist = [ecc.calDistance(a[i:i+2], b[i:i+2]) 
                       for i in range (0,len(a),2)] 
         
diff --git a/emission/analysis/modelling/similarity/similarity_metric.py b/emission/analysis/modelling/similarity/similarity_metric.py
index e9c645e59..1b520318f 100644
--- a/emission/analysis/modelling/similarity/similarity_metric.py
+++ b/emission/analysis/modelling/similarity/similarity_metric.py
@@ -31,10 +31,9 @@ def similar(self, a: List[float], b: List[float], thresh: float) -> bool:
         """compares the features, returning true if they are similar
         within some threshold
 
-        :param a: features for a trip , 
+        :param a: features for a trip 
         :param b: features for another trip
         :param thresh: threshold for similarity
-        :clusteringWay: clustering based on origin/destination/origin-destination-pair
         :return: true if the feature similarity is within some threshold
         """
         similarity_values = self.similarity(a, b)

From 710d1a5791212b540b883dedd5613a562071edc9 Mon Sep 17 00:00:00 2001
From: $aTyam <satyam.saini@rutgers.edu>
Date: Thu, 31 Aug 2023 02:10:39 -0400
Subject: [PATCH 04/10] Upgrading tests  in `TestGreedySimilarityBinning.py`

Tests created to confirm that the configurations for trip clustering (origin, destination and origin-destination) work as expected inside the GreedySimilarityBinning class in the `greedy_similarity_binning.py` file.

In order to upgrade the old tests, `generate_mock_trips` in `modellingTestAssets.py` was also changed. Previously, out of the n trips generated, m had both origin and destination either inside or outside the threshold, thus allowing only 2 configurations. Now 4 configurations are possible: origin only, destination only, both origin and destination, or neither origin nor destination. The default is set to 'origin-and-destination' since this was the old default.
---
 .../TestGreedySimilarityBinning.py            | 416 +++++++++++++++++-
 .../modellingTests/modellingTestAssets.py     |  16 +-
 2 files changed, 419 insertions(+), 13 deletions(-)

diff --git a/emission/tests/modellingTests/TestGreedySimilarityBinning.py b/emission/tests/modellingTests/TestGreedySimilarityBinning.py
index 32bed47aa..620f2cf99 100644
--- a/emission/tests/modellingTests/TestGreedySimilarityBinning.py
+++ b/emission/tests/modellingTests/TestGreedySimilarityBinning.py
@@ -10,10 +10,16 @@ def setUp(self) -> None:
         logging.basicConfig(format='%(asctime)s:%(levelname)s:%(message)s',
         level=logging.DEBUG)
 
-    def testBinning(self):
+    def testNoBinning(self):
         """
-        when $should_be_grouped trips are the same, they should appear in a bin
+        Tests the three (origin, destination and origin-destination based) 
+        binning configuration for trips.
+
+        When both the origin and destination points of trips are outside a threshold
+        limit, none of the trips should be binned with the other in any of the three 
+        configs (origin, destination or origin-and-destination based).       
         """
+
         label_data = {
             "mode_confirm": ['walk', 'bike', 'transit'],
             "purpose_confirm": ['work', 'home', 'school'],
@@ -24,31 +30,414 @@ def testBinning(self):
         # within a radius that should have them binned.
         n = 20
         m = 5
+        
+        # trip_part: when mock trips are generated, coordinates of this part of 
+        #            m trips will be within the threshold. trip_part can take one
+        #            among the four values:
+        #
+        #            1. '__' ->(None, meaning NEITHER origin nor destination of any trip will lie 
+        #             within the mentioned threshold when trips are generated),
+        #
+        #            2. 'o_' ->(origin, meaning ONLY origin of m trips will lie within the mentioned 
+        #             threshold when trips are generated),
+        #
+        #            3. '_d' ->(destination),meaning ONLY destination of m trips will lie within the 
+        #             mentioned threshold when trips are generated)
+        #
+        #            4. 'od' ->(origin and destination,meaning BOTH origin and destination of m trips
+        #             will lie within the mentioned threshold when trips are generated)
+
         trips = etmm.generate_mock_trips(
             user_id="joe", 
             trips=n, 
             origin=(0, 0), 
-            destination=(1, 1), 
+            destination=(1, 1),
+            trip_part='__',
             label_data=label_data, 
             within_threshold=m, 
             threshold=0.001,  # ~ 111 meters in degrees WGS84
         )
+    
 
-        # pass in a test configuration to the binning algorithm
-        model_config = {
+        # pass in a test configuration to the binning algorithm.
+        #
+        # clustering_way : Part of the trip used for checking pairwise proximity.
+        #                  Can take one of the three values:
+        #                  
+        #                   1. 'origin' -> using origin of the trip to check if 2 points
+        #                                   lie within the mentioned similarity_threshold_meters
+        #                   2. 'destination' -> using destination of the trip to check if 2 points
+        #                                       lie within the mentioned similarity_threshold_meters
+        #                   3. 'origin-destination' -> both origin and destination of the trip to check 
+        #                                             if 2 points lie within the mentioned 
+        #                                              similarity_threshold_meters
+        
+        model1_config = {
             "metric": "od_similarity",
-            "similarity_threshold_meters": 500,  # meters,
+            "similarity_threshold_meters": 111,  # meters,
             "apply_cutoff": False,
+            "clustering_way": "origin",  
             "incremental_evaluation": False
         }
-        model = eamtg.GreedySimilarityBinning(model_config)
+        model1 = eamtg.GreedySimilarityBinning(model1_config)
+        model1.fit(trips)
+
+
+        model2_config = {
+            "metric": "od_similarity",
+            "similarity_threshold_meters":111,  # meters,
+            "apply_cutoff": False,
+            "clustering_way": "destination",
+            "incremental_evaluation": False
+        }
+        model2 = eamtg.GreedySimilarityBinning(model2_config)
+        model2.fit(trips)
+
+
+        model3_config = {
+            "metric": "od_similarity",
+            "similarity_threshold_meters": 111,  # meters,
+            "apply_cutoff": False,
+            "clustering_way": "origin-destination",
+            "incremental_evaluation": False
+        }
+        model3 = eamtg.GreedySimilarityBinning(model3_config)
+        model3.fit(trips)
+
+        # Since neither the origin nor the destination of the points generated lie
+        # within the threshold, there should be no binning at all. All the bins should
+        # have size 1.
+
+        at_least_one_large_bin = all(map(lambda b: len(b['feature_rows']) == 1, model1.bins.values()))
+        self.assertTrue(at_least_one_large_bin, "no bin should have more than 1 features in it")
+
+        at_least_one_large_bin = all(map(lambda b: len(b['feature_rows']) == 1, model2.bins.values()))
+        self.assertTrue(at_least_one_large_bin, "no bin should have more than 1 features in it")
+
+        at_least_one_large_bin = all(map(lambda b: len(b['feature_rows']) == 1, model3.bins.values()))
+        self.assertTrue(at_least_one_large_bin, "no bin should have more than 1 features in it")
+
+
+
+    def testBinningByOrigin(self):
+        """
+        Tests the 'origin' based binning method for trips.
+
+        When only the origin points of trips are within a threshold
+        limit, trips must be binned together that too if binned based on 
+        'origins', otherwise no binning.       
+        """
+
+        label_data = {
+            "mode_confirm": ['walk', 'bike', 'transit'],
+            "purpose_confirm": ['work', 'home', 'school'],
+            "replaced_mode": ['drive']
+        }
+
+        # generate $n trips. $m of them should have origin and destinations sampled
+        # within a radius that should have them binned.
+        n = 20
+        m = 5
+
+        # trip_part: when mock trips are generated, coordinates of this part of 
+        #            m trips will be within the threshold. trip_part can take one
+        #            among the four values:
+        #
+        #            1. '__' ->(None, meaning NEITHER origin nor destination of any trip will lie 
+        #             within the mentioned threshold when trips are generated),
+        #
+        #            2. 'o_' ->(origin, meaning ONLY origin of m trips will lie within the mentioned 
+        #             threshold when trips are generated),
+        #
+        #            3. '_d' ->(destination),meaning ONLY destination of m trips will lie within the 
+        #             mentioned threshold when trips are generated)
+        #
+        #            4. 'od' ->(origin and destination,meaning BOTH origin and destination of m trips
+        #             will lie within the mentioned threshold when trips are generated)
+
+        trips = etmm.generate_mock_trips(
+            user_id="joe", 
+            trips=n, 
+            origin=(0, 0), 
+            destination=(1, 1),
+            trip_part='o_',
+            label_data=label_data, 
+            within_threshold=m, 
+            threshold=0.001,  # ~ 111 meters in degrees WGS84
+        )
+
+        # pass in a test configuration to the binning algorithm.
+        #
+        # clustering_way : Part of the trip used for checking pairwise proximity.
+        #                  Can take one of the three values:
+        #                  
+        #                   1. 'origin' -> using origin of the trip to check if 2 points
+        #                                   lie within the mentioned similarity_threshold_meters
+        #                   2. 'destination' -> using destination of the trip to check if 2 points
+        #                                       lie within the mentioned similarity_threshold_meters
+        #                   3. 'origin-destination' -> both origin and destination of the trip to check 
+        #                                             if 2 points lie within the mentioned 
+        #                                              similarity_threshold_meters
+        
+        model1_config = {
+            "metric": "od_similarity",
+            "similarity_threshold_meters": 111,  # meters,
+            "apply_cutoff": False,
+            "clustering_way": "origin",
+            "incremental_evaluation": False
+        }
+        model1 = eamtg.GreedySimilarityBinning(model1_config)
+        model1.fit(trips)
+
+
+        model2_config = {
+            "metric": "od_similarity",
+            "similarity_threshold_meters":111,  # meters,
+            "apply_cutoff": False,
+            "clustering_way": "destination",
+            "incremental_evaluation": False
+        }
+        model2 = eamtg.GreedySimilarityBinning(model2_config)
+        model2.fit(trips)
+
+
+        model3_config = {
+            "metric": "od_similarity",
+            "similarity_threshold_meters": 111,  # meters,
+            "apply_cutoff": False,
+            "clustering_way": "origin-destination",
+            "incremental_evaluation": False
+        }
+        model3 = eamtg.GreedySimilarityBinning(model3_config)
+        model3.fit(trips)
+        
+
+        # Since only the origin of the points generated lies within the threshold,
+        # there should be binning only when 'origin' config is used. Otherwise all 
+        # the bins should have size 1.
+
+        at_least_one_large_bin = any(map(lambda b: len(b['feature_rows']) == m, model1.bins.values()))
+        self.assertTrue(at_least_one_large_bin, "at least one bin should have at least 5 features in it")
+
+        at_least_one_large_bin = all(map(lambda b: len(b['feature_rows']) ==1, model2.bins.values()))
+        self.assertTrue(at_least_one_large_bin, "no bin should have more than 1 features in it")
+
+        at_least_one_large_bin = all(map(lambda b: len(b['feature_rows']) == 1, model3.bins.values()))
+        self.assertTrue(at_least_one_large_bin, "no bin should have more than 1 features in it")
+
+
+
+    def testBinningByDestination(self):
+        """
+        Tests the 'destination' based binning method for trips.
+
+        When only the destination points of trips are within a threshold
+        limit, trips must be binned together that too if binned based on 
+        'destination', otherwise no binning.       
+        """
+
+        label_data = {
+            "mode_confirm": ['walk', 'bike', 'transit'],
+            "purpose_confirm": ['work', 'home', 'school'],
+            "replaced_mode": ['drive']
+        }
+
+        # generate $n trips. $m of them should have origin and destinations sampled
+        # within a radius that should have them binned.
+        n = 20
+        m = 5
+
+        # trip_part: when mock trips are generated, coordinates of this part of 
+        #            m trips will be within the threshold. trip_part can take one
+        #            among the four values:
+        #
+        #            1. '__' ->(None, meaning NEITHER origin nor destination of any trip will lie 
+        #             within the mentioned threshold when trips are generated),
+        #
+        #            2. 'o_' ->(origin, meaning ONLY origin of m trips will lie within the mentioned 
+        #             threshold when trips are generated),
+        #
+        #            3. '_d' ->(destination),meaning ONLY destination of m trips will lie within the 
+        #             mentioned threshold when trips are generated)
+        #
+        #            4. 'od' ->(origin and destination,meaning BOTH origin and destination of m trips
+        #             will lie within the mentioned threshold when trips are generated)
+
+        trips = etmm.generate_mock_trips(
+            user_id="joe", 
+            trips=n, 
+            origin=(0, 0), 
+            destination=(1, 1),
+            trip_part='_d',
+            label_data=label_data, 
+            within_threshold=m, 
+            threshold=0.001,  # ~ 111 meters in degrees WGS84
+        )
+
+        # pass in a test configuration to the binning algorithm.
+        #
+        # clustering_way : Part of the trip used for checking pairwise proximity.
+        #                  Can take one of the three values:
+        #                  
+        #                   1. 'origin' -> using origin of the trip to check if 2 points
+        #                                   lie within the mentioned similarity_threshold_meters
+        #                   2. 'destination' -> using destination of the trip to check if 2 points
+        #                                       lie within the mentioned similarity_threshold_meters
+        #                   3. 'origin-destination' -> both origin and destination of the trip to check 
+        #                                             if 2 points lie within the mentioned 
+        #                                              similarity_threshold_meters
         
-        model.fit(trips)
+        model1_config = {
+            "metric": "od_similarity",
+            "similarity_threshold_meters": 111,  # meters,
+            "apply_cutoff": False,
+            "clustering_way": "origin",
+            "incremental_evaluation": False
+        }
+        model1 = eamtg.GreedySimilarityBinning(model1_config)
+        model1.fit(trips)
+
 
-        # $m trip features should appear together in one bin
-        at_least_one_large_bin = any(map(lambda b: len(b['feature_rows']) == m, model.bins.values()))
+        model2_config = {
+            "metric": "od_similarity",
+            "similarity_threshold_meters":111,  # meters,
+            "apply_cutoff": False,
+            "clustering_way": "destination",
+            "incremental_evaluation": False
+        }
+        model2 = eamtg.GreedySimilarityBinning(model2_config)
+        model2.fit(trips)
+
+
+        model3_config = {
+            "metric": "od_similarity",
+            "similarity_threshold_meters": 111,  # meters,
+            "apply_cutoff": False,
+            "clustering_way": "origin-destination",
+            "incremental_evaluation": False
+        }
+        model3 = eamtg.GreedySimilarityBinning(model3_config)
+        model3.fit(trips)
+
+        # Since only the destination of the points generated lies within the threshold,
+        # there should be binning only when 'destination' config is used. Otherwise all 
+        # the bins should have size 1.
+
+        at_least_one_large_bin = all(map(lambda b: len(b['feature_rows']) == 1, model1.bins.values()))
         self.assertTrue(at_least_one_large_bin, "at least one bin should have at least 5 features in it")
 
+        at_least_one_large_bin = any(map(lambda b: len(b['feature_rows']) ==m, model2.bins.values()))
+        self.assertTrue(at_least_one_large_bin, "no bin should have more than 1 features in it")
+
+        at_least_one_large_bin = all(map(lambda b: len(b['feature_rows']) == 1, model3.bins.values()))
+        self.assertTrue(at_least_one_large_bin, "no bin should have more than 1 features in it")
+
+
+    def testBinningByOriginAndDestination(self):
+        """
+        Tests the 'origin-destination' based binning method for trips.
+
+        When both the origin and destination points of trips are within
+        a threshold limit, trips will be binned together in all three (origin , 
+        destination, origin-and-destinaiton) configurations. 
+        """        
+
+        label_data = {
+            "mode_confirm": ['walk', 'bike', 'transit'],
+            "purpose_confirm": ['work', 'home', 'school'],
+            "replaced_mode": ['drive']
+        }
+
+        # generate $n trips. $m of them should have origin and destinations sampled
+        # within a radius that should have them binned.
+        n = 20
+        m = 5
+
+        # trip_part: when mock trips are generated, coordinates of this part of 
+        #            m trips will be within the threshold. trip_part can take one
+        #            among the four values:
+        #
+        #            1. '__' ->(None, meaning NEITHER origin nor destination of any trip will lie 
+        #             within the mentioned threshold when trips are generated),
+        #
+        #            2. 'o_' ->(origin, meaning ONLY origin of m trips will lie within the mentioned 
+        #             threshold when trips are generated),
+        #
+        #            3. '_d' ->(destination),meaning ONLY destination of m trips will lie within the 
+        #             mentioned threshold when trips are generated)
+        #
+        #            4. 'od' ->(origin and destination,meaning BOTH origin and destination of m trips
+        #             will lie within the mentioned threshold when trips are generated)
+
+        trips = etmm.generate_mock_trips(
+            user_id="joe", 
+            trips=n, 
+            origin=(0, 0), 
+            destination=(1, 1),
+            trip_part='od',
+            label_data=label_data, 
+            within_threshold=m, 
+            threshold=0.001,  # ~ 111 meters in degrees WGS84
+        )
+
+        # pass in a test configuration to the binning algorithm.
+        #
+        # clustering_way : Part of the trip used for checking pairwise proximity.
+        #                  Can take one of the three values:
+        #                  
+        #                   1. 'origin' -> using origin of the trip to check if 2 points
+        #                                   lie within the mentioned similarity_threshold_meters
+        #                   2. 'destination' -> using destination of the trip to check if 2 points
+        #                                       lie within the mentioned similarity_threshold_meters
+        #                   3. 'origin-destination' -> both origin and destination of the trip to check 
+        #                                             if 2 points lie within the mentioned 
+        #                                              similarity_threshold_meters
+        
+        model1_config = {
+            "metric": "od_similarity",
+            "similarity_threshold_meters": 111,  # meters,
+            "apply_cutoff": False,
+            "clustering_way": "origin",
+            "incremental_evaluation": False
+        }
+        model1 = eamtg.GreedySimilarityBinning(model1_config)
+        model1.fit(trips)
+
+
+        model2_config = {
+            "metric": "od_similarity",
+            "similarity_threshold_meters":111,  # meters,
+            "apply_cutoff": False,
+            "clustering_way": "destination",
+            "incremental_evaluation": False
+        }
+        model2 = eamtg.GreedySimilarityBinning(model2_config)
+        model2.fit(trips)
+
+
+        model3_config = {
+            "metric": "od_similarity",
+            "similarity_threshold_meters": 111,  # meters,
+            "apply_cutoff": False,
+            "clustering_way": "origin-destination",
+            "incremental_evaluation": False
+        }
+        model3 = eamtg.GreedySimilarityBinning(model3_config)
+        model3.fit(trips)
+
+        # Since both the origin and the destination points of the generated trips lie 
+        # within the threshold, there should be binning in all three configs.
+
+        at_least_one_large_bin = any(map(lambda b: len(b['feature_rows']) == m, model1.bins.values()))
+        self.assertTrue(at_least_one_large_bin, "at least one bin should have at least 5 features in it")
+
+        at_least_one_large_bin = any(map(lambda b: len(b['feature_rows']) ==m, model2.bins.values()))
+        self.assertTrue(at_least_one_large_bin, "no bin should have more than 1 features in it")
+
+        at_least_one_large_bin = any(map(lambda b: len(b['feature_rows']) == m, model3.bins.values()))
+        self.assertTrue(at_least_one_large_bin, "no bin should have more than 1 features in it")
+
+
     def testPrediction(self):
         """
         training and testing with similar trips should lead to a positive bin match
@@ -64,7 +453,8 @@ def testPrediction(self):
             user_id="joe", 
             trips=n, 
             origin=(0, 0), 
-            destination=(1, 1), 
+            destination=(1, 1),
+            trip_part='od', 
             label_data=label_data, 
             threshold=0.001,  # ~ 111 meters in degrees WGS84
         )
@@ -73,6 +463,7 @@ def testPrediction(self):
             "metric": "od_similarity",
             "similarity_threshold_meters": 500,      # meters,
             "apply_cutoff": False,
+            "clustering_way": "origin-destination",
             "incremental_evaluation": False
         }
         model = eamtg.GreedySimilarityBinning(model_config)
@@ -102,6 +493,7 @@ def testNoPrediction(self):
             trips=n, 
             origin=(39.7645187, -104.9951944),       # Denver, CO
             destination=(39.7435206, -105.2369292),  # Golden, CO
+            trip_part='od',
             label_data=label_data, 
             threshold=0.001,  # ~ 111 meters in degrees WGS84
         )
@@ -110,6 +502,7 @@ def testNoPrediction(self):
             trips=1, 
             origin=(61.1042262, -150.5611644),       # Anchorage, AK
             destination=(62.2721466, -150.3233046),  # Talkeetna, AK
+            trip_part='od',
             label_data=label_data, 
             threshold=0.001,  # ~ 111 meters in degrees WGS84
         )
@@ -118,6 +511,7 @@ def testNoPrediction(self):
             "metric": "od_similarity",
             "similarity_threshold_meters": 500,      # meters,
             "apply_cutoff": False,
+            "clustering_way": "origin-destination",
             "incremental_evaluation": False
         }
         model = eamtg.GreedySimilarityBinning(model_config)
diff --git a/emission/tests/modellingTests/modellingTestAssets.py b/emission/tests/modellingTests/modellingTestAssets.py
index 879a3a2ca..de9b26cf4 100644
--- a/emission/tests/modellingTests/modellingTestAssets.py
+++ b/emission/tests/modellingTests/modellingTestAssets.py
@@ -131,6 +131,7 @@ def generate_mock_trips(
     trips,
     origin, 
     destination, 
+    trip_part='od',
     label_data = None, 
     within_threshold = None,
     start_ts: None = None,
@@ -159,6 +160,17 @@ def generate_mock_trips(
     :param trips: number of trips
     :param origin: origin coordinates
     :param destination: destination coordinates
+    :param trip_part: when mock trips are generated, coordinates of this part of 
+                      the trips will be within the threshold. trip_part can take one
+                      among the four values:
+                    1. '__' ->(None, meaning NEITHER origin nor destination of any trip will lie 
+                     within the mentioned threshold when trips are generated),        
+                    2. 'o_' ->(origin, meaning ONLY origin of m trips will lie within the mentioned 
+                     threshold when trips are generated),        
+                    3. '_d' ->(destination),meaning ONLY destination of m trips will lie within the 
+                     mentioned threshold when trips are generated)        
+                    4. 'od' ->(origin and destination,meaning BOTH origin and destination of m trips
+                     will lie within the mentioned threshold when trips are generated)
     :param label_data: dictionary of label data, see above, defaults to None
     :param within_threshold: number of trips that should fall within the provided
            distance threshold in degrees WGS84, defaults to None
@@ -175,8 +187,8 @@ def generate_mock_trips(
     trips_within_threshold = [i < within for i in range(trips)]
     result = []
     for within in trips_within_threshold:
-        o = generate_trip_coordinates(origin, within, threshold, max)
-        d = generate_trip_coordinates(destination, within, threshold, max)
+        o = generate_trip_coordinates(origin, (trip_part[0] == 'o' and within), threshold, max)
+        d = generate_trip_coordinates(destination, (trip_part[1] == 'd' and within), threshold, max)
         labels = {} if label_data is None or random.random() > has_label_p \
             else sample_trip_labels(
             mode_labels=label_data.get('mode_confirm'),

From 6d9ea7786142f7e10a30ab4be5b326b6634248bd Mon Sep 17 00:00:00 2001
From: $aTyam <satyam.saini@rutgers.edu>
Date: Thu, 31 Aug 2023 15:28:14 -0400
Subject: [PATCH 05/10] Testing upgraded `Similarity` functionality

Check that `Similarity` behaves as expected when a list of size 2 (for only origin OR only destination) or of size 4 (for origin AND destination) is passed.
---
 .../modellingTests/TestBackwardsCompat.py     |  5 +
 .../TestRunGreedyIncrementalModel.py          |  2 +
 .../modellingTests/TestRunGreedyModel.py      |  3 +
 .../modellingTests/TestSimilarityMetric.py    | 95 ++++++++++++++++---
 .../modellingTests/modellingTestAssets.py     |  2 +-
 5 files changed, 92 insertions(+), 15 deletions(-)

diff --git a/emission/tests/modellingTests/TestBackwardsCompat.py b/emission/tests/modellingTests/TestBackwardsCompat.py
index b81b5f529..c3cba4fae 100644
--- a/emission/tests/modellingTests/TestBackwardsCompat.py
+++ b/emission/tests/modellingTests/TestBackwardsCompat.py
@@ -59,6 +59,7 @@ def testAnyVsAllWhilePredicting(self):
             "metric": "od_similarity",
             "similarity_threshold_meters": 16000,      # meters,
             "apply_cutoff": False,
+            "clustering_way": 'origin-destination',
             "incremental_evaluation": False
         }
         new_builder = eamtg.GreedySimilarityBinning(model_config)
@@ -96,6 +97,7 @@ def testRandomTripsWithinTheSameThreshold(self):
             trips=n, 
             origin=(0, 0), 
             destination=(1, 1), 
+            trip_part='od',
             label_data=label_data, 
             threshold=0.001,  # ~ 111 meters in degrees WGS84
         )
@@ -113,6 +115,7 @@ def testRandomTripsWithinTheSameThreshold(self):
             "metric": "od_similarity",
             "similarity_threshold_meters": 500,      # meters,
             "apply_cutoff": False,
+            "clustering_way": 'origin-destination',
             "incremental_evaluation": False
         }
         new_model = eamtg.GreedySimilarityBinning(model_config)
@@ -156,6 +159,7 @@ def testRandomTripsOutsideTheSameThreshold(self):
             trips=n, 
             origin=(0, 0), 
             destination=(1, 1), 
+            trip_part='od', 
             label_data=label_data, 
             threshold=0.1,  # Much bigger than the 500m threshold, so we will get multiple bins
         )
@@ -173,6 +177,7 @@ def testRandomTripsOutsideTheSameThreshold(self):
             "metric": "od_similarity",
             "similarity_threshold_meters": 500,      # meters,
             "apply_cutoff": False,
+            "clustering_way": 'origin-destination',
             "incremental_evaluation": False
         }
         new_model = eamtg.GreedySimilarityBinning(model_config)
diff --git a/emission/tests/modellingTests/TestRunGreedyIncrementalModel.py b/emission/tests/modellingTests/TestRunGreedyIncrementalModel.py
index aee6a6f09..1529f8df5 100644
--- a/emission/tests/modellingTests/TestRunGreedyIncrementalModel.py
+++ b/emission/tests/modellingTests/TestRunGreedyIncrementalModel.py
@@ -44,6 +44,7 @@ def setUp(self):
             "metric": "od_similarity",
             "similarity_threshold_meters": sim_threshold,
             "apply_cutoff": False,
+            "clustering_way": 'origin-destination',
             "incremental_evaluation": True
         }
 
@@ -162,6 +163,7 @@ def testIncrementalRun(self):
             trips=self.new_trips_per_invocation,
             origin=self.origin,
             destination=self.destination,
+            trip_part='od',
             label_data=label_data,
             threshold=0.0001, # ~10m,
             start_ts=time.time() - 20,
diff --git a/emission/tests/modellingTests/TestRunGreedyModel.py b/emission/tests/modellingTests/TestRunGreedyModel.py
index 10f221909..9e4431fa3 100644
--- a/emission/tests/modellingTests/TestRunGreedyModel.py
+++ b/emission/tests/modellingTests/TestRunGreedyModel.py
@@ -62,6 +62,7 @@ def setUp(self):
                 trips=self.total_trips,
                 origin=self.origin,
                 destination=self.destination,
+                trip_part='od',
                 label_data=label_data,
                 within_threshold=self.clustered_trips,  
                 threshold=0.004, # ~400m
@@ -106,6 +107,7 @@ def testTrainGreedyModelWithZeroTrips(self):
             "metric": "od_similarity",
             "similarity_threshold_meters": 500,
             "apply_cutoff": False,
+            "clustering_way": 'origin-destination',
             "incremental_evaluation": False
         }
 
@@ -142,6 +144,7 @@ def test1RoundTripGreedySimilarityBinning(self):
             "metric": "od_similarity",
             "similarity_threshold_meters": 500,
             "apply_cutoff": False,
+            "clustering_way": 'origin-destination',
             "incremental_evaluation": False
         }
 
diff --git a/emission/tests/modellingTests/TestSimilarityMetric.py b/emission/tests/modellingTests/TestSimilarityMetric.py
index ae37fc39a..f7c7b195f 100644
--- a/emission/tests/modellingTests/TestSimilarityMetric.py
+++ b/emission/tests/modellingTests/TestSimilarityMetric.py
@@ -6,26 +6,93 @@ class TestSimilarityMetric(unittest.TestCase):
 
     def testODsAreSimilar(self):
         generate_points_thresh = 0.001  # approx. 111 meters
-        similarity_threshold = 500  # 
-        # random, but, points are sampled within a circle and should always be < sim threshold
-        trips = etmm.generate_mock_trips('bob', 2, [0, 0], [1, 1], threshold=generate_points_thresh)
+        similarity_threshold = 111  # 
+
         metric = eamso.OriginDestinationSimilarity()
+        ## Sub-Test 1 - 3 :
+        # random, but, origin and destination points are sampled within a circle and should always be < sim threshold
+        # Since both origin and destination poitns lie within threshold limits,they should be similar
+        # when we check by just origin or just destination or both origin-and-destination
+
+        trips = etmm.generate_mock_trips('bob', 2, [0, 0], [1, 1], 'od',threshold=generate_points_thresh) 
         coords0 = metric.extract_features(trips[0])
-        coords1 = metric.extract_features(trips[1])
-        similar = metric.similar(coords0, coords1, similarity_threshold)
-        self.assertTrue(similar)
+        coords1 = metric.extract_features(trips[1])        
+        similarOD1 = metric.similar(coords0, coords1, similarity_threshold)
+        similarOD2 = metric.similar(coords0[:2], coords1[:2], similarity_threshold)
+        similarOD3 = metric.similar(coords0[2:], coords1[2:], similarity_threshold)
+
+        ## Sub-Test 4 :
+        # random, but, only origin points are sampled within a circle and should always be < sim threshold
+        # Since origin of two points lies within threshold limits,they should be similar
+        # when we check just origin for similarity.
+
+
+        trips = etmm.generate_mock_trips('alice', 2, [0, 0], [1, 1], 'o_',threshold=generate_points_thresh)        
+        coords0 = metric.extract_features(trips[0])[:2]
+        coords1 = metric.extract_features(trips[1])[:2]        
+        similarO = metric.similar(coords0, coords1, similarity_threshold)
+
+        ##Sub-Test 5 :
+        # random, but, only destination points are sampled within a circle and should always be < sim threshold
+        # Since destination of two points lies within threshold limits,they should be similar
+        # when we check just destination for similarity.
+
+        trips = etmm.generate_mock_trips('Caty', 2, [0, 0], [1, 1], '_d',threshold=generate_points_thresh)        
+        coords0 = metric.extract_features(trips[0])[2:]
+        coords1 = metric.extract_features(trips[1])[2:]        
+        similarD = metric.similar(coords0, coords1, similarity_threshold)
+
+        # All the similars must be true
+        self.assertTrue(similarOD1) # RESULT SUB-TEST 1
+        self.assertTrue(similarOD2) # RESULT SUB-TEST 2
+        self.assertTrue(similarOD3) # RESULT SUB-TEST 3
+        self.assertTrue(similarO)  # RESULT SUB-TEST 4
+        self.assertTrue(similarD) # RESULT SUB-TEST 5
     
     def testODsAreNotSimilar(self):
         generate_points_thresh = 0.001  # approx. 111 meters
-        similarity_threshold = 500  # 
-        
-        trips0 = etmm.generate_mock_trips('bob', 1, [0, 0], [1, 1], threshold=generate_points_thresh)
-        trips1 = etmm.generate_mock_trips('alice', 1, [2, 2], [3, 3], threshold=generate_points_thresh)
+        similarity_threshold = 111  # 
+        metric = eamso.OriginDestinationSimilarity()
+
+        ## Sub-Test 1-2: 
+        # Two trips with neither origin nor destination coordinates within threshold
+        # must not be similar in any configuration of similarity testing.
+        trips = etmm.generate_mock_trips('bob', 2, [0, 0], [1, 1], '__', threshold=generate_points_thresh)  
+        coords0 = metric.extract_features(trips[0])
+        coords1 = metric.extract_features(trips[1])
+        similar11 = metric.similar(coords0[:2], coords1[:2], similarity_threshold)
+        similar12 = metric.similar(coords0[2:], coords1[:], similarity_threshold)
+
+        ## Sub-Test 3-4: 
+        # Two trips with  origin coordinates within threshold but we check  
+        # similarity using destination coordinates or origin-and-destination
+        # should not be similar.
+        trips = etmm.generate_mock_trips('Alice', 2, [2, 2], [3, 3], 'o_', threshold=generate_points_thresh)
         metric = eamso.OriginDestinationSimilarity()
-        coords0 = metric.extract_features(trips0[0])
-        coords1 = metric.extract_features(trips1[0])
-        similar = metric.similar(coords0, coords1, similarity_threshold)
-        self.assertFalse(similar)
+        coords0 = metric.extract_features(trips[0])
+        coords1 = metric.extract_features(trips[1])
+        similar21 = metric.similar(coords0[2:], coords1[2:], similarity_threshold)
+        similar22 = metric.similar(coords0, coords1, similarity_threshold)
+
+        ## Sub-Test 5-6: 
+        # Two trips with destination coordinates within threshold but we check 
+        # similarity using origin coordinates or origin-and-destination 
+        # should not be similar.        
+        trips = etmm.generate_mock_trips('Caty', 2, [3, 3], [4, 4], '_d', threshold=generate_points_thresh)
+        metric = eamso.OriginDestinationSimilarity()
+        coords0 = metric.extract_features(trips[0])
+        coords1 = metric.extract_features(trips[1])
+        similar31 = metric.similar(coords0[:2], coords1[:2], similarity_threshold)
+        similar32 = metric.similar(coords0, coords1, similarity_threshold)
+
+        # All the similars must be False
+        self.assertFalse(similar11) # RESULT SUB-TEST 1
+        self.assertFalse(similar12) # RESULT SUB-TEST 2
+        self.assertFalse(similar21) # RESULT SUB-TEST 3
+        self.assertFalse(similar22) # RESULT SUB-TEST 4
+        self.assertFalse(similar31) # RESULT SUB-TEST 5
+        self.assertFalse(similar32) # RESULT SUB-TEST 6
+
 
 if __name__ == '__main__':
     unittest.main()
diff --git a/emission/tests/modellingTests/modellingTestAssets.py b/emission/tests/modellingTests/modellingTestAssets.py
index de9b26cf4..cb886670c 100644
--- a/emission/tests/modellingTests/modellingTestAssets.py
+++ b/emission/tests/modellingTests/modellingTestAssets.py
@@ -211,6 +211,6 @@ def generate_mock_trips(
         "purpose_confirm": ['work', 'home', 'school'],
         "replaced_mode": ['walk', 'bike', 'drive']
     }
-    result = generate_mock_trips('joe-bob', 14, [0, 0], [1,1], label_data, 6)
+    result = generate_mock_trips('joe-bob', 14, [0, 0], [1,1],'od', label_data, 6)
     for r in result:
         print(r)
\ No newline at end of file

From 7f6d7542b5561861e5edfd0e27435cb11564078a Mon Sep 17 00:00:00 2001
From: $aTyam <satyam.saini@rutgers.edu>
Date: Tue, 5 Sep 2023 16:02:28 -0400
Subject: [PATCH 06/10] Correcting logic behind binning and non-binning while
 testing

1. Improved logic based on this comment: https://github.com/e-mission/e-mission-server/pull/933/commits/710d1a5791212b540b883dedd5613a562071edc9#r1314065502

2. Created a utilities file for repetitive code required by multiple files.

3. Clustering threshold set back to 500.

4. More in-code comments.
---
 .../TestGreedySimilarityBinning.py            | 505 ++----------------
 .../modellingTests/TestSimilarityMetric.py    | 127 ++---
 emission/tests/modellingTests/utilities.py    |  68 +++
 3 files changed, 165 insertions(+), 535 deletions(-)
 create mode 100644 emission/tests/modellingTests/utilities.py

diff --git a/emission/tests/modellingTests/TestGreedySimilarityBinning.py b/emission/tests/modellingTests/TestGreedySimilarityBinning.py
index 620f2cf99..3e1cd78c2 100644
--- a/emission/tests/modellingTests/TestGreedySimilarityBinning.py
+++ b/emission/tests/modellingTests/TestGreedySimilarityBinning.py
@@ -1,6 +1,6 @@
 import unittest
 import emission.analysis.modelling.trip_model.greedy_similarity_binning as eamtg
-import emission.tests.modellingTests.modellingTestAssets as etmm
+import emission.tests.modellingTests.utilities as etmu
 import logging
 
 
@@ -15,428 +15,60 @@ def testNoBinning(self):
         Tests the three (origin, destination and origin-destination based) 
         binning configuration for trips.
 
-        When both the origin and destination points of trips are outside a threshold
+        When the origin and destination points of trips are outside a threshold
         limit, none of the trips should be binned with the other in any of the three 
         configs (origin, destination or origin-and-destination based).       
         """
 
-        label_data = {
-            "mode_confirm": ['walk', 'bike', 'transit'],
-            "purpose_confirm": ['work', 'home', 'school'],
-            "replaced_mode": ['drive']
-        }
-
-        # generate $n trips. $m of them should have origin and destinations sampled
-        # within a radius that should have them binned.
-        n = 20
-        m = 5
-        
-        # trip_part: when mock trips are generated, coordinates of this part of 
-        #            m trips will be within the threshold. trip_part can take one
-        #            among the four values:
-        #
-        #            1. '__' ->(None, meaning NEITHER origin nor destination of any trip will lie 
-        #             within the mentioned threshold when trips are generated),
-        #
-        #            2. 'o_' ->(origin, meaning ONLY origin of m trips will lie within the mentioned 
-        #             threshold when trips are generated),
-        #
-        #            3. '_d' ->(destination),meaning ONLY destination of m trips will lie within the 
-        #             mentioned threshold when trips are generated)
-        #
-        #            4. 'od' ->(origin and destination,meaning BOTH origin and destination of m trips
-        #             will lie within the mentioned threshold when trips are generated)
-
-        trips = etmm.generate_mock_trips(
-            user_id="joe", 
-            trips=n, 
-            origin=(0, 0), 
-            destination=(1, 1),
-            trip_part='__',
-            label_data=label_data, 
-            within_threshold=m, 
-            threshold=0.001,  # ~ 111 meters in degrees WGS84
-        )
-    
-
-        # pass in a test configuration to the binning algorithm.
-        #
-        # clustering_way : Part of the trip used for checking pairwise proximity.
-        #                  Can take one of the three values:
-        #                  
-        #                   1. 'origin' -> using origin of the trip to check if 2 points
-        #                                   lie within the mentioned similarity_threshold_meters
-        #                   2. 'destination' -> using destination of the trip to check if 2 points
-        #                                       lie within the mentioned similarity_threshold_meters
-        #                   3. 'origin-destination' -> both origin and destination of the trip to check 
-        #                                             if 2 points lie within the mentioned 
-        #                                              similarity_threshold_meters
-        
-        model1_config = {
-            "metric": "od_similarity",
-            "similarity_threshold_meters": 111,  # meters,
-            "apply_cutoff": False,
-            "clustering_way": "origin",  
-            "incremental_evaluation": False
-        }
-        model1 = eamtg.GreedySimilarityBinning(model1_config)
-        model1.fit(trips)
-
-
-        model2_config = {
-            "metric": "od_similarity",
-            "similarity_threshold_meters":111,  # meters,
-            "apply_cutoff": False,
-            "clustering_way": "destination",
-            "incremental_evaluation": False
-        }
-        model2 = eamtg.GreedySimilarityBinning(model2_config)
-        model2.fit(trips)
-
-
-        model3_config = {
-            "metric": "od_similarity",
-            "similarity_threshold_meters": 111,  # meters,
-            "apply_cutoff": False,
-            "clustering_way": "origin-destination",
-            "incremental_evaluation": False
-        }
-        model3 = eamtg.GreedySimilarityBinning(model3_config)
-        model3.fit(trips)
-
-        # Since neither the origin nor the destination of the points generated lie
-        # within the threshold, there should be no binning at all. All the bins should
-        # have size 1.
-
-        at_least_one_large_bin = all(map(lambda b: len(b['feature_rows']) == 1, model1.bins.values()))
-        self.assertTrue(at_least_one_large_bin, "no bin should have more than 1 features in it")
-
-        at_least_one_large_bin = all(map(lambda b: len(b['feature_rows']) == 1, model2.bins.values()))
-        self.assertTrue(at_least_one_large_bin, "no bin should have more than 1 features in it")
-
-        at_least_one_large_bin = all(map(lambda b: len(b['feature_rows']) == 1, model3.bins.values()))
-        self.assertTrue(at_least_one_large_bin, "no bin should have more than 1 features in it")
-
-
-
-    def testBinningByOrigin(self):
-        """
-        Tests the 'origin' based binning method for trips.
-
-        When only the origin points of trips are within a threshold
-        limit, trips must be binned together that too if binned based on 
-        'origins', otherwise no binning.       
-        """
-
-        label_data = {
-            "mode_confirm": ['walk', 'bike', 'transit'],
-            "purpose_confirm": ['work', 'home', 'school'],
-            "replaced_mode": ['drive']
-        }
-
-        # generate $n trips. $m of them should have origin and destinations sampled
-        # within a radius that should have them binned.
-        n = 20
-        m = 5
-
-        # trip_part: when mock trips are generated, coordinates of this part of 
-        #            m trips will be within the threshold. trip_part can take one
-        #            among the four values:
-        #
-        #            1. '__' ->(None, meaning NEITHER origin nor destination of any trip will lie 
-        #             within the mentioned threshold when trips are generated),
-        #
-        #            2. 'o_' ->(origin, meaning ONLY origin of m trips will lie within the mentioned 
-        #             threshold when trips are generated),
-        #
-        #            3. '_d' ->(destination),meaning ONLY destination of m trips will lie within the 
-        #             mentioned threshold when trips are generated)
-        #
-        #            4. 'od' ->(origin and destination,meaning BOTH origin and destination of m trips
-        #             will lie within the mentioned threshold when trips are generated)
+        # generate $n trips.
+        n = 20     
 
-        trips = etmm.generate_mock_trips(
-            user_id="joe", 
-            trips=n, 
-            origin=(0, 0), 
-            destination=(1, 1),
-            trip_part='o_',
-            label_data=label_data, 
-            within_threshold=m, 
-            threshold=0.001,  # ~ 111 meters in degrees WGS84
-        )
+        #this generates 20 trips one-by-one, where each trip's respective origin and destination 
+        # points are more than 500m away.
+        trips = [ etmu.setTripConfig(1, (i, i), (i+1, i+1), 'od', 1)[0] for i in range(n)]    
 
-        # pass in a test configuration to the binning algorithm.
-        #
-        # clustering_way : Part of the trip used for checking pairwise proximity.
-        #                  Can take one of the three values:
-        #                  
-        #                   1. 'origin' -> using origin of the trip to check if 2 points
-        #                                   lie within the mentioned similarity_threshold_meters
-        #                   2. 'destination' -> using destination of the trip to check if 2 points
-        #                                       lie within the mentioned similarity_threshold_meters
-        #                   3. 'origin-destination' -> both origin and destination of the trip to check 
-        #                                             if 2 points lie within the mentioned 
-        #                                              similarity_threshold_meters
+        # parameters passed for testing. A list, where each element is one way of clustering
+        clustering_ways_paramters= ["origin","destination","origin-destination"]
         
-        model1_config = {
-            "metric": "od_similarity",
-            "similarity_threshold_meters": 111,  # meters,
-            "apply_cutoff": False,
-            "clustering_way": "origin",
-            "incremental_evaluation": False
-        }
-        model1 = eamtg.GreedySimilarityBinning(model1_config)
-        model1.fit(trips)
-
-
-        model2_config = {
-            "metric": "od_similarity",
-            "similarity_threshold_meters":111,  # meters,
-            "apply_cutoff": False,
-            "clustering_way": "destination",
-            "incremental_evaluation": False
-        }
-        model2 = eamtg.GreedySimilarityBinning(model2_config)
-        model2.fit(trips)
-
-
-        model3_config = {
-            "metric": "od_similarity",
-            "similarity_threshold_meters": 111,  # meters,
-            "apply_cutoff": False,
-            "clustering_way": "origin-destination",
-            "incremental_evaluation": False
-        }
-        model3 = eamtg.GreedySimilarityBinning(model3_config)
-        model3.fit(trips)
-        
-
-        # Since only the origin of the points generated lies within the threshold,
-        # there should be binning only when 'origin' config is used. Otherwise all 
-        # the bins should have size 1.
-
-        at_least_one_large_bin = any(map(lambda b: len(b['feature_rows']) == m, model1.bins.values()))
-        self.assertTrue(at_least_one_large_bin, "at least one bin should have at least 5 features in it")
-
-        at_least_one_large_bin = all(map(lambda b: len(b['feature_rows']) ==1, model2.bins.values()))
-        self.assertTrue(at_least_one_large_bin, "no bin should have more than 1 features in it")
-
-        at_least_one_large_bin = all(map(lambda b: len(b['feature_rows']) == 1, model3.bins.values()))
-        self.assertTrue(at_least_one_large_bin, "no bin should have more than 1 features in it")
-
-
-
-    def testBinningByDestination(self):
-        """
-        Tests the 'destination' based binning method for trips.
-
-        When only the destination points of trips are within a threshold
-        limit, trips must be binned together that too if binned based on 
-        'destination', otherwise no binning.       
+        #Testing each of the three clustering_ways by passing them as parameters
+        for cw in clustering_ways_paramters:
+            with self.subTest(clustering_way=cw):
+                #initialise the binning model and fit with previously generated trips
+                model = etmu.setModelConfig("od_similarity",  500,  False, cw, False)
+                model.fit(trips)
+                #check each bins for no of trips
+                no_large_bin = all(map(lambda b: len(b['feature_rows']) == 1, model.bins.values()))
+                #Since all trips were sampled outside the threshold, there should be no bin
+                # with more then 1 trip
+                self.assertTrue(no_large_bin,"no bin should have more than 1 features in it")
+
+    def testBinning(self):
         """
+        Tests the three (origin, destination and origin-destination based) 
+        binning configuration for trips.
 
-        label_data = {
-            "mode_confirm": ['walk', 'bike', 'transit'],
-            "purpose_confirm": ['work', 'home', 'school'],
-            "replaced_mode": ['drive']
-        }
-
-        # generate $n trips. $m of them should have origin and destinations sampled
-        # within a radius that should have them binned.
-        n = 20
-        m = 5
-
-        # trip_part: when mock trips are generated, coordinates of this part of 
-        #            m trips will be within the threshold. trip_part can take one
-        #            among the four values:
-        #
-        #            1. '__' ->(None, meaning NEITHER origin nor destination of any trip will lie 
-        #             within the mentioned threshold when trips are generated),
-        #
-        #            2. 'o_' ->(origin, meaning ONLY origin of m trips will lie within the mentioned 
-        #             threshold when trips are generated),
-        #
-        #            3. '_d' ->(destination),meaning ONLY destination of m trips will lie within the 
-        #             mentioned threshold when trips are generated)
-        #
-        #            4. 'od' ->(origin and destination,meaning BOTH origin and destination of m trips
-        #             will lie within the mentioned threshold when trips are generated)
-
-        trips = etmm.generate_mock_trips(
-            user_id="joe", 
-            trips=n, 
-            origin=(0, 0), 
-            destination=(1, 1),
-            trip_part='_d',
-            label_data=label_data, 
-            within_threshold=m, 
-            threshold=0.001,  # ~ 111 meters in degrees WGS84
-        )
-
-        # pass in a test configuration to the binning algorithm.
-        #
-        # clustering_way : Part of the trip used for checking pairwise proximity.
-        #                  Can take one of the three values:
-        #                  
-        #                   1. 'origin' -> using origin of the trip to check if 2 points
-        #                                   lie within the mentioned similarity_threshold_meters
-        #                   2. 'destination' -> using destination of the trip to check if 2 points
-        #                                       lie within the mentioned similarity_threshold_meters
-        #                   3. 'origin-destination' -> both origin and destination of the trip to check 
-        #                                             if 2 points lie within the mentioned 
-        #                                              similarity_threshold_meters
-        
-        model1_config = {
-            "metric": "od_similarity",
-            "similarity_threshold_meters": 111,  # meters,
-            "apply_cutoff": False,
-            "clustering_way": "origin",
-            "incremental_evaluation": False
-        }
-        model1 = eamtg.GreedySimilarityBinning(model1_config)
-        model1.fit(trips)
-
-
-        model2_config = {
-            "metric": "od_similarity",
-            "similarity_threshold_meters":111,  # meters,
-            "apply_cutoff": False,
-            "clustering_way": "destination",
-            "incremental_evaluation": False
-        }
-        model2 = eamtg.GreedySimilarityBinning(model2_config)
-        model2.fit(trips)
-
-
-        model3_config = {
-            "metric": "od_similarity",
-            "similarity_threshold_meters": 111,  # meters,
-            "apply_cutoff": False,
-            "clustering_way": "origin-destination",
-            "incremental_evaluation": False
-        }
-        model3 = eamtg.GreedySimilarityBinning(model3_config)
-        model3.fit(trips)
-
-        # Since only the destination of the points generated lies within the threshold,
-        # there should be binning only when 'destination' config is used. Otherwise all 
-        # the bins should have size 1.
-
-        at_least_one_large_bin = all(map(lambda b: len(b['feature_rows']) == 1, model1.bins.values()))
-        self.assertTrue(at_least_one_large_bin, "at least one bin should have at least 5 features in it")
-
-        at_least_one_large_bin = any(map(lambda b: len(b['feature_rows']) ==m, model2.bins.values()))
-        self.assertTrue(at_least_one_large_bin, "no bin should have more than 1 features in it")
-
-        at_least_one_large_bin = all(map(lambda b: len(b['feature_rows']) == 1, model3.bins.values()))
-        self.assertTrue(at_least_one_large_bin, "no bin should have more than 1 features in it")
-
-
-    def testBinningByOriginAndDestination(self):
+        When the points lie within threshold ,the trips are binned together.
         """
-        Tests the 'origin-destination' based binning method for trips.
-
-        When both the origin and destination points of trips are within
-        a threshold limit, trips will be binned together in all three (origin , 
-        destination, origin-and-destinaiton) configurations. 
-        """        
-
-        label_data = {
-            "mode_confirm": ['walk', 'bike', 'transit'],
-            "purpose_confirm": ['work', 'home', 'school'],
-            "replaced_mode": ['drive']
-        }
-
-        # generate $n trips. $m of them should have origin and destinations sampled
+        # generate $n trips. $m of them should have origin sampled
         # within a radius that should have them binned.
         n = 20
         m = 5
 
-        # trip_part: when mock trips are generated, coordinates of this part of 
-        #            m trips will be within the threshold. trip_part can take one
-        #            among the four values:
-        #
-        #            1. '__' ->(None, meaning NEITHER origin nor destination of any trip will lie 
-        #             within the mentioned threshold when trips are generated),
-        #
-        #            2. 'o_' ->(origin, meaning ONLY origin of m trips will lie within the mentioned 
-        #             threshold when trips are generated),
-        #
-        #            3. '_d' ->(destination),meaning ONLY destination of m trips will lie within the 
-        #             mentioned threshold when trips are generated)
-        #
-        #            4. 'od' ->(origin and destination,meaning BOTH origin and destination of m trips
-        #             will lie within the mentioned threshold when trips are generated)
-
-        trips = etmm.generate_mock_trips(
-            user_id="joe", 
-            trips=n, 
-            origin=(0, 0), 
-            destination=(1, 1),
-            trip_part='od',
-            label_data=label_data, 
-            within_threshold=m, 
-            threshold=0.001,  # ~ 111 meters in degrees WGS84
-        )
-
-        # pass in a test configuration to the binning algorithm.
-        #
-        # clustering_way : Part of the trip used for checking pairwise proximity.
-        #                  Can take one of the three values:
-        #                  
-        #                   1. 'origin' -> using origin of the trip to check if 2 points
-        #                                   lie within the mentioned similarity_threshold_meters
-        #                   2. 'destination' -> using destination of the trip to check if 2 points
-        #                                       lie within the mentioned similarity_threshold_meters
-        #                   3. 'origin-destination' -> both origin and destination of the trip to check 
-        #                                             if 2 points lie within the mentioned 
-        #                                              similarity_threshold_meters
-        
-        model1_config = {
-            "metric": "od_similarity",
-            "similarity_threshold_meters": 111,  # meters,
-            "apply_cutoff": False,
-            "clustering_way": "origin",
-            "incremental_evaluation": False
-        }
-        model1 = eamtg.GreedySimilarityBinning(model1_config)
-        model1.fit(trips)
-
-
-        model2_config = {
-            "metric": "od_similarity",
-            "similarity_threshold_meters":111,  # meters,
-            "apply_cutoff": False,
-            "clustering_way": "destination",
-            "incremental_evaluation": False
-        }
-        model2 = eamtg.GreedySimilarityBinning(model2_config)
-        model2.fit(trips)
-
-
-        model3_config = {
-            "metric": "od_similarity",
-            "similarity_threshold_meters": 111,  # meters,
-            "apply_cutoff": False,
-            "clustering_way": "origin-destination",
-            "incremental_evaluation": False
-        }
-        model3 = eamtg.GreedySimilarityBinning(model3_config)
-        model3.fit(trips)
-
-        # Since both the origin and the destination points of the generated trips lie 
-        # within the threshold, there should be binning in all three configs.
-
-        at_least_one_large_bin = any(map(lambda b: len(b['feature_rows']) == m, model1.bins.values()))
-        self.assertTrue(at_least_one_large_bin, "at least one bin should have at least 5 features in it")
-
-        at_least_one_large_bin = any(map(lambda b: len(b['feature_rows']) ==m, model2.bins.values()))
-        self.assertTrue(at_least_one_large_bin, "no bin should have more than 1 features in it")
-
-        at_least_one_large_bin = any(map(lambda b: len(b['feature_rows']) == m, model3.bins.values()))
-        self.assertTrue(at_least_one_large_bin, "no bin should have more than 1 features in it")
-
+        # parameters passed for testing. A list, where each element of this list takes the form 
+        # [trip part to be sampled within mentioned threshold , clustering way used to check similarity]
+        parameters= [["o_",'origin'],["_d",'destination'],["od",'origin-destination']]
+        for tp,cw in parameters:
+            with self.subTest(trip_part=tp,clustering_way=cw):
+                #generate random trips using utilities
+                trips = etmu.setTripConfig(trips=n, org=(0, 0), dest=(1, 1),
+                                trip_part=tp, within_thr=m)
+                #initialise the binning model and fit with previously generated trips
+                model = etmu.setModelConfig("od_similarity",  500,  False, cw, False)
+                model.fit(trips)
+                #check each bins for no of trips
+                at_least_one_large_bin = any(map(lambda b: len(b['feature_rows']) == m, model.bins.values()))
+                #Since 5 trips were sampled within the threshold, there should be one bin with 5 trips
+                self.assertTrue(at_least_one_large_bin, "at least one bin should have at least 5 features in it")
 
     def testPrediction(self):
         """
@@ -449,24 +81,10 @@ def testPrediction(self):
         }
 
         n = 6
-        trips = etmm.generate_mock_trips(
-            user_id="joe", 
-            trips=n, 
-            origin=(0, 0), 
-            destination=(1, 1),
-            trip_part='od', 
-            label_data=label_data, 
-            threshold=0.001,  # ~ 111 meters in degrees WGS84
+        trips = etmu.setTripConfig(trips=n, org=(0, 0), dest=(1, 1),
+                                   trip_part='od', label_data=label_data,                                   
         )
-
-        model_config = {
-            "metric": "od_similarity",
-            "similarity_threshold_meters": 500,      # meters,
-            "apply_cutoff": False,
-            "clustering_way": "origin-destination",
-            "incremental_evaluation": False
-        }
-        model = eamtg.GreedySimilarityBinning(model_config)
+        model = etmu.setModelConfig("od_similarity",  500,  False, "origin-destination", False)
         
         train = trips[0:5]
         test = trips[5]
@@ -486,36 +104,17 @@ def testNoPrediction(self):
             "purpose_confirm": ['pizza_party'],
             "replaced_mode": ['crabwalking']
         }
-
         n = 5
-        train = etmm.generate_mock_trips(
-            user_id="joe", 
-            trips=n, 
-            origin=(39.7645187, -104.9951944),       # Denver, CO
-            destination=(39.7435206, -105.2369292),  # Golden, CO
-            trip_part='od',
-            label_data=label_data, 
-            threshold=0.001,  # ~ 111 meters in degrees WGS84
+
+        train = etmu.setTripConfig(trips=n, org=(39.7645187, -104.9951944), # Denver, CO
+                                   dest=(39.7435206, -105.2369292),  # Golden, CO
+                                   trip_part='od', label_data=label_data                                 
         )
-        test = etmm.generate_mock_trips(
-            user_id="joe", 
-            trips=1, 
-            origin=(61.1042262, -150.5611644),       # Anchorage, AK
-            destination=(62.2721466, -150.3233046),  # Talkeetna, AK
-            trip_part='od',
-            label_data=label_data, 
-            threshold=0.001,  # ~ 111 meters in degrees WGS84
+        test = etmu.setTripConfig(trips=n, org=(61.1042262, -150.5611644), # Anchorage, AK
+                                   dest=(62.2721466, -150.3233046),  # Talkeetna, AK
+                                   trip_part='od', label_data=label_data,                                   
         )
-
-        model_config = {
-            "metric": "od_similarity",
-            "similarity_threshold_meters": 500,      # meters,
-            "apply_cutoff": False,
-            "clustering_way": "origin-destination",
-            "incremental_evaluation": False
-        }
-        model = eamtg.GreedySimilarityBinning(model_config)
-
+        model = etmu.setModelConfig("od_similarity",  500,  False, "origin-destination", False)
         model.fit(train)
         results, n = model.predict(test[0])
 
diff --git a/emission/tests/modellingTests/TestSimilarityMetric.py b/emission/tests/modellingTests/TestSimilarityMetric.py
index f7c7b195f..cbe500b23 100644
--- a/emission/tests/modellingTests/TestSimilarityMetric.py
+++ b/emission/tests/modellingTests/TestSimilarityMetric.py
@@ -1,98 +1,61 @@
 import unittest
-import emission.tests.modellingTests.modellingTestAssets as etmm
 import emission.analysis.modelling.similarity.od_similarity as eamso
+import emission.tests.modellingTests.utilities as etmu
 
 class TestSimilarityMetric(unittest.TestCase):
 
     def testODsAreSimilar(self):
         generate_points_thresh = 0.001  # approx. 111 meters
-        similarity_threshold = 111  # 
-
+        similarity_threshold = 500  # in meters
         metric = eamso.OriginDestinationSimilarity()
-        ## Sub-Test 1 - 3 :
-        # random, but, origin and destination points are sampled within a circle and should always be < sim threshold
-        # Since both origin and destination poitns lie within threshold limits,they should be similar
-        # when we check by just origin or just destination or both origin-and-destination
-
-        trips = etmm.generate_mock_trips('bob', 2, [0, 0], [1, 1], 'od',threshold=generate_points_thresh) 
-        coords0 = metric.extract_features(trips[0])
-        coords1 = metric.extract_features(trips[1])        
-        similarOD1 = metric.similar(coords0, coords1, similarity_threshold)
-        similarOD2 = metric.similar(coords0[:2], coords1[:2], similarity_threshold)
-        similarOD3 = metric.similar(coords0[2:], coords1[2:], similarity_threshold)
-
-        ## Sub-Test 4 :
-        # random, but, only origin points are sampled within a circle and should always be < sim threshold
-        # Since origin of two points lies within threshold limits,they should be similar
-        # when we check just origin for similarity.
-
-
-        trips = etmm.generate_mock_trips('alice', 2, [0, 0], [1, 1], 'o_',threshold=generate_points_thresh)        
-        coords0 = metric.extract_features(trips[0])[:2]
-        coords1 = metric.extract_features(trips[1])[:2]        
-        similarO = metric.similar(coords0, coords1, similarity_threshold)
-
-        ##Sub-Test 5 :
-        # random, but, only destination points are sampled within a circle and should always be < sim threshold
-        # Since destination of two points lies within threshold limits,they should be similar
-        # when we check just destination for similarity.
-
-        trips = etmm.generate_mock_trips('Caty', 2, [0, 0], [1, 1], '_d',threshold=generate_points_thresh)        
-        coords0 = metric.extract_features(trips[0])[2:]
-        coords1 = metric.extract_features(trips[1])[2:]        
-        similarD = metric.similar(coords0, coords1, similarity_threshold)
 
-        # All the similars must be true
-        self.assertTrue(similarOD1) # RESULT SUB-TEST 1
-        self.assertTrue(similarOD2) # RESULT SUB-TEST 2
-        self.assertTrue(similarOD3) # RESULT SUB-TEST 3
-        self.assertTrue(similarO)  # RESULT SUB-TEST 4
-        self.assertTrue(similarD) # RESULT SUB-TEST 5
+        # parameters passed for testing is set here. A list, where each element of this list takes the form 
+        # [trip part to be sampled within mentioned threshold, (start_coord,end_coord)]
+        # Since the extracted_features function returns in the form [origin_lat,origin_long,destination_lat,destination_long],
+        # if clustering is to be done by :
+        #   a.origin, we pass first two values of this list,i.e. from 0 till before 2 index
+        #   b.destination, we pass last two values of this list,i.e. from 2 till before 4 index
+        #   c.origin-destination, we pass the entire list , i.e. from 0 till before 4 index
+        parameters= [["od",(0,4)],["_d",(2,4)],["o_",(0,2)]]
+
+        for tp,(coord_start,coord_end) in parameters:
+            with self.subTest(trip_part=tp):
+                #generate 2 trips with parameter values
+                trips = etmu.setTripConfig(2, [0, 0], [1, 1], trip_part=tp,threshold=generate_points_thresh) 
+                # depending on the parameters, extract the relevant coordinates
+                trip0_coords = metric.extract_features(trips[0])[coord_start:coord_end]
+                trip1_coords = metric.extract_features(trips[1])[coord_start:coord_end]
+                #check for similarity using relevant coordinates
+                similarOD = metric.similar(trip0_coords,trip1_coords, similarity_threshold)
+                # Since the compared part of the trips (origin, destination, or both) was sampled
+                # within the threshold limits, the trips should be similar when checked on that part
+                self.assertTrue(similarOD)
     
     def testODsAreNotSimilar(self):
-        generate_points_thresh = 0.001  # approx. 111 meters
-        similarity_threshold = 111  # 
-        metric = eamso.OriginDestinationSimilarity()
-
-        ## Sub-Test 1-2: 
-        # Two trips with neither origin nor destination coordinates within threshold
-        # must not be similar in any configuration of similarity testing.
-        trips = etmm.generate_mock_trips('bob', 2, [0, 0], [1, 1], '__', threshold=generate_points_thresh)  
-        coords0 = metric.extract_features(trips[0])
-        coords1 = metric.extract_features(trips[1])
-        similar11 = metric.similar(coords0[:2], coords1[:2], similarity_threshold)
-        similar12 = metric.similar(coords0[2:], coords1[:], similarity_threshold)
-
-        ## Sub-Test 3-4: 
-        # Two trips with  origin coordinates within threshold but we check  
-        # similarity using destination coordinates or origin-and-destination
-        # should not be similar.
-        trips = etmm.generate_mock_trips('Alice', 2, [2, 2], [3, 3], 'o_', threshold=generate_points_thresh)
+        similarity_threshold = 500
         metric = eamso.OriginDestinationSimilarity()
-        coords0 = metric.extract_features(trips[0])
-        coords1 = metric.extract_features(trips[1])
-        similar21 = metric.similar(coords0[2:], coords1[2:], similarity_threshold)
-        similar22 = metric.similar(coords0, coords1, similarity_threshold)
-
-        ## Sub-Test 5-6: 
-        # Two trips with destination coordinates within threshold but we check 
-        # similarity using origin coordinates or origin-and-destination 
-        # should not be similar.        
-        trips = etmm.generate_mock_trips('Caty', 2, [3, 3], [4, 4], '_d', threshold=generate_points_thresh)
-        metric = eamso.OriginDestinationSimilarity()
-        coords0 = metric.extract_features(trips[0])
-        coords1 = metric.extract_features(trips[1])
-        similar31 = metric.similar(coords0[:2], coords1[:2], similarity_threshold)
-        similar32 = metric.similar(coords0, coords1, similarity_threshold)
-
-        # All the similars must be False
-        self.assertFalse(similar11) # RESULT SUB-TEST 1
-        self.assertFalse(similar12) # RESULT SUB-TEST 2
-        self.assertFalse(similar21) # RESULT SUB-TEST 3
-        self.assertFalse(similar22) # RESULT SUB-TEST 4
-        self.assertFalse(similar31) # RESULT SUB-TEST 5
-        self.assertFalse(similar32) # RESULT SUB-TEST 6
 
+        # parameters passed for testing is set. A list, where each element of this list takes the form 
+        # [(start_coord,end_coord)]
+        # Since the extracted_features function return in the form [origin_lat,origin_long,destination_lat,destination_long],
+        # if clustering shouldn't happen, then
+        #   a.origin, we pass first two values of this list,i.e. from 0 till before 2 index
+        #   b.destination, we pass last two values of this list,i.e. from 2 till before 4 index
+        #   c.origin-destination, we pass the entire list , i.e. from 0 till before 4 index
+        parameters= [(0,2),(2,4),[0,4]]
+        n=2
+        #this generates 2 trips one-by-one, where each trip's respective origin and destination 
+        # points are more than 500m away.
+        trips = [etmu.setTripConfig(1, (i, i), (i+1, i+1), 'od', 1)[0] for i in range(n)]    
+        trip0_coord = metric.extract_features(trips[0])
+        trip1_coord = metric.extract_features(trips[1])
+
+        for (coord_start,coord_end) in parameters:
+            with self.subTest(coordinates=(coord_start,coord_end)):      
+                IsSimilar = metric.similar(trip0_coord[coord_start:coord_end],trip1_coord[coord_start:coord_end], similarity_threshold)
+                # Two trips with neither origin nor destination coordinates within the threshold
+                # must not be similar by any configuration of similarity testing.
+                self.assertFalse(IsSimilar)
 
 if __name__ == '__main__':
     unittest.main()
diff --git a/emission/tests/modellingTests/utilities.py b/emission/tests/modellingTests/utilities.py
new file mode 100644
index 000000000..9f03358bb
--- /dev/null
+++ b/emission/tests/modellingTests/utilities.py
@@ -0,0 +1,68 @@
+import emission.analysis.modelling.trip_model.greedy_similarity_binning as eamtg
+import emission.tests.modellingTests.modellingTestAssets as etmm
+
+def setModelConfig(metric,threshold,cutoff,clustering_way,incrementalevaluation):
+    """
+    TODO : tell about each param.
+    pass in a test configuration to the binning algorithm.
+    
+    clustering_way : Part of the trip used for checking pairwise proximity.
+                        Can take one of the three values:
+                        
+                        1. 'origin' -> using origin of the trip to check if 2 points
+                                        lie within the mentioned similarity_threshold_meters
+                        2. 'destination' -> using destination of the trip to check if 2 points
+                                            lie within the mentioned similarity_threshold_meters
+                        3. 'origin-destination' -> both origin and destination of the trip to check 
+                                                if 2 points lie within the mentioned 
+                                                    similarity_threshold_meters
+    """        
+    model_config = {
+        "metric": metric,
+        "similarity_threshold_meters": threshold,  # meters,
+        "apply_cutoff": cutoff,
+        "clustering_way": clustering_way,  
+        "incremental_evaluation": incrementalevaluation
+    }
+
+    return eamtg.GreedySimilarityBinning(model_config)
+
+
+def setTripConfig(trips,org,dest,trip_part,within_thr=None,label_data=None,threshold=0.001):
+    """
+    TODO: Tell about each
+                trip_part: when mock trips are generated, coordinates of this part of 
+                m trips will be within the threshold. trip_part can take one
+                among the four values:
+    
+                1. '__' ->(None, meaning NEITHER origin nor destination of any trip will lie 
+                within the mentioned threshold when trips are generated),
+    
+                2. 'o_' ->(origin, meaning ONLY origin of m trips will lie within the mentioned 
+                threshold when trips are generated),
+    
+                3. '_d' ->(destination),meaning ONLY destination of m trips will lie within the 
+                mentioned threshold when trips are generated)
+    
+                4. 'od' ->(origin and destination,meaning BOTH origin and destination of m trips
+                will lie within the mentioned threshold when trips are generated)
+    """
+    if label_data == None:            
+        label_data = {
+            "mode_confirm": ['walk', 'bike', 'transit'],
+            "purpose_confirm": ['work', 'home', 'school'],
+            "replaced_mode": ['drive']
+        }
+
+    trip =etmm.generate_mock_trips(
+            user_id="joe", 
+            trips=trips, 
+            origin=org, 
+            destination=dest,
+            trip_part=trip_part,
+            label_data=label_data, 
+            within_threshold=within_thr, 
+            threshold=threshold,  
+        )
+    return trip  
+    
\ No newline at end of file

From c35b7c1b6ce94786511f0884d04445e50625a157 Mon Sep 17 00:00:00 2001
From: $aTyam <satyam.saini@rutgers.edu>
Date: Fri, 8 Sep 2023 18:08:46 -0400
Subject: [PATCH 07/10] Changed logic for Random trip generation

Random trips are now generated like this:

If certain trips are to be binned together (by 'o', 'd', 'od' or '__' (meaning NONE)), they are generated in proximity to a previous in-bin trip. Otherwise, if they are not to be binned together, we keep generating random trips until we find one that would not bin with the previously accepted trips.
---
 .../modellingTests/modellingTestAssets.py     | 164 +++++++++++++++---
 emission/tests/modellingTests/utilities.py    |  68 --------
 2 files changed, 137 insertions(+), 95 deletions(-)
 delete mode 100644 emission/tests/modellingTests/utilities.py

diff --git a/emission/tests/modellingTests/modellingTestAssets.py b/emission/tests/modellingTests/modellingTestAssets.py
index cb886670c..f98736048 100644
--- a/emission/tests/modellingTests/modellingTestAssets.py
+++ b/emission/tests/modellingTests/modellingTestAssets.py
@@ -1,35 +1,145 @@
 import random
 from typing import Optional, Tuple, List, Dict
 from uuid import UUID
-import emission.analysis.modelling.trip_model.trip_model as eamtm
+import emission.analysis.modelling.trip_model.greedy_similarity_binning as eamtg
+import emission.tests.modellingTests.modellingTestAssets as etmm
 import emission.core.wrapper.confirmedtrip as ecwc
 
 import emission.core.wrapper.entry as ecwe
 import time 
 import math
 
+def setModelConfig(metric,threshold,cutoff,clustering_way,incrementalevaluation):
+    """
+    TODO: Write about each parameter to the function
+    pass in a test configuration to the binning algorithm.
+    
+    clustering_way : Part of the trip used for checking pairwise proximity.
+                        Can take one of the three values:
+                        
+                        1. 'origin' -> using origin of the trip to check if 2 points
+                                        lie within the mentioned similarity_threshold_meters
+                        2. 'destination' -> using destination of the trip to check if 2 points
+                                            lie within the mentioned similarity_threshold_meters
+                        3. 'origin-destination' -> both origin and destination of the trip to check 
+                                                if 2 points lie within the mentioned 
+                                                    similarity_threshold_meters
+    """        
+    model_config = {
+        "metric": metric,
+        "similarity_threshold_meters": threshold,  # meters,
+        "apply_cutoff": cutoff,
+        "clustering_way": clustering_way,  
+        "incremental_evaluation": incrementalevaluation
+    }
+
+    return eamtg.GreedySimilarityBinning(model_config)
+
+def generate_random_point():
+    """Generate a completely random point with valid WGS84 latitude and longitude"""
+    lat=random.uniform(-90,90)
+    lon=random.uniform(-180,180)
+    return [lat,lon]
+
+def generate_nearby_random_points(ref_coords,threshold):
+    """
+    Generate valid WGS84 latitude and longitude in threshold(m) proximity to
+    ref coordinates
+    """
+
+    thresholdInWGS84 = threshold* (0.000001/0.11)
+    dx=random.uniform(-thresholdInWGS84/2,thresholdInWGS84/2)
+    dy=random.uniform(-thresholdInWGS84/2,thresholdInWGS84/2)
+    return [ref_coords[0] +dx , ref_coords[1] +dy]
+
+def calDistanceTest(point1, point2, coordinates=False):
+    """haversine distance
+
+    :param point1: a coordinate in degrees WGS84
+    :param point2: another coordinate in degrees WGS84
+    :param coordinates: if false, expect a list of coordinates, defaults to False
+    :return: distance approximately in meters
+    """
+    earthRadius = 6371000  # meters
+    if coordinates:
+        dLat = math.radians(point1.lat-point2.lat)
+        dLon = math.radians(point1.lon-point2.lon)
+        lat1 = math.radians(point1.lat)
+        lat2 = math.radians(point2.lat)
+    else:
+        dLat = math.radians(point1[1]-point2[1])
+        dLon = math.radians(point1[0]-point2[0])
+        lat1 = math.radians(point1[1])
+        lat2 = math.radians(point2[1])
+
+
+    a = (math.sin(dLat/2) ** 2) + ((math.sin(dLon/2) ** 2) * math.cos(lat1) * math.cos(lat2))
+    c = 2 * math.atan2(math.sqrt(a), math.sqrt(1-a))
+    d = earthRadius * c
+
+    return d
+
+def setTripConfig(trips,trip_part,threshold,within_thr=None,label_data=None):
+    """
+    TODO: Write about each parameter to the function
+                trip_part: when mock trips are generated, coordinates of this part of 
+                m trips will be within the threshold. trip_part can take one
+                among the four values:
+    
+                1. '__' ->(None, meaning NEITHER origin nor destination of any trip will lie 
+                within the mentioned threshold when trips are generated),
+    
+                2. 'o_' ->(origin, meaning ONLY origin of m trips will lie within the mentioned 
+                threshold when trips are generated),
+    
+                3. '_d' ->(destination),meaning ONLY destination of m trips will lie within the 
+                mentioned threshold when trips are generated)
+    
+                4. 'od' ->(origin and destination,meaning BOTH origin and destination of m trips
+                will lie within the mentioned threshold when trips are generated)
+    """
+    if label_data == None:            
+        label_data = {
+            "mode_confirm": ['walk', 'bike', 'transit'],
+            "purpose_confirm": ['work', 'home', 'school'],
+            "replaced_mode": ['drive']
+        }
+
+    trip =etmm.generate_mock_trips(
+            user_id="joe", 
+            trips=trips, 
+            trip_part=trip_part,
+            label_data=label_data, 
+            within_threshold=within_thr, 
+            threshold=threshold,  
+        )
+    return trip  
 
 def generate_trip_coordinates(
-    ref_coords: Tuple[float, float], 
+    points_list: list[float], 
     within_threshold: bool,
-    threshold: float, 
-    max: float = 0.1  # approx. 10km in WGS84 
+    threshold_meters: float, 
     ) -> Tuple[float, float]:
-    """generates trip coordinate data to use when mocking a set of trip data.
+    """generates trip coordinate data to use when mocking a set of trip data.
+    If the generated coordinate is to be binned together, it is generated in proximity of
+    a previously selected point in points_list. Otherwise, if this point is not to be binned together,
+    keep generating random points until we find one that would not bin with the previously
+    accepted points.
 
-    :param ref_coords: reference coordinates to use as the center of the sampling circle
-    :param within_threshold: how many of these trips are within some distance threshold
-    :param threshold: the distance threshold, in WGS84
-    :param max: max distance, in WGS84, defaults to 0.1 (approx. 10km)
+    :param points_list: list of all the previously selected points
+    :param within_threshold: whether this point is supposed to be within some distance threshold
+    :param threshold_meters: the distance threshold, in meters
     :return: generated coordinate pairs sampled in a 
              circle from some coordinates up to some threshold
     """
-    angle = 2 * math.pi * random.random()
-    radius_threshold = threshold / 2
-    radius = random.uniform(0, radius_threshold) if within_threshold else random.uniform(radius_threshold, max)
-    x = radius * math.cos(angle) + ref_coords[0]
-    y = radius * math.sin(angle) + ref_coords[1]
-    return (x, y)
+
+    if within_threshold and points_list:
+        new_point = generate_nearby_random_points(random.choice(points_list), threshold_meters)    
+    else:
+        new_point = generate_random_point()
+        while not all(calDistanceTest(new_point, pt) > threshold_meters for pt in points_list):
+            new_point = generate_random_point()
+    return new_point
 
 
 def extract_trip_labels(trips: List[ecwc.Confirmedtrip]) -> Dict:
@@ -129,15 +239,12 @@ def build_mock_trip(
 def generate_mock_trips(
     user_id, 
     trips,
-    origin, 
-    destination, 
+    threshold,
     trip_part='od',
     label_data = None, 
     within_threshold = None,
     start_ts: None = None,
     end_ts: None = None,
-    threshold = 0.01,
-    max = 0.1, 
     has_label_p = 1.0,
     seed = 0):
     """mocking function that generates multiple trips for a user. some are sampled 
@@ -158,8 +265,6 @@ def generate_mock_trips(
 
     :param user_id: user UUID
     :param trips: number of trips
-    :param origin: origin coordinates
-    :param destination: destination coordinates
     :param trip_part: when mock trips are generated, coordinates of this part of 
                       the trips will be within the threshold. trip_part can take one
                       among the four values:
@@ -173,10 +278,8 @@ def generate_mock_trips(
                      will lie within the mentioned threshold when trips are generated)
     :param label_data: dictionary of label data, see above, defaults to None
     :param within_threshold: number of trips that should fall within the provided
-           distance threshold in degrees WGS84, defaults to None
-    :param threshold: distance threshold in WGS84 for sampling, defaults to 0.01
-    :param max: maximum distance beyond the threshold for trips sampled that
-                are not within the threshold, defaults to 0.1 degrees WGS84
+           distance threshold in m
+    :param threshold: distance threshold in WGS84 for sampling
     :param has_label_p: probability a trip has labels, defaults to 1.0
     :param seed: random seed, defaults to 0
     :return: randomly sampled trips
@@ -186,9 +289,16 @@ def generate_mock_trips(
     within = within_threshold if within_threshold is not None else trips
     trips_within_threshold = [i < within for i in range(trips)]
     result = []
+    origin_points=[]
+    destination_points=[]    
+
+    # generate trip number of points based on which among 'o' ,'d' or 'od' should be in threshold 
+    # proximity to each other. 
     for within in trips_within_threshold:
-        o = generate_trip_coordinates(origin, (trip_part[0] == 'o' and within), threshold, max)
-        d = generate_trip_coordinates(destination, (trip_part[1] == 'd' and within), threshold, max)
+        origin_points.append(generate_trip_coordinates(origin_points, (trip_part[0] == 'o' and within), threshold))
+        destination_points.append(generate_trip_coordinates(destination_points, (trip_part[1] == 'd' and within), threshold))
+
+    for o,d in zip(origin_points,destination_points):    
         labels = {} if label_data is None or random.random() > has_label_p \
             else sample_trip_labels(
             mode_labels=label_data.get('mode_confirm'),
diff --git a/emission/tests/modellingTests/utilities.py b/emission/tests/modellingTests/utilities.py
deleted file mode 100644
index 9f03358bb..000000000
--- a/emission/tests/modellingTests/utilities.py
+++ /dev/null
@@ -1,68 +0,0 @@
-import emission.analysis.modelling.trip_model.greedy_similarity_binning as eamtg
-import emission.tests.modellingTests.modellingTestAssets as etmm
-
-def setModelConfig(metric,threshold,cutoff,clustering_way,incrementalevaluation):
-    """
-    TODO : tell about each param.
-    pass in a test configuration to the binning algorithm.
-    
-    clustering_way : Part of the trip used for checking pairwise proximity.
-                        Can take one of the three values:
-                        
-                        1. 'origin' -> using origin of the trip to check if 2 points
-                                        lie within the mentioned similarity_threshold_meters
-                        2. 'destination' -> using destination of the trip to check if 2 points
-                                            lie within the mentioned similarity_threshold_meters
-                        3. 'origin-destination' -> both origin and destination of the trip to check 
-                                                if 2 points lie within the mentioned 
-                                                    similarity_threshold_meters
-    """        
-    model_config = {
-        "metric": metric,
-        "similarity_threshold_meters": threshold,  # meters,
-        "apply_cutoff": cutoff,
-        "clustering_way": clustering_way,  
-        "incremental_evaluation": incrementalevaluation
-    }
-
-    return eamtg.GreedySimilarityBinning(model_config)
-
-
-def setTripConfig(trips,org,dest,trip_part,within_thr=None,label_data=None,threshold=0.001):
-    """
-    TODO: Tell about each
-                trip_part: when mock trips are generated, coordinates of this part of 
-                m trips will be within the threshold. trip_part can take one
-                among the four values:
-    
-                1. '__' ->(None, meaning NEITHER origin nor destination of any trip will lie 
-                within the mentioned threshold when trips are generated),
-    
-                2. 'o_' ->(origin, meaning ONLY origin of m trips will lie within the mentioned 
-                threshold when trips are generated),
-    
-                3. '_d' ->(destination),meaning ONLY destination of m trips will lie within the 
-                mentioned threshold when trips are generated)
-    
-                4. 'od' ->(origin and destination,meaning BOTH origin and destination of m trips
-                will lie within the mentioned threshold when trips are generated)
-    """
-    if label_data == None:            
-        label_data = {
-            "mode_confirm": ['walk', 'bike', 'transit'],
-            "purpose_confirm": ['work', 'home', 'school'],
-            "replaced_mode": ['drive']
-        }
-
-    trip =etmm.generate_mock_trips(
-            user_id="joe", 
-            trips=trips, 
-            origin=org, 
-            destination=dest,
-            trip_part=trip_part,
-            label_data=label_data, 
-            within_threshold=within_thr, 
-            threshold=threshold,  
-        )
-    return trip  
-    
\ No newline at end of file

From f5944ccef1140db347211c93b851b8864c0362ee Mon Sep 17 00:00:00 2001
From: $aTyam <satyam.saini@rutgers.edu>
Date: Tue, 12 Sep 2023 12:27:47 -0400
Subject: [PATCH 08/10] [TESTED] Explicit clustering method, Improved mock trip
 generation

`od_similarity.py`
1.  Explicitly passing 'origin', 'destination', 'origin-destination' for similarity check  in `similarity`

`similarity_metric.py`
2.  Passing the clustering_way parameter

`greedy_similarity_binning.py`
3.  Since this decision-making has moved downstream to `similarity`, it is removed from here.

`modellingTestAssets.py`
4. Removed both two-line wrappers (`setModelConfig`, `setTripConfig`) from this file, since the tests were parametrised using subTest two commits back.

5. Removed `calDistanceTest`. It was introduced to keep the distance calculation used by the tests separate from the `calDistance` used by `GreedySimilarityBinning`, which is unnecessary.

6.  Using ref. coordinates whenever provided to generate trip coordinates. If not, use randomly generated coordinates as reference points.

7. Receiving and passing origin and destination ref. points in `generate_mock_trips`.

`TestGreedySimilarityBinning.py`

8. removed wrappers for trip and model generation.

9. Using a single threshold for both generating trips and binning, instead of two separate thresholds.

`TestSimilarityMetric.py`

10. Removing the implicitness used in binning by passing the clustering way explicitly as a parameter.
---
 .../modelling/similarity/od_similarity.py     |  34 ++--
 .../modelling/similarity/similarity_metric.py |  12 +-
 .../trip_model/greedy_similarity_binning.py   |   9 +-
 .../TestGreedySimilarityBinning.py            | 109 ++++++++++---
 .../modellingTests/TestSimilarityMetric.py    |  26 ++--
 .../modellingTests/modellingTestAssets.py     | 145 +++++-------------
 6 files changed, 168 insertions(+), 167 deletions(-)

diff --git a/emission/analysis/modelling/similarity/od_similarity.py b/emission/analysis/modelling/similarity/od_similarity.py
index 9a6a49d0d..056c721a3 100644
--- a/emission/analysis/modelling/similarity/od_similarity.py
+++ b/emission/analysis/modelling/similarity/od_similarity.py
@@ -15,24 +15,28 @@ class OriginDestinationSimilarity(eamss.SimilarityMetric):
     def extract_features(self, trip: ecwc.Confirmedtrip) -> List[float]:
         return ctfe.od_features(trip)
 
-    def similarity(self, a: List[float], b: List[float]) -> List[float]:
+    def similarity(self, a: List[float], b: List[float], clustering_way='origin-destination') -> List[float]:
         """
-        a : a list of point features that can take either of two forms
-                    1. [point1_latitude,point1_longitude]  
-                    2. [point1_latitude,point1_longitude,point2_latitude,point2_longitude] 
+        a : a list of point features that takes the forms
+          [point1_longitude,point1_latitude,point2_longitude,point2_latitude] 
                     
-        b : a list of point features that can take either of two forms
-                    1. [point3_latitude,point3_longitude]  
-                    2. [point3_latitude,point3_longitude,point4_latitude,point4_longitude] 
-            
-            It'll always take the same form as parameter a.
-
+        b : a list of point features that takes the forms
+          [point1_longitude,point1_latitude,point2_longitude,point2_latitude] 
+        
+        clustering_way : takes one among 'origin', 'destination', 'origin-destination' as value.
+                         tells the part of the trip to be used for binning trips together if that 
+                         part lies within threshold.
+                                                  
         return: a list of size 1 ([distance between point1-point3]) if a and b take form 1
                 or of size 2 ([distance between point1-point3, distance between point2-point4])
                 if a and b take form 2.
         """
-        
-        point_dist = [ecc.calDistance(a[i:i+2], b[i:i+2]) 
-                      for i in range (0,len(a),2)] 
-        
-        return point_dist
\ No newline at end of file
+        origin_dist = ecc.calDistance(a[0:2], b[0:2])
+        destination_dist=ecc.calDistance(a[2:4], b[2:4])
+
+        if clustering_way == 'origin-destination':
+            return [origin_dist,destination_dist]
+        elif clustering_way == 'origin':
+            return [origin_dist]
+        else:
+            return [destination_dist]
\ No newline at end of file
diff --git a/emission/analysis/modelling/similarity/similarity_metric.py b/emission/analysis/modelling/similarity/similarity_metric.py
index 1b520318f..c009be9e9 100644
--- a/emission/analysis/modelling/similarity/similarity_metric.py
+++ b/emission/analysis/modelling/similarity/similarity_metric.py
@@ -17,26 +17,32 @@ def extract_features(self, trip: ecwc.Confirmedtrip) -> List[float]:
         pass
 
     @abstractmethod
-    def similarity(self, a: List[float], b: List[float]) -> List[float]:
+    def similarity(self, a: List[float], b: List[float], clustering_way = 'origin-destination') -> List[float]:
         """compares the features, producing their similarity
         as computed by this similarity metric
 
         :param a: features for a trip
         :param b: features for another trip
+        :param clustering_way : takes one among 'origin', 'destination', 'origin-destination' as value.
+                                tells the part of the trip to be used for binning trips together if that 
+                                part lies within a threshold.
         :return: for each feature, the similarity of these features
         """
         pass
 
-    def similar(self, a: List[float], b: List[float], thresh: float) -> bool:
+    def similar(self, a: List[float], b: List[float], thresh: float, clustering_way= 'origin-destination') -> bool:
         """compares the features, returning true if they are similar
         within some threshold
 
         :param a: features for a trip 
         :param b: features for another trip
         :param thresh: threshold for similarity
+        :param clustering_way : takes one among 'origin', 'destination', 'origin-destination' as value.
+                                tells the part of the trip to be used for binning trips together if that 
+                                part lies within a threshold.
         :return: true if the feature similarity is within some threshold
         """
-        similarity_values = self.similarity(a, b)
+        similarity_values = self.similarity(a, b, clustering_way)
         is_similar = all(sim <= thresh for sim in similarity_values)
 
         return is_similar
diff --git a/emission/analysis/modelling/trip_model/greedy_similarity_binning.py b/emission/analysis/modelling/trip_model/greedy_similarity_binning.py
index efcce4f02..226fdefb5 100644
--- a/emission/analysis/modelling/trip_model/greedy_similarity_binning.py
+++ b/emission/analysis/modelling/trip_model/greedy_similarity_binning.py
@@ -212,14 +212,7 @@ def _find_matching_bin_id(self, trip_features: List[float]) -> Optional[str]:
         :return: the id of a bin if a match was found, otherwise None
         """
         for bin_id, bin_record in self.bins.items():
-            if self.clusteringWay == 'origin':
-                start,end=0,2  #since first two features in trip_features are for origin
-            elif self.clusteringWay == 'destination':
-                start,end=2,4  #third and fourth values intrip_features are for destination
-            elif self.clusteringWay == 'origin-destination':
-                start,end=0,4  #when clusteromgWay is 'origin-destination',we pass all four features
-
-            matches_bin = all([self.metric.similar(trip_features[start:end], bin_sample[start:end], self.sim_thresh)
+            matches_bin = all([self.metric.similar(trip_features, bin_sample, self.sim_thresh,self.clusteringWay)
                 for bin_sample in bin_record['feature_rows']])
             if matches_bin:
                 return bin_id
diff --git a/emission/tests/modellingTests/TestGreedySimilarityBinning.py b/emission/tests/modellingTests/TestGreedySimilarityBinning.py
index 3e1cd78c2..937effc94 100644
--- a/emission/tests/modellingTests/TestGreedySimilarityBinning.py
+++ b/emission/tests/modellingTests/TestGreedySimilarityBinning.py
@@ -1,6 +1,7 @@
 import unittest
 import emission.analysis.modelling.trip_model.greedy_similarity_binning as eamtg
-import emission.tests.modellingTests.utilities as etmu
+import emission.tests.modellingTests.modellingTestAssets as etmm
+
 import logging
 
 
@@ -21,11 +22,29 @@ def testNoBinning(self):
         """
 
         # generate $n trips.
-        n = 20     
-
+        n = 20   
+        binning_threshold=500
         #this generates 20 trips one-by-one, where each trip's respective origin and destination 
         # points are more than 500m away.
-        trips = [ etmu.setTripConfig(1, (i, i), (i+1, i+1), 'od', 1)[0] for i in range(n)]    
+ 
+        
+        label_data = {
+            "mode_confirm": ['walk', 'bike', 'transit'],
+            "purpose_confirm": ['work', 'home', 'school'],
+            "replaced_mode": ['drive']
+        }         
+
+
+        trips =etmm.generate_mock_trips(
+                user_id="joe", 
+                trips=n, 
+                trip_part='__',
+                label_data=label_data, 
+                within_threshold=1, 
+                threshold=binning_threshold,
+                origin=(0,0),
+                destination=(1,1)
+            )
 
         # parameters passed for testing. A list, where each element is one way of clustering
         clustering_ways_paramters= ["origin","destination","origin-destination"]
@@ -34,7 +53,14 @@ def testNoBinning(self):
         for cw in clustering_ways_paramters:
             with self.subTest(clustering_way=cw):
                 #initialise the binning model and fit with previously generated trips
-                model = etmu.setModelConfig("od_similarity",  500,  False, cw, False)
+                model_config = {
+                                    "metric": "od_similarity",
+                                    "similarity_threshold_meters": binning_threshold,  # meters,
+                                    "apply_cutoff": False,
+                                    "clustering_way": cw,  
+                                    "incremental_evaluation": False
+                                }
+                model= eamtg.GreedySimilarityBinning(model_config)
                 model.fit(trips)
                 #check each bins for no of trips
                 no_large_bin = all(map(lambda b: len(b['feature_rows']) == 1, model.bins.values()))
@@ -53,6 +79,12 @@ def testBinning(self):
         # within a radius that should have them binned.
         n = 20
         m = 5
+        binning_threshold=500
+        label_data = {
+            "mode_confirm": ['walk', 'bike', 'transit'],
+            "purpose_confirm": ['work', 'home', 'school'],
+            "replaced_mode": ['drive']
+        }
 
         # parameters passed for testing. A list, where each element of this list takes the form 
         # [trip part to be sampled within mentioned threshold , clustering way used to check similarity]
@@ -60,10 +92,25 @@ def testBinning(self):
         for tp,cw in parameters:
             with self.subTest(trip_part=tp,clustering_way=cw):
                 #generate random trips using utilities
-                trips = etmu.setTripConfig(trips=n, org=(0, 0), dest=(1, 1),
-                                trip_part=tp, within_thr=m)
+                trips =etmm.generate_mock_trips(
+                    user_id="joe", 
+                    trips=n, 
+                    trip_part=tp,
+                    label_data=label_data, 
+                    within_threshold=m, 
+                    threshold=binning_threshold,
+                    origin=(0,0),
+                    destination=(1,1)
+                )
                 #initialise the binning model and fit with previously generated trips
-                model = etmu.setModelConfig("od_similarity",  500,  False, cw, False)
+                model_config = {
+                            "metric": "od_similarity" ,
+                            "similarity_threshold_meters": binning_threshold,  # meters,
+                            "apply_cutoff": False,
+                            "clustering_way": cw,  
+                            "incremental_evaluation": False
+                 }
+                model = eamtg.GreedySimilarityBinning(model_config)
                 model.fit(trips)
                 #check each bins for no of trips
                 at_least_one_large_bin = any(map(lambda b: len(b['feature_rows']) == m, model.bins.values()))
@@ -81,11 +128,24 @@ def testPrediction(self):
         }
 
         n = 6
-        trips = etmu.setTripConfig(trips=n, org=(0, 0), dest=(1, 1),
-                                   trip_part='od', label_data=label_data,                                   
-        )
-        model = etmu.setModelConfig("od_similarity",  500,  False, "origin-destination", False)
-        
+        trips =etmm.generate_mock_trips(
+                user_id="joe", 
+                trips=n, 
+                trip_part='od',
+                label_data=label_data, 
+                within_threshold=n, 
+                threshold=500,
+                origin=(0,0),
+                destination=(1,1)
+            )
+        model_config = {
+                    "metric": "od_similarity",
+                    "similarity_threshold_meters": 500,  # meters,
+                    "apply_cutoff": False,
+                    "clustering_way": 'origin_destination',  
+                    "incremental_evaluation": False
+                                }
+        model= eamtg.GreedySimilarityBinning(model_config)
         train = trips[0:5]
         test = trips[5]
 
@@ -105,16 +165,25 @@ def testNoPrediction(self):
             "replaced_mode": ['crabwalking']
         }
         n = 5
-
-        train = etmu.setTripConfig(trips=n, org=(39.7645187, -104.9951944), # Denver, CO
-                                   dest=(39.7435206, -105.2369292),  # Golden, CO
-                                   trip_part='od', label_data=label_data                                 
+        binning_threshold = 500
+        train = etmm.generate_mock_trips( user_id="joe",trips=n, origin=(39.7645187, -104.9951944), # Denver, CO
+                                   destination=(39.7435206, -105.2369292),  # Golden, CO
+                                   trip_part='od', label_data=label_data,
+                                   threshold=binning_threshold, within_threshold=n
         )
-        test = etmu.setTripConfig(trips=n, org=(61.1042262, -150.5611644), # Denver, CO
-                                   dest=(62.2721466, -150.3233046),  # Golden, CO
+        test = etmm.generate_mock_trips( user_id="amanda",trips=n, origin=(61.1042262, -150.5611644), # Denver, CO
+                                   destination=(62.2721466, -150.3233046),  # Golden, CO
                                    trip_part='od', label_data=label_data,                                   
+                                    threshold=binning_threshold, within_threshold=n
         )
-        model = etmu.setModelConfig("od_similarity",  500,  False, "origin-destination", False)
+        model_config = {
+                    "metric": "od_similarity",
+                    "similarity_threshold_meters": 500,  # meters,
+                    "apply_cutoff": False,
+                    "clustering_way": 'origin_destination',  
+                    "incremental_evaluation": False
+                                }
+        model= eamtg.GreedySimilarityBinning(model_config)
         model.fit(train)
         results, n = model.predict(test[0])
 
diff --git a/emission/tests/modellingTests/TestSimilarityMetric.py b/emission/tests/modellingTests/TestSimilarityMetric.py
index cbe500b23..fe038be4e 100644
--- a/emission/tests/modellingTests/TestSimilarityMetric.py
+++ b/emission/tests/modellingTests/TestSimilarityMetric.py
@@ -1,11 +1,9 @@
 import unittest
 import emission.analysis.modelling.similarity.od_similarity as eamso
-import emission.tests.modellingTests.utilities as etmu
-
+import emission.tests.modellingTests.modellingTestAssets as etmm
 class TestSimilarityMetric(unittest.TestCase):
 
     def testODsAreSimilar(self):
-        generate_points_thresh = 0.001  # approx. 111 meters
         similarity_threshold = 500  # in meters
         metric = eamso.OriginDestinationSimilarity()
 
@@ -16,17 +14,17 @@ def testODsAreSimilar(self):
         #   a.origin, we pass first two values of this list,i.e. from 0 till before 2 index
         #   b.destination, we pas last two values of this list,i.e. from 2 till before 4 index
         #   c.origin-destination, we pass the entire list , i.e. from 0 till before 4 index
-        parameters= [["od",(0,4)],["_d",(2,4)],["o_",(0,2)]]
+        parameters= [["o_",'origin'],["_d",'destination'],["od",'origin-destination']]
 
-        for tp,(coord_start,coord_end) in parameters:
+        for tp,cw in parameters:
             with self.subTest(trip_part=tp):
                 #generate 2 trips with parameter values
-                trips = etmu.setTripConfig(2, [0, 0], [1, 1], trip_part=tp,threshold=generate_points_thresh) 
+                trips = etmm.generate_mock_trips('joe',2, threshold=similarity_threshold,origin=[0, 0], destination=[1, 1], within_threshold=2,trip_part=tp) 
                 # depending on the parametrs, extract the relevant coordinates
-                trip0_coords = metric.extract_features(trips[0])[coord_start:coord_end]
-                trip1_coords = metric.extract_features(trips[1])[coord_start:coord_end]
+                trip0_coords = metric.extract_features(trips[0])
+                trip1_coords = metric.extract_features(trips[1])
                 #check for similarity using relevant coordinates
-                similarOD = metric.similar(trip0_coords,trip1_coords, similarity_threshold)
+                similarOD = metric.similar(trip0_coords,trip1_coords, similarity_threshold,cw)
                 # Since both origin and destination poitns lie within threshold limits,they should be similar
                 # when we check by just origin or just destination or both origin-and-destination
                 self.assertTrue(similarOD)
@@ -42,17 +40,17 @@ def testODsAreNotSimilar(self):
         #   a.origin, we pass first two values of this list,i.e. from 0 till before 2 index
         #   b.destination, we pas last two values of this list,i.e. from 2 till before 4 index
         #   c.origin-destination, we pass the entire list , i.e. from 0 till before 4 index
-        parameters= [(0,2),(2,4),[0,4]]
+        parameters= ['origin','destination','origin-destination']
         n=2
         #this generates 2 trips one-by-one, where each trip's respective origin and destination 
         # points are more than 500m away.
-        trips = [etmu.setTripConfig(1, (i, i), (i+1, i+1), 'od', 1)[0] for i in range(n)]    
+        trips = [ etmm.generate_mock_trips('joe',2, origin=[i, i], destination=[i+1, i+1], trip_part= 'od', within_threshold=1,threshold=500)[0] for i in range(n)]    
         trip0_coord = metric.extract_features(trips[0])
         trip1_coord = metric.extract_features(trips[1])
 
-        for (coord_start,coord_end) in parameters:
-            with self.subTest(coordinates=(coord_start,coord_end)):      
-                IsSimilar = metric.similar(trip0_coord[coord_start:coord_end],trip1_coord[coord_start:coord_end], similarity_threshold)
+        for cw in parameters:
+            with self.subTest(clustering_way=cw):      
+                IsSimilar = metric.similar(trip0_coord,trip1_coord, similarity_threshold,cw)
                 # Two trips with neither origin nor destination coordinates within the threshold
                 # must not be similar by any configuration of similarity testing.
                 self.assertFalse(IsSimilar)
diff --git a/emission/tests/modellingTests/modellingTestAssets.py b/emission/tests/modellingTests/modellingTestAssets.py
index f98736048..9ad662fe3 100644
--- a/emission/tests/modellingTests/modellingTestAssets.py
+++ b/emission/tests/modellingTests/modellingTestAssets.py
@@ -2,122 +2,39 @@
 from typing import Optional, Tuple, List, Dict
 from uuid import UUID
 import emission.analysis.modelling.trip_model.greedy_similarity_binning as eamtg
-import emission.tests.modellingTests.modellingTestAssets as etmm
 import emission.core.wrapper.confirmedtrip as ecwc
-
+import emission.core.common as ecc
 import emission.core.wrapper.entry as ecwe
 import time 
 import math
 
-def setModelConfig(metric,threshold,cutoff,clustering_way,incrementalevaluation):
-    """
-    TODO: Write about each parameter to the function
-    pass in a test configuration to the binning algorithm.
-    
-    clustering_way : Part of the trip used for checking pairwise proximity.
-                        Can take one of the three values:
-                        
-                        1. 'origin' -> using origin of the trip to check if 2 points
-                                        lie within the mentioned similarity_threshold_meters
-                        2. 'destination' -> using destination of the trip to check if 2 points
-                                            lie within the mentioned similarity_threshold_meters
-                        3. 'origin-destination' -> both origin and destination of the trip to check 
-                                                if 2 points lie within the mentioned 
-                                                    similarity_threshold_meters
-    """        
-    model_config = {
-        "metric": metric,
-        "similarity_threshold_meters": threshold,  # meters,
-        "apply_cutoff": cutoff,
-        "clustering_way": clustering_way,  
-        "incremental_evaluation": incrementalevaluation
-    }
-
-    return eamtg.GreedySimilarityBinning(model_config)
-
 def generate_random_point():
-    """Generate a completetly random point valid WGS84 latitiude and longtidude"""
+    """Generate a completetly random point valid WGS84 latitiude and longtidude.   
+    CAUTION : In order to save trips, GeoJSON requires points in [lon,lat] format"""    
     lat=random.uniform(-90,90)
     lon=random.uniform(-180,180)
-    return [lat,lon]
+    return [lon,lat]
 
 def generate_nearby_random_points(ref_coords,threshold):
     """
     Generate valid WGS84 latitiude and longtidude in threshold(m) proximity to
-    ref coordinates
+    ref coordinates.
     """
-
+    #convert given threshold in m to approx WGS84 coord dist.
     thresholdInWGS84 = threshold* (0.000001/0.11)
+    
+    #generate a random coordinate in threshold's limit around the ref points. 
     dx=random.uniform(-thresholdInWGS84/2,thresholdInWGS84/2)
     dy=random.uniform(-thresholdInWGS84/2,thresholdInWGS84/2)
-    return [ref_coords[0] +dx , ref_coords[1] +dy]
-
-def calDistanceTest(point1, point2, coordinates=False):
-    """haversine distance
-
-    :param point1: a coordinate in degrees WGS84
-    :param point2: another coordinate in degrees WGS84
-    :param coordinates: if false, expect a list of coordinates, defaults to False
-    :return: distance approximately in meters
-    """
-    earthRadius = 6371000  # meters
-    if coordinates:
-        dLat = math.radians(point1.lat-point2.lat)
-        dLon = math.radians(point1.lon-point2.lon)
-        lat1 = math.radians(point1.lat)
-        lat2 = math.radians(point2.lat)
-    else:
-        dLat = math.radians(point1[1]-point2[1])
-        dLon = math.radians(point1[0]-point2[0])
-        lat1 = math.radians(point1[1])
-        lat2 = math.radians(point2[1])
-
 
-    a = (math.sin(dLat/2) ** 2) + ((math.sin(dLon/2) ** 2) * math.cos(lat1) * math.cos(lat2))
-    c = 2 * math.atan2(math.sqrt(a), math.sqrt(1-a))
-    d = earthRadius * c
-
-    return d
-
-def setTripConfig(trips,trip_part,threshold,within_thr=None,label_data=None):
-    """
-    TODO: Write about each parameter to the function
-                trip_part: when mock trips are generated, coordinates of this part of 
-                m trips will be within the threshold. trip_part can take one
-                among the four values:
-    
-                1. '__' ->(None, meaning NEITHER origin nor destination of any trip will lie 
-                within the mentioned threshold when trips are generated),
-    
-                2. 'o_' ->(origin, meaning ONLY origin of m trips will lie within the mentioned 
-                threshold when trips are generated),
-    
-                3. '_d' ->(destination),meaning ONLY destination of m trips will lie within the 
-                mentioned threshold when trips are generated)
-    
-                4. 'od' ->(origin and destination,meaning BOTH origin and destination of m trips
-                will lie within the mentioned threshold when trips are generated)
-    """
-    if label_data == None:            
-        label_data = {
-            "mode_confirm": ['walk', 'bike', 'transit'],
-            "purpose_confirm": ['work', 'home', 'school'],
-            "replaced_mode": ['drive']
-        }
-
-    trip =etmm.generate_mock_trips(
-            user_id="joe", 
-            trips=trips, 
-            trip_part=trip_part,
-            label_data=label_data, 
-            within_threshold=within_thr, 
-            threshold=threshold,  
-        )
-    return trip  
+    #This basically gives a way to sample a point from within a square of length thresholdInWGS84 
+    # around the ref. point.  
+    return [ref_coords[0] +dx , ref_coords[1] +dy]
 
 def generate_trip_coordinates(
-    points_list: list[float], 
-    within_threshold: bool,
+    points_list: list[float],
+    ref_coords, 
+    InsideThreshold: bool,
     threshold_meters: float, 
     ) -> Tuple[float, float]:
     """generates trip coordinate data to use when mocking a set of trip data.i
@@ -132,12 +49,22 @@ def generate_trip_coordinates(
     :return: generated coordinate pairs sampled in a 
              circle from some coordinates up to some threshold
     """
-
-    if within_threshold and points_list:
-        new_point = generate_nearby_random_points(random.choice(points_list), threshold_meters)    
-    else:
-        new_point = generate_random_point()
-        while not all(calDistanceTest(new_point, pt) > threshold_meters for pt in points_list):
+    # if the point is to be generated within a threshold and it's not the first point
+    if InsideThreshold and points_list:
+        # if  no ref. coordinates are provided, use any previously accepted point as ref.
+        if ref_coords == None:
+            ref_coords=random.choice(points_list)
+        # generate a new point in threshold proximity to ref. point
+        new_point = generate_nearby_random_points(ref_coords, threshold_meters)    
+    else: # If point need not be in the threshold  OR if its the first point we are generating, then
+         #Generate random coordinates if no reference coords were provided  
+        if ref_coords == None:            
+            new_point = generate_random_point()
+        else:
+         # if ref coordinate are provided, use them as the starting point and iterate till required
+         # condition is satisfied
+            new_point = ref_coords
+        while not all(ecc.calDistance(new_point, pt) > threshold_meters for pt in points_list):
             new_point = generate_random_point()
     return new_point
 
@@ -241,6 +168,8 @@ def generate_mock_trips(
     trips,
     threshold,
     trip_part='od',
+    origin=None,
+    destination=None,
     label_data = None, 
     within_threshold = None,
     start_ts: None = None,
@@ -276,6 +205,8 @@ def generate_mock_trips(
                      mentioned threshold when trips are generated)        
                     4. 'od' ->(origin and destination,meaning BOTH origin and destination of m trips
                      will lie within the mentioned threshold when trips are generated)
+    :param origin : reference point for trip origin generally
+    :param destination : reference point for trip destination generally
     :param label_data: dictionary of label data, see above, defaults to None
     :param within_threshold: number of trips that should fall within the provided
            distance threshold in m
@@ -292,11 +223,11 @@ def generate_mock_trips(
     origin_points=[]
     destination_points=[]    
 
-    # generate trip number of points based on which among 'o' ,'d' or 'od' should be in threshold 
-    # proximity to each other. 
+    # generate 'trip' number of points based on which among 'o' (Origin) ,'d' (Destination) or
+    # 'od' (Origin-Destination) or '__' (None) should be in threshold proximity to each other. 
     for within in trips_within_threshold:
-        origin_points.append(generate_trip_coordinates(origin_points, (trip_part[0] == 'o' and within), threshold))
-        destination_points.append(generate_trip_coordinates(destination_points, (trip_part[1] == 'd' and within), threshold))
+        origin_points.append(generate_trip_coordinates(origin_points, origin, InsideThreshold= (trip_part[0] == 'o' and within), threshold_meters= threshold))
+        destination_points.append(generate_trip_coordinates(destination_points, destination, InsideThreshold=(trip_part[1] == 'd' and within), threshold_meters=threshold))
 
     for o,d in zip(origin_points,destination_points):    
         labels = {} if label_data is None or random.random() > has_label_p \

From 1c0526bb43d07807c9297dd2584ee926a27bf62b Mon Sep 17 00:00:00 2001
From: $aTyam <satyam.saini@rutgers.edu>
Date: Wed, 13 Sep 2023 01:41:33 -0400
Subject: [PATCH 09/10] [TESTED]Improved random point generation logic

Generating random points from a circle (rather than a square) around ref_points.

Better explanations for random point generation.

Whitespace fixes.
---
 .../TestGreedySimilarityBinning.py            | 24 +++++++--------
 .../modellingTests/modellingTestAssets.py     | 29 ++++++++++++-------
 2 files changed, 31 insertions(+), 22 deletions(-)

diff --git a/emission/tests/modellingTests/TestGreedySimilarityBinning.py b/emission/tests/modellingTests/TestGreedySimilarityBinning.py
index 937effc94..b96147706 100644
--- a/emission/tests/modellingTests/TestGreedySimilarityBinning.py
+++ b/emission/tests/modellingTests/TestGreedySimilarityBinning.py
@@ -139,12 +139,12 @@ def testPrediction(self):
                 destination=(1,1)
             )
         model_config = {
-                    "metric": "od_similarity",
-                    "similarity_threshold_meters": 500,  # meters,
-                    "apply_cutoff": False,
-                    "clustering_way": 'origin_destination',  
-                    "incremental_evaluation": False
-                                }
+            "metric": "od_similarity",
+            "similarity_threshold_meters": 500, # meters,
+            "apply_cutoff": False,
+            "clustering_way": 'origin_destination',
+            "incremental_evaluation": False
+        }
         model= eamtg.GreedySimilarityBinning(model_config)
         train = trips[0:5]
         test = trips[5]
@@ -177,12 +177,12 @@ def testNoPrediction(self):
                                     threshold=binning_threshold, within_threshold=n
         )
         model_config = {
-                    "metric": "od_similarity",
-                    "similarity_threshold_meters": 500,  # meters,
-                    "apply_cutoff": False,
-                    "clustering_way": 'origin_destination',  
-                    "incremental_evaluation": False
-                                }
+            "metric": "od_similarity",
+            "similarity_threshold_meters": 500,  # meters,
+            "apply_cutoff": False,
+            "clustering_way": 'origin_destination',  
+            "incremental_evaluation": False
+        }
         model= eamtg.GreedySimilarityBinning(model_config)
         model.fit(train)
         results, n = model.predict(test[0])
diff --git a/emission/tests/modellingTests/modellingTestAssets.py b/emission/tests/modellingTests/modellingTestAssets.py
index 9ad662fe3..2e2fe8361 100644
--- a/emission/tests/modellingTests/modellingTestAssets.py
+++ b/emission/tests/modellingTests/modellingTestAssets.py
@@ -23,18 +23,26 @@ def generate_nearby_random_points(ref_coords,threshold):
     #convert given threshold in m to approx WGS84 coord dist.
     thresholdInWGS84 = threshold* (0.000001/0.11)
     
-    #generate a random coordinate in threshold's limit around the ref points. 
-    dx=random.uniform(-thresholdInWGS84/2,thresholdInWGS84/2)
-    dy=random.uniform(-thresholdInWGS84/2,thresholdInWGS84/2)
+    #generate a random coordinate in threshold's limit around the ref points.
 
+    # for eg, ref point is 0,0 and threshold is 100m, so we generate a radius from 0 to 50, say 34
+    # in this example. A random angle theta is also generated from 0 to 2*pi (0 to 360 degrees), say 0. We then take 34 steps
+    # along the x axis direction till radius length to get our new point, (34,0). When this function is called the next time
+    # to generate a point that has to be binned with the previous one, we again generate r and theta, say 24 and 180 degrees.
+    # Now this new point is at (-24,0). Both these points are within the threshold (100 in this case) limit and therefore will
+    # be binned together.
+    radius=random.uniform(0,thresholdInWGS84/2)
+    theta=random.uniform(0,2*math.pi)
+    dx = radius * math.cos(theta)
+    dy = radius * math.sin (theta)
     #This basically gives a way to sample a point from within a square of length thresholdInWGS84 
     # around the ref. point.  
-    return [ref_coords[0] +dx , ref_coords[1] +dy]
+    return [ref_coords[0] + dy , ref_coords[1] + dx]
 
 def generate_trip_coordinates(
     points_list: list[float],
     ref_coords, 
-    InsideThreshold: bool,
+    insideThreshold: bool,
     threshold_meters: float, 
     ) -> Tuple[float, float]:
     """generates trip coordinate data to use when mocking a set of trip data.i
@@ -50,7 +58,7 @@ def generate_trip_coordinates(
              circle from some coordinates up to some threshold
     """
     # if the point is to be generated within a threshold and it's not the first point
-    if InsideThreshold and points_list:
+    if insideThreshold and points_list:
         # if  no ref. coordinates are provided, use any previously accepted point as ref.
         if ref_coords == None:
             ref_coords=random.choice(points_list)
@@ -61,9 +69,10 @@ def generate_trip_coordinates(
         if ref_coords == None:            
             new_point = generate_random_point()
         else:
-         # if ref coordinate are provided, use them as the starting point and iterate till required
-         # condition is satisfied
+         # if ref coordinates are provided, use them as the starting point.
             new_point = ref_coords
+        # If the newly generated new_point (whether or not ref_coords was given) is not more
+        # than threshold_meters away from all the previously accepted points, keep generating new_point
         while not all(ecc.calDistance(new_point, pt) > threshold_meters for pt in points_list):
             new_point = generate_random_point()
     return new_point
@@ -226,8 +235,8 @@ def generate_mock_trips(
     # generate 'trip' number of points based on which among 'o' (Origin) ,'d' (Destination) or
     # 'od' (Origin-Destination) or '__' (None) should be in threshold proximity to each other. 
     for within in trips_within_threshold:
-        origin_points.append(generate_trip_coordinates(origin_points, origin, InsideThreshold= (trip_part[0] == 'o' and within), threshold_meters= threshold))
-        destination_points.append(generate_trip_coordinates(destination_points, destination, InsideThreshold=(trip_part[1] == 'd' and within), threshold_meters=threshold))
+        origin_points.append(generate_trip_coordinates(origin_points, origin, insideThreshold= (trip_part[0] == 'o' and within), threshold_meters= threshold))
+        destination_points.append(generate_trip_coordinates(destination_points, destination, insideThreshold=(trip_part[1] == 'd' and within), threshold_meters=threshold))
 
     for o,d in zip(origin_points,destination_points):    
         labels = {} if label_data is None or random.random() > has_label_p \

From 21305df9ce756a6e032064ca64a24e3185cd8b81 Mon Sep 17 00:00:00 2001
From: $aTyam <satyam.saini@rutgers.edu>
Date: Thu, 14 Sep 2023 00:15:30 -0400
Subject: [PATCH 10/10] Minor fixes

Comments and variable names fixed
---
 emission/tests/modellingTests/TestGreedySimilarityBinning.py | 4 ++--
 emission/tests/modellingTests/modellingTestAssets.py         | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/emission/tests/modellingTests/TestGreedySimilarityBinning.py b/emission/tests/modellingTests/TestGreedySimilarityBinning.py
index b96147706..31b3261ae 100644
--- a/emission/tests/modellingTests/TestGreedySimilarityBinning.py
+++ b/emission/tests/modellingTests/TestGreedySimilarityBinning.py
@@ -113,9 +113,9 @@ def testBinning(self):
                 model = eamtg.GreedySimilarityBinning(model_config)
                 model.fit(trips)
                 #check each bins for no of trips
-                at_least_one_large_bin = any(map(lambda b: len(b['feature_rows']) == m, model.bins.values()))
+                one_large_bin = any(map(lambda b: len(b['feature_rows']) == m, model.bins.values()))
                 #Since 5 trips were sampled within the threshold, there should be one bin with 5 trips
-                self.assertTrue(at_least_one_large_bin, "at least one bin should have at least 5 features in it")
+                self.assertTrue(one_large_bin, "one bin should have 5 features in it")
 
     def testPrediction(self):
         """
diff --git a/emission/tests/modellingTests/modellingTestAssets.py b/emission/tests/modellingTests/modellingTestAssets.py
index 2e2fe8361..252b2ad34 100644
--- a/emission/tests/modellingTests/modellingTestAssets.py
+++ b/emission/tests/modellingTests/modellingTestAssets.py
@@ -35,7 +35,7 @@ def generate_nearby_random_points(ref_coords,threshold):
     theta=random.uniform(0,2*math.pi)
     dx = radius * math.cos(theta)
     dy = radius * math.sin (theta)
-    #This basically gives a way to sample a point from within a square of length thresholdInWGS84 
+    #This basically gives a way to sample a point from within a circle of radius thresholdInWGS84/2 
     # around the ref. point.  
     return [ref_coords[0] + dy , ref_coords[1] + dx]