Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Moving dependence from custom branch's tour_model to master's trip_model #933

Merged
merged 10 commits into from
Sep 14, 2023
23 changes: 20 additions & 3 deletions emission/analysis/modelling/similarity/od_similarity.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,23 @@ def extract_features(self, trip: ecwc.Confirmedtrip) -> List[float]:
return ctfe.od_features(trip)

def similarity(self, a: List[float], b: List[float]) -> List[float]:
o_dist = ecc.calDistance([a[0], a[1]], [b[0], b[1]])
d_dist = ecc.calDistance([a[2], a[3]], [b[2], b[3]])
return [o_dist, d_dist]
"""
a : a list of point features that can take either of two forms
1. [point1_latitude,point1_longitude]
2. [point1_latitude,point1_longitude,point2_latitude,point2_longitude]

b : a list of point features that can take either of two forms
1. [point3_latitude,point3_longitude]
2. [point3_latitude,point3_longitude,point4_latitude,point4_longitude]

It'll always take the same form as parameter a.

return: a list of size 1 ([distance between point1-point3]) if a and b take form 1
or of size 2 ([distance between point1-point3, distance between point2-point4])
if a and b take form 2.
"""

shankari marked this conversation as resolved.
Show resolved Hide resolved
point_dist = [ecc.calDistance(a[i:i+2], b[i:i+2])
for i in range (0,len(a),2)]

return point_dist
5 changes: 3 additions & 2 deletions emission/analysis/modelling/similarity/similarity_metric.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,11 +31,12 @@ def similar(self, a: List[float], b: List[float], thresh: float) -> bool:
"""compares the features, returning true if they are similar
within some threshold

:param a: features for a trip
:param a: features for a trip
:param b: features for another trip
:param thresh: threshold for similarity
:return: true if the feature similarity is within some threshold
"""
similarity_values = self.similarity(a, b)
shankari marked this conversation as resolved.
Show resolved Hide resolved
is_similar = all(map(lambda sim: sim <= thresh, similarity_values))
shankari marked this conversation as resolved.
Show resolved Hide resolved
is_similar = all(sim <= thresh for sim in similarity_values)
shankari marked this conversation as resolved.
Show resolved Hide resolved

return is_similar
Original file line number Diff line number Diff line change
Expand Up @@ -119,6 +119,11 @@ class label to apply:
self.sim_thresh = config['similarity_threshold_meters']
self.apply_cutoff = config['apply_cutoff']
self.is_incremental = config['incremental_evaluation']
if config.get('clustering_way') is None:
shankari marked this conversation as resolved.
Show resolved Hide resolved
self.clusteringWay='origin-destination' # previous default
else:
self.clusteringWay= config['clustering_way']
self.tripLabels=[]

self.bins: Dict[str, Dict] = {}

Expand Down Expand Up @@ -184,9 +189,11 @@ def _assign_bins(self, trips: List[ecwc.Confirmedtrip]):
logging.debug(f"adding trip to bin {bin_id} with features {trip_features}")
self.bins[bin_id]['feature_rows'].append(trip_features)
self.bins[bin_id]['labels'].append(trip_labels)
self.tripLabels.append(bin_id)
else:
# create new bin
new_bin_id = str(len(self.bins))
self.tripLabels.append(new_bin_id)
new_bin_record = {
'feature_rows': [trip_features],
'labels': [trip_labels],
Expand All @@ -200,14 +207,22 @@ def _find_matching_bin_id(self, trip_features: List[float]) -> Optional[str]:
finds an existing bin where all bin features are "similar" to the incoming
trip features.

:param trip_features: feature row for the incoming trip
:param trip_features: feature row for the incoming trip.
takes the form [orig_lat, orig_lon, dest_lat, dest_lon]
:return: the id of a bin if a match was found, otherwise None
"""
for bin_id, bin_record in self.bins.items():
matches_bin = all([self.metric.similar(trip_features, bin_sample, self.sim_thresh)
for bin_sample in bin_record['feature_rows']])
if matches_bin:
return bin_id
if self.clusteringWay == 'origin':
shankari marked this conversation as resolved.
Show resolved Hide resolved
start,end=0,2 #since first two features in trip_features are for origin
elif self.clusteringWay == 'destination':
start,end=2,4 #third and fourth values in trip_features are for destination
elif self.clusteringWay == 'origin-destination':
start,end=0,4 #when clusteringWay is 'origin-destination', we pass all four features

matches_bin = all([self.metric.similar(trip_features[start:end], bin_sample[start:end], self.sim_thresh)
for bin_sample in bin_record['feature_rows']])
if matches_bin:
return bin_id
return None

def _nearest_bin(self, trip: ecwc.Confirmedtrip) -> Tuple[Optional[int], Optional[Dict]]:
Expand Down
Loading