Skip to content

Commit

Permalink
Changed count entries function_name and signature and modified test c…
Browse files Browse the repository at this point in the history
…ases

- Updated the function name and the function signature to include other common parameters like time_query, geo_query along with key and extra_queries to maintain consistency with existing functions.

- Modified test cases to include dataset samples with more specific keys.

- Provided code comments to clarify validation process for inputs and outputs of test datasets.
  • Loading branch information
Mahadik, Mukul Chandrakant authored and Mahadik, Mukul Chandrakant committed Sep 1, 2023
1 parent c4ea9a8 commit e773c34
Show file tree
Hide file tree
Showing 2 changed files with 29 additions and 14 deletions.
11 changes: 7 additions & 4 deletions emission/storage/timeseries/builtin_timeseries.py
Original file line number Diff line number Diff line change
Expand Up @@ -440,13 +440,16 @@ def update_data(user_id, key, obj_id, data):
logging.debug("updating entry %s into timeseries" % new_entry)
edb.save(ts.get_timeseries_db(key), new_entry)

def count_data(self, key, extra_query_list):
def find_entries_count(self, key, time_query = None, geo_query = None, extra_query_list = None):
"""
Returns the total number of documents for the specific key referring to a timeseries db.
Additional queries can be passed as an optional list for filtering data.
:param key: the metadata key we are querying for. Only supports one key for now.
:param time_query: the time range in which to search the stream
:param geo_query: the query for a geographical area
:param extra_query_list: any additional queries to filter out data
"""
logging.debug("count_data timeseries called")
created_query = self._get_query(key_list=[key], extra_query_list=extra_query_list)
logging.debug("builtin_timeseries.find_entries_count() called")
created_query = self._get_query([key], time_query, geo_query, extra_query_list)
result_dataset = self.get_timeseries_db(key)
total_entries = result_dataset.count_documents(created_query)
return total_entries
Expand Down
32 changes: 22 additions & 10 deletions emission/tests/storageTests/TestTimeSeries.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,18 +81,30 @@ def testExtraQueries(self):
with self.assertRaises(AttributeError):
list(ts.find_entries(time_query=tq, extra_query_list=[ignored_phones]))

def testCountData(self):
def testFindEntriesCount(self):
'''
Test 1 : Specific key with empty extra_queries
key = 'background/location', extra_query_list = []
Results in empty query = {}, which matches all documents for a user for that key.
Hence should return total count of all documents matching that key.
Testing this with sample dataset: "shankari_2015-aug-27"
Test: Specific keys, with no values passed for the other (optional) parameters.
Input: For each dataset: ["background/location", "background/filtered_location"]
- Testing this with sample dataset: "shankari_2015-aug-21", "shankari_2015-aug-27"
Output: Aug_21: [738, 508], Aug_27: [555, 327]
- Each call returns a single number (the count of entries for one key); the lists above collect those counts per dataset.
- Validated using grep count of occurrences for keys: 1) "background/location" 2) "background/filtered_location"
- $ grep -c <key> <dataset>.json
'''
ts = esta.TimeSeries.get_time_series(self.testUUID)
total_count = ts.count_data("background/location",[])
print(total_count)
self.assertEqual(total_count, 555)
# Fetching the two test datasets defined in setup()
ts1_aug_21 = esta.TimeSeries.get_time_series(self.testUUID1)
ts2_aug_27 = esta.TimeSeries.get_time_series(self.testUUID)

# Counts for each of the two keys in each dataset
count_ts1 = [ts1_aug_21.find_entries_count(key="background/location"), ts1_aug_21.find_entries_count(key="background/filtered_location")]
count_ts2 = [ts2_aug_27.find_entries_count(key="background/location"), ts2_aug_27.find_entries_count(key="background/filtered_location")]

print("\nEntry counts for location, filtered_location on {} = {}".format("Aug_21", count_ts1))
print("Entry counts for location, filtered_location on {} = {}".format("Aug_27", count_ts2))

self.assertEqual(count_ts1, [738, 508])
self.assertEqual(count_ts2, [555, 327])

print("Assert Test for Count Data successful!")


Expand Down

0 comments on commit e773c34

Please sign in to comment.