From c6672bd524f44bb2ee93c1c625bf1657dde87d57 Mon Sep 17 00:00:00 2001 From: Paul Leclercq Date: Tue, 30 Jul 2024 16:24:38 +0200 Subject: [PATCH 1/2] Log: parsed data (#216) * wip: adding log * chores: downgrading numpy * lock * fix: use apply * fix: apply * test * numpy 2.0.1 * log: more logs --- .../data_processing/mediatree/api_import.py | 14 +++----------- 1 file changed, 3 insertions(+), 11 deletions(-) diff --git a/quotaclimat/data_processing/mediatree/api_import.py b/quotaclimat/data_processing/mediatree/api_import.py index 893810b7..647347e1 100644 --- a/quotaclimat/data_processing/mediatree/api_import.py +++ b/quotaclimat/data_processing/mediatree/api_import.py @@ -115,8 +115,6 @@ async def get_and_save_api_data(exit_event): save_to_pg(df, keywords_table, conn) else: logging.info("Nothing to save to Postgresql") - - del df # memory leak test for long running jobs gc.collect() except Exception as err: logging.error(f"continuing loop but met error : {err}") @@ -239,6 +237,7 @@ def parse_reponse_subtitle(response_sub, channel = None, channel_program = "", c new_df['timestamp'] = new_df.apply(lambda x: pd.to_datetime(x['start'], unit='s', utc=True), axis=1) logging.debug("timestamp was set") + logging.debug("droping start column") new_df.drop('start', axis=1, inplace=True) logging.debug("renaming columns") new_df.rename(columns={'channel.name':'channel_name', @@ -249,27 +248,20 @@ def parse_reponse_subtitle(response_sub, channel = None, channel_program = "", c inplace=True ) - logging.debug(f"setting program {channel_program} type { type(channel_program)}") + logging.debug(f"setting program {channel_program}") # weird error if not using this way: (ValueError) format number 1 of "20h30 le samedi" is not recognized new_df['channel_program'] = new_df.apply(lambda x: channel_program, axis=1) new_df['channel_program_type'] = new_df.apply(lambda x: channel_program_type, axis=1) - logging.debug("programs were set") - - log_dataframe_size(new_df, channel) logging.debug("Parsed Schema\n%s", new_df.dtypes) + logging.debug("head parsed: :\n%s", new_df.head()) return new_df else: logging.warning("No result (total_results = 0) for this channel") return None -def log_dataframe_size(df, channel): - if(len(df) == 1000): - logging.error(f"We might lose data for {channel} - df size is 1000 out of 1000 - we should divide this querry") - - async def main(): with monitor(monitor_slug='mediatree'): #https://docs.sentry.io/platforms/python/crons/ try: From ad8d57b1fc406fd6da95058f389e56a211f0e48f Mon Sep 17 00:00:00 2001 From: barometre-github-actions Date: Tue, 30 Jul 2024 14:25:11 +0000 Subject: [PATCH 2/2] [no ci]: 0.2.55 bumping version --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 834c9001..5ef5a75d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "quotaclimat" -version = "0.2.54" +version = "0.2.55" description = "" authors = [ "Rambier Estelle ",