Fix/Using apply to set timestamp (#215)

* wip: adding log
* chores: downgrading numpy
* lock
* fix: use apply
* fix: apply
* test
* numpy 2.0.1
dataforgoodfr · Jul 30, 2024 · 2b47ea9 · 2b47ea9 · github-actions · Jul 30, 2024
1 parent 934edb1
commit 2b47ea9
Show file tree

Hide file tree

Showing 3 changed files with 53 additions and 41 deletions.
diff --git a/docker-compose.yml b/docker-compose.yml
@@ -137,7 +137,7 @@ services:
     #entrypoint: ["python", "quotaclimat/data_processing/mediatree/api_import.py"]
     environment:
       ENV: docker # change me to prod for real cases
-      LOGLEVEL: INFO # Change me to info (debug, info, warning, error) to have less log
+      LOGLEVEL: DEBUG # Change me to info (debug, info, warning, error) to have less log
       PYTHONPATH: /app
       POSTGRES_USER: user
       POSTGRES_DB: barometre

diff --git a/poetry.lock b/poetry.lock
diff --git a/quotaclimat/data_processing/mediatree/api_import.py b/quotaclimat/data_processing/mediatree/api_import.py
@@ -230,13 +230,15 @@ def parse_reponse_subtitle(response_sub, channel = None, channel_program = "", c
         logging.getLogger("modin.logging.default").setLevel(logging.WARNING)
         if(total_results > 0):
             logging.info(f"{total_results} 'total_results' field")
-
             new_df : pd.DataFrame = json_normalize(response_sub.get('data'))
             logging.debug("Schema from API before formatting :\n%s", new_df.dtypes)
             pd.set_option('display.max_columns', None)
             logging.debug("head:  :\n%s", new_df.head())
-            new_df['timestamp'] = pd.to_datetime(new_df['start'], unit='s', utc=True)
+
             logging.debug("setting timestamp")
+            new_df['timestamp'] = new_df.apply(lambda x: pd.to_datetime(x['start'], unit='s', utc=True), axis=1)
+            logging.debug("timestamp was set")
+
             new_df.drop('start', axis=1, inplace=True)
             logging.debug("renaming columns")
             new_df.rename(columns={'channel.name':'channel_name', 
@@ -246,13 +248,14 @@ def parse_reponse_subtitle(response_sub, channel = None, channel_program = "", c
                                   },
                         inplace=True
             )
+
             logging.debug(f"setting program {channel_program} type { type(channel_program)}")
-
             # weird error if not using this way: (ValueError) format number 1 of "20h30 le samedi" is not recognized
             new_df['channel_program'] = new_df.apply(lambda x: channel_program, axis=1)
             new_df['channel_program_type'] = new_df.apply(lambda x: channel_program_type, axis=1)
 
             logging.debug("programs were set")
+
             log_dataframe_size(new_df, channel)
 
             logging.debug("Parsed Schema\n%s", new_df.dtypes)
File	Stmts	Miss	Cover	Missing
postgres
insert_data.py	43	7	84%	36–38, 56–58, 63
insert_existing_data_example.py	19	3	84%	25–27
postgres/schemas
models.py	147	10	93%	121–128, 140–141, 199–200, 214–215
quotaclimat/data_ingestion
scrap_sitemap.py	134	17	87%	27–28, 33–34, 66–71, 95–97, 138–140, 202, 223–228
quotaclimat/data_ingestion/ingest_db
ingest_sitemap_in_db.py	55	37	33%	21–42, 45–58, 62–73
quotaclimat/data_ingestion/scrap_html
scrap_description_article.py	36	3	92%	19–20, 32
quotaclimat/data_processing/mediatree
api_import.py	213	131	38%	44–48, 53–69, 73–76, 82, 85–127, 133–148, 152–153, 166–178, 182–188, 201–212, 215–219, 225, 265–266, 270, 274–308, 311–313
channel_program.py	136	51	62%	30–32, 43–45, 59, 95, 104, 142–183
config.py	15	2	87%	7, 16
detect_keywords.py	213	8	96%	169–172, 216, 271–273
update_pg_keywords.py	54	39	28%	14–100, 125–129, 152–178, 184
utils.py	69	22	68%	27–51, 54, 63, 84–85
quotaclimat/utils
healthcheck_config.py	29	14	52%	22–24, 27–38
logger.py	24	11	54%	22–24, 28–37
sentry.py	10	2	80%	21–22
TOTAL	1223	357	71%