Skip to content

Commit

Permalink
refacto: dataframe loop with itertuples to spot memory leak (#238)
Browse files Browse the repository at this point in the history
  • Loading branch information
polomarcus authored Sep 13, 2024
1 parent 4e8d60d commit 39ab7ec
Showing 1 changed file with 6 additions and 5 deletions.
11 changes: 6 additions & 5 deletions quotaclimat/data_processing/mediatree/api_import.py
Original file line number Diff line number Diff line change
Expand Up @@ -103,16 +103,17 @@ async def get_and_save_api_data(exit_event):
try:
programs_for_this_day = get_programs_for_this_day(day, channel, df_programs)

for index, program in programs_for_this_day.iterrows():
start_epoch = program['start']
end_epoch = program['end']
channel_program = str(program['program_name'])
channel_program_type = str(program['program_type'])
for program in programs_for_this_day.itertuples(index=False):
start_epoch = program.start
end_epoch = program.end
channel_program = str(program.program_name)
channel_program_type = str(program.program_type)
logging.info(f"Querying API for {channel} - {channel_program} - {channel_program_type} - {start_epoch} - {end_epoch}")
df = extract_api_sub(token, channel, type_sub, start_epoch,end_epoch, channel_program,channel_program_type)
if(df is not None):
logging.debug(f"Memory df {df.memory_usage()}")
save_to_pg(df, keywords_table, conn)
del df
else:
logging.info("Nothing to save to Postgresql")
gc.collect()
Expand Down

1 comment on commit 39ab7ec

@github-actions
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Coverage

Coverage Report
| File | Stmts | Miss | Cover | Missing |
| --- | --- | --- | --- | --- |
| **postgres** | | | | |
|    insert_data.py | 43 | 7 | 84% | 36–38, 56–58, 63 |
|    insert_existing_data_example.py | 19 | 3 | 84% | 25–27 |
| **postgres/schemas** | | | | |
|    models.py | 150 | 10 | 93% | 124–131, 143–144, 202–203, 217–218 |
| **quotaclimat/data_ingestion** | | | | |
|    scrap_sitemap.py | 134 | 17 | 87% | 27–28, 33–34, 66–71, 95–97, 138–140, 202, 223–228 |
| **quotaclimat/data_ingestion/ingest_db** | | | | |
|    ingest_sitemap_in_db.py | 55 | 37 | 33% | 21–42, 45–58, 62–73 |
| **quotaclimat/data_ingestion/scrap_html** | | | | |
|    scrap_description_article.py | 36 | 3 | 92% | 19–20, 32 |
| **quotaclimat/data_processing/mediatree** | | | | |
|    api_import.py | 212 | 133 | 37% | 44–48, 53–69, 73–76, 82, 85–127, 133–148, 152–153, 166–178, 182–188, 201–213, 216–220, 226, 262–263, 266–302, 305–307 |
|    channel_program.py | 157 | 56 | 64% | 28–30, 41–43, 60–61, 64–66, 93, 105, 114, 154–195 |
|    config.py | 15 | 2 | 87% | 7, 16 |
|    detect_keywords.py | 209 | 8 | 96% | 222, 272–279 |
|    update_pg_keywords.py | 54 | 39 | 28% | 14–100, 125–129, 152–178, 184 |
|    utils.py | 69 | 22 | 68% | 27–51, 54, 63, 84–85 |
| **quotaclimat/utils** | | | | |
|    healthcheck_config.py | 29 | 14 | 52% | 22–24, 27–38 |
|    logger.py | 24 | 11 | 54% | 22–24, 28–37 |
|    sentry.py | 11 | 2 | 82% | 22–23 |
| **TOTAL** | 1243 | 364 | 71% | |

| Tests | Skipped | Failures | Errors | Time |
| --- | --- | --- | --- | --- |
| 87 | 0 💤 | 0 ❌ | 0 🔥 | 1m 35s ⏱️ |

Please sign in to comment.