Skip to content

Commit

Permalink
refacto: change variable name from fifteen to twenty (#259)
Browse files Browse the repository at this point in the history
  • Loading branch information
polomarcus authored Oct 9, 2024
1 parent a5bbe03 commit 763e1b0
Show file tree
Hide file tree
Showing 3 changed files with 93 additions and 92 deletions.
7 changes: 4 additions & 3 deletions quotaclimat/data_processing/mediatree/detect_keywords.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
dask.config.set({'dataframe.query-planning': True})

indirectes = 'indirectes'
DEFAULT_WINDOW_DURATION = 20

def get_cts_in_ms_for_keywords(subtitle_duration: List[dict], keywords: List[dict], theme: str) -> List[dict]:
result = []
Expand Down Expand Up @@ -117,7 +118,7 @@ def remove_stopwords(plaintext: str) -> str:
def get_themes_keywords_duration(plaintext: str, subtitle_duration: List[str], start: datetime):
keywords_with_timestamp = []
number_of_elements_in_array = 17
default_window_in_seconds = 20
default_window_in_seconds = DEFAULT_WINDOW_DURATION
plaitext_without_stopwords = remove_stopwords(plaintext)
logging.debug(f"display datetime start {start}")

Expand Down Expand Up @@ -200,7 +201,7 @@ def get_themes_keywords_duration(plaintext: str, subtitle_duration: List[str], s
else:
return [None] * number_of_elements_in_array

def get_keywords_with_timestamp_with_false_positive(keywords_with_timestamp, start, duration_seconds: int = 15):
def get_keywords_with_timestamp_with_false_positive(keywords_with_timestamp, start, duration_seconds: int = 20):
logging.debug(f"Using duration_seconds {duration_seconds}")

# Shallow copy to avoid unnecessary deep copying (wip: for memory leak)
Expand Down Expand Up @@ -330,7 +331,7 @@ def transform_false_positive_keywords_to_positive(keywords_with_timestamp: List[

return keywords_with_timestamp

def tag_wanted_duration_second_window_number(keywords_with_timestamp: List[dict], start, duration_seconds: int = 15) -> List[dict]:
def tag_wanted_duration_second_window_number(keywords_with_timestamp: List[dict], start, duration_seconds: int = 20) -> List[dict]:
window_size_seconds = get_keyword_time_separation_ms(duration_seconds=duration_seconds)
total_seconds_in_window = get_chunk_duration_api()
number_of_windows = int(total_seconds_in_window // window_size_seconds)
Expand Down
2 changes: 1 addition & 1 deletion quotaclimat/data_processing/mediatree/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
EPOCH__5MIN_MARGIN = 300
EPOCH__1MIN_MARGIN = 60 # to add margin for program

def get_keyword_time_separation_ms(duration_seconds: int = 15):
def get_keyword_time_separation_ms(duration_seconds: int = 20):
return duration_seconds * 1000

def get_chunk_duration_api():
Expand Down
Loading

1 comment on commit 763e1b0

@github-actions
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Coverage

Coverage Report
| File | Stmts | Miss | Cover | Missing |
|---|---|---|---|---|
| **postgres** | | | | |
| insert_data.py | 43 | 7 | 84% | 36–38, 56–58, 63 |
| insert_existing_data_example.py | 19 | 3 | 84% | 25–27 |
| **postgres/schemas** | | | | |
| models.py | 155 | 10 | 94% | 126–133, 145–146, 211–212, 226–227 |
| **quotaclimat/data_ingestion** | | | | |
| scrap_sitemap.py | 134 | 17 | 87% | 27–28, 33–34, 66–71, 95–97, 138–140, 202, 223–228 |
| **quotaclimat/data_ingestion/ingest_db** | | | | |
| ingest_sitemap_in_db.py | 55 | 37 | 33% | 21–42, 45–58, 62–73 |
| **quotaclimat/data_ingestion/scrap_html** | | | | |
| scrap_description_article.py | 36 | 3 | 92% | 19–20, 32 |
| **quotaclimat/data_processing/mediatree** | | | | |
| api_import.py | 211 | 133 | 37% | 44–48, 53–74, 78–81, 87, 90–132, 138–153, 158, 171–183, 187–193, 206–218, 221–225, 231, 266–267, 270–301, 304–306 |
| channel_program.py | 165 | 57 | 65% | 25–27, 38–40, 57–58, 61–63, 102–103, 112, 128, 179–220 |
| config.py | 15 | 2 | 87% | 7, 16 |
| detect_keywords.py | 210 | 8 | 96% | 221, 279–286 |
| update_pg_keywords.py | 67 | 49 | 27% | 15–108, 132, 135, 142–157, 180–206, 213 |
| utils.py | 79 | 25 | 68% | 29–53, 56, 65, 86–87, 117–120 |
| **quotaclimat/utils** | | | | |
| healthcheck_config.py | 29 | 14 | 52% | 22–24, 27–38 |
| logger.py | 24 | 11 | 54% | 22–24, 28–37 |
| sentry.py | 11 | 2 | 82% | 22–23 |
| **TOTAL** | 1279 | 378 | 70% | |

Tests Skipped Failures Errors Time
93 0 💤 0 ❌ 0 🔥 7m 48s ⏱️

Please sign in to comment.