From 4e0dfa0a938b638663684c25dd2511cddeb5b62e Mon Sep 17 00:00:00 2001 From: Paul Leclercq Date: Fri, 13 Sep 2024 09:53:45 +0200 Subject: [PATCH] feat: number_of_keywords climate/biodiv/ressources - 20 secondes window (#236) * feat: number_of_keywords climate/biodiv/ressources - 20 secondes window * doc: alembic * db: alembic --- README.md | 13 ++- ..._add_new_column_test_for_table_keywords.py | 30 +++++++ ...658f_add_new_column_number_of_keywords_.py | 34 ++++++++ docker-compose.yml | 3 +- postgres/schemas/models.py | 9 +- .../mediatree/detect_keywords.py | 86 ++++++++++--------- .../mediatree/update_pg_keywords.py | 24 +++--- test/sitemap/test_detect_keywords.py | 55 ++++++------ test/sitemap/test_main_import_api.py | 15 ++-- test/sitemap/test_update_pg_keywords.py | 22 ++--- 10 files changed, 180 insertions(+), 111 deletions(-) create mode 100644 alembic/versions/a5c39db3c8e9_add_new_column_test_for_table_keywords.py create mode 100644 alembic/versions/af956a85658f_add_new_column_number_of_keywords_.py diff --git a/README.md b/README.md index 35751489..956ee1fb 100644 --- a/README.md +++ b/README.md @@ -314,10 +314,6 @@ We can adjust batch update with these env variables (as in the docker-compose.ym BATCH_SIZE: 50000 # number of records to update in one batch ``` -### Comparison between 15/20/30/40 window -Set `COMPARE_DURATION` to true such as in the docker-compose.yml to calculate number_of_keywords_20/30/40 in addition of 15. -The goal is to compare different durations to select one, it should be desactivated to have more effective program. - ### Batch program data `UPDATE_PROGRAM_ONLY` to true will only update program metadata, otherwise, it will update program metadata and all theme/keywords calculations. 
@@ -336,12 +332,15 @@ We can use [a Github actions to start multiple update operations with different Using [Alembic](https://alembic.sqlalchemy.org/en/latest/autogenerate.html) Auto Generating Migrations¶ we can add a new column inside `models.py` and it will automatically make the schema evolution : ``` -# If changes have already been applied and you want to recreate your alembic file: -# 1. change to you main branch +# If changes have already been applied (on your feature branch) and you have to recreate your alembic file by doing : +# 1. change to your main branch +git switch main # 2. start test container and run "pytest -vv -k api" to rebuild the state of the DB (or drop table the table you want) # 3. rechange to your WIP branch +git switch - # 4. connect to the test container : docker compose up test -d / docker compose exec test bash -# 5. reapply the latest saved state : poetry run alembic upgrade head +# 5. reapply the latest saved state : +poetry run alembic stamp head # 6. Save the new columns poetry run alembic revision --autogenerate -m "Add new column test for table keywords" # this should generate a file to commit inside "alembic/versions" diff --git a/alembic/versions/a5c39db3c8e9_add_new_column_test_for_table_keywords.py b/alembic/versions/a5c39db3c8e9_add_new_column_test_for_table_keywords.py new file mode 100644 index 00000000..a6290409 --- /dev/null +++ b/alembic/versions/a5c39db3c8e9_add_new_column_test_for_table_keywords.py @@ -0,0 +1,30 @@ +"""Add new column test for table keywords + +Revision ID: a5c39db3c8e9 +Revises: 5ccd746ee292 +Create Date: 2024-09-12 14:10:26.305593 + +""" +from typing import Sequence, Union + +from alembic import op +import sqlalchemy as sa + + +# revision identifiers, used by Alembic. 
+revision: str = 'a5c39db3c8e9' +down_revision: Union[str, None] = '5ccd746ee292' +branch_labels: Union[str, Sequence[str], None] = None +depends_on: Union[str, Sequence[str], None] = None + + +def upgrade() -> None: + # ### commands auto generated by Alembic - please adjust! ### + pass + # ### end Alembic commands ### + + +def downgrade() -> None: + # ### commands auto generated by Alembic - please adjust! ### + pass + # ### end Alembic commands ### diff --git a/alembic/versions/af956a85658f_add_new_column_number_of_keywords_.py b/alembic/versions/af956a85658f_add_new_column_number_of_keywords_.py new file mode 100644 index 00000000..749e6f79 --- /dev/null +++ b/alembic/versions/af956a85658f_add_new_column_number_of_keywords_.py @@ -0,0 +1,34 @@ +"""Add new column number_of_keywords climat/biod/r + +Revision ID: af956a85658f +Revises: a5c39db3c8e9 +Create Date: 2024-09-12 14:15:12.049367 + +""" +from typing import Sequence, Union + +from alembic import op +import sqlalchemy as sa + + +# revision identifiers, used by Alembic. +revision: str = 'af956a85658f' +down_revision: Union[str, None] = 'a5c39db3c8e9' +branch_labels: Union[str, Sequence[str], None] = None +depends_on: Union[str, Sequence[str], None] = None + + +def upgrade() -> None: + # ### commands auto generated by Alembic - please adjust! ### + op.add_column('keywords', sa.Column('number_of_keywords_climat', sa.Integer(), nullable=True)) + op.add_column('keywords', sa.Column('number_of_keywords_biodiversite', sa.Integer(), nullable=True)) + op.add_column('keywords', sa.Column('number_of_keywords_ressources', sa.Integer(), nullable=True)) + # ### end Alembic commands ### + + +def downgrade() -> None: + # ### commands auto generated by Alembic - please adjust! 
### + op.drop_column('keywords', 'number_of_keywords_ressources') + op.drop_column('keywords', 'number_of_keywords_biodiversite') + op.drop_column('keywords', 'number_of_keywords_climat') + # ### end Alembic commands ### diff --git a/docker-compose.yml b/docker-compose.yml index aab83f3e..fb995fcb 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -1,4 +1,3 @@ -version: '3.8' services: test: build: @@ -41,7 +40,7 @@ services: environment: ENV: docker # CHANNEL: "fr3-idf" - LOGLEVEL: INFO + LOGLEVEL: DEBUG PYTHONPATH: /app POSTGRES_USER: user POSTGRES_DB: barometre diff --git a/postgres/schemas/models.py b/postgres/schemas/models.py index 50536c15..419cdb05 100644 --- a/postgres/schemas/models.py +++ b/postgres/schemas/models.py @@ -81,9 +81,12 @@ class Keywords(Base): number_of_biodiversite_causes_directes= Column(Integer) # ALTER TABLE keywords ADD number_of_biodiversite_causes_directes integer; number_of_biodiversite_consequences= Column(Integer) # ALTER TABLE keywords ADD number_of_biodiversite_consequences integer; number_of_biodiversite_solutions_directes= Column(Integer) # ALTER TABLE keywords ADD number_of_biodiversite_solutions_directes integer; - number_of_keywords_20 = Column(Integer) # ALTER TABLE keywords ADD number_of_keywords_20 integer; - number_of_keywords_30 = Column(Integer) # ALTER TABLE keywords ADD number_of_keywords_30 integer; - number_of_keywords_40 = Column(Integer) # ALTER TABLE keywords ADD number_of_keywords_40 integer; + number_of_keywords_20 = Column(Integer) # NOT USED ANYMORE -- ALTER TABLE keywords ADD number_of_keywords_20 integer; + number_of_keywords_30 = Column(Integer) # NOT USED ANYMORE -- ALTER TABLE keywords ADD number_of_keywords_30 integer; + number_of_keywords_40 = Column(Integer) # NOT USED ANYMORE -- ALTER TABLE keywords ADD number_of_keywords_40 integer; + number_of_keywords_climat = Column(Integer) # sum of all climatique counters without duplicate (like number_of_keywords) + number_of_keywords_biodiversite = 
Column(Integer) # sum of all biodiversite counters without duplicate + number_of_keywords_ressources = Column(Integer) # sum of all ressources counters without duplicate class Channel_Metadata(Base): __tablename__ = channel_metadata_table diff --git a/quotaclimat/data_processing/mediatree/detect_keywords.py b/quotaclimat/data_processing/mediatree/detect_keywords.py index 122a6278..c7a64f17 100644 --- a/quotaclimat/data_processing/mediatree/detect_keywords.py +++ b/quotaclimat/data_processing/mediatree/detect_keywords.py @@ -117,6 +117,7 @@ def remove_stopwords(plaintext: str) -> str: def get_themes_keywords_duration(plaintext: str, subtitle_duration: List[str], start: datetime): keywords_with_timestamp = [] number_of_elements_in_array = 17 + default_window_in_seconds = 20 plaitext_without_stopwords = remove_stopwords(plaintext) logging.debug(f"display datetime start {start}") @@ -137,40 +138,46 @@ def get_themes_keywords_duration(plaintext: str, subtitle_duration: List[str], s keywords_with_timestamp.extend(keywords_to_add) if len(keywords_with_timestamp) > 0: - # count false positive near of 15" of positive keywords - keywords_with_timestamp_15 = get_keywords_with_timestamp_with_false_positive(keywords_with_timestamp, start, duration_seconds=15) - filtered_keywords_with_timestamp = filter_indirect_words(keywords_with_timestamp_15) + # count false positive near of default_window_in_seconds of positive keywords + keywords_with_timestamp_default = get_keywords_with_timestamp_with_false_positive(keywords_with_timestamp, start, duration_seconds=default_window_in_seconds) + filtered_keywords_with_timestamp = filter_indirect_words(keywords_with_timestamp_default) - theme= get_themes(keywords_with_timestamp_15) - keywords_with_timestamp= clean_metadata(keywords_with_timestamp_15) + theme= get_themes(keywords_with_timestamp_default) + keywords_with_timestamp= clean_metadata(keywords_with_timestamp_default) number_of_keywords= 
count_keywords_duration_overlap(filtered_keywords_with_timestamp, start) + + themes_climat = ["changement_climatique_constat", + "changement_climatique_causes", + "changement_climatique_consequences", + "attenuation_climatique_solutions", + "adaptation_climatique_solutions" + ] + number_of_keywords_climat= count_keywords_duration_overlap(filtered_keywords_with_timestamp, start, theme=themes_climat) + themes_biodiversite = [ + "biodiversite_concepts_generaux", + "biodiversite_causes", + "biodiversite_consequences", + "biodiversite_solutions", + ] + number_of_keywords_biodiversite= count_keywords_duration_overlap(filtered_keywords_with_timestamp, start, themes_biodiversite) + + themes_ressources = ["ressources", + "ressources_solutions", + ] + number_of_keywords_ressources= count_keywords_duration_overlap(filtered_keywords_with_timestamp, start, themes_ressources) + + number_of_changement_climatique_constat = count_keywords_duration_overlap(filtered_keywords_with_timestamp, start,theme=["changement_climatique_constat"]) + number_of_changement_climatique_causes = count_keywords_duration_overlap(filtered_keywords_with_timestamp, start,theme=["changement_climatique_causes"]) + number_of_changement_climatique_consequences = count_keywords_duration_overlap(filtered_keywords_with_timestamp, start,theme=["changement_climatique_consequences"]) + number_of_attenuation_climatique_solutions = count_keywords_duration_overlap(filtered_keywords_with_timestamp, start,theme=["attenuation_climatique_solutions"]) + number_of_adaptation_climatique_solutions = count_keywords_duration_overlap(filtered_keywords_with_timestamp, start,theme=["adaptation_climatique_solutions"]) + number_of_ressources = count_keywords_duration_overlap(filtered_keywords_with_timestamp, start,theme=["ressources"]) + number_of_ressources_solutions = count_keywords_duration_overlap(filtered_keywords_with_timestamp, start,theme=["ressources_solutions"]) + number_of_biodiversite_concepts_generaux = 
count_keywords_duration_overlap(filtered_keywords_with_timestamp, start,theme=["biodiversite_concepts_generaux"]) + number_of_biodiversite_causes = count_keywords_duration_overlap(filtered_keywords_with_timestamp, start,theme=["biodiversite_causes"]) + number_of_biodiversite_consequences = count_keywords_duration_overlap(filtered_keywords_with_timestamp, start,theme=["biodiversite_consequences"]) + number_of_biodiversite_solutions = count_keywords_duration_overlap(filtered_keywords_with_timestamp, start,theme=["biodiversite_solutions"]) - number_of_changement_climatique_constat = count_keywords_duration_overlap(filtered_keywords_with_timestamp, start,theme="changement_climatique_constat") - number_of_changement_climatique_causes = count_keywords_duration_overlap(filtered_keywords_with_timestamp, start,theme="changement_climatique_causes") - number_of_changement_climatique_consequences = count_keywords_duration_overlap(filtered_keywords_with_timestamp, start,theme="changement_climatique_consequences") - number_of_attenuation_climatique_solutions = count_keywords_duration_overlap(filtered_keywords_with_timestamp, start,theme="attenuation_climatique_solutions") - number_of_adaptation_climatique_solutions = count_keywords_duration_overlap(filtered_keywords_with_timestamp, start,theme="adaptation_climatique_solutions") - number_of_ressources = count_keywords_duration_overlap(filtered_keywords_with_timestamp, start,theme="ressources") - number_of_ressources_solutions = count_keywords_duration_overlap(filtered_keywords_with_timestamp, start,theme="ressources_solutions") - number_of_biodiversite_concepts_generaux = count_keywords_duration_overlap(filtered_keywords_with_timestamp, start,theme="biodiversite_concepts_generaux") - number_of_biodiversite_causes = count_keywords_duration_overlap(filtered_keywords_with_timestamp, start,theme="biodiversite_causes") - number_of_biodiversite_consequences = count_keywords_duration_overlap(filtered_keywords_with_timestamp, 
start,theme="biodiversite_consequences") - number_of_biodiversite_solutions = count_keywords_duration_overlap(filtered_keywords_with_timestamp, start,theme="biodiversite_solutions") - - if(os.environ.get("COMPARE_DURATION") == "true"): - logging.debug(f"Comparaison between 15/20/30/40 is activated") - keywords_with_timestamp_20 = get_keywords_with_timestamp_with_false_positive(keywords_with_timestamp, start, duration_seconds=20) - keywords_with_timestamp_30 = get_keywords_with_timestamp_with_false_positive(keywords_with_timestamp, start, duration_seconds=30) - keywords_with_timestamp_40 = get_keywords_with_timestamp_with_false_positive(keywords_with_timestamp, start, duration_seconds=40) - number_of_keywords_20= count_keywords_duration_overlap(filter_indirect_words(keywords_with_timestamp_20), start) - number_of_keywords_30= count_keywords_duration_overlap(filter_indirect_words(keywords_with_timestamp_30), start) - number_of_keywords_40= count_keywords_duration_overlap(filter_indirect_words(keywords_with_timestamp_40), start) - else: - logging.debug(f"No comparaison between 15/20/30/40 is activated") - number_of_keywords_20=None - number_of_keywords_30=None - number_of_keywords_40=None - # TODO refacto this return array and else return [ theme ,keywords_with_timestamp @@ -186,10 +193,9 @@ def get_themes_keywords_duration(plaintext: str, subtitle_duration: List[str], s ,number_of_biodiversite_causes ,number_of_biodiversite_consequences ,number_of_biodiversite_solutions - # number_of_keywords with special duration to compare duration - ,number_of_keywords_20 - ,number_of_keywords_30 - ,number_of_keywords_40 + ,number_of_keywords_climat + ,number_of_keywords_biodiversite + ,number_of_keywords_ressources ] else: return [None] * number_of_elements_in_array @@ -244,9 +250,9 @@ def filter_and_tag_by_theme(df: pd.DataFrame) -> pd.DataFrame : 'number_of_biodiversite_causes_directes', 'number_of_biodiversite_consequences', 'number_of_biodiversite_solutions_directes' - 
,'number_of_keywords_20' - ,'number_of_keywords_30' - ,'number_of_keywords_40' + ,"number_of_keywords_climat" + ,"number_of_keywords_biodiversite" + ,"number_of_keywords_ressources" ] ] = df[['plaintext','srt', 'start']]\ .swifter.apply(\ @@ -275,14 +281,14 @@ def add_primary_key(row): def filter_indirect_words(keywords_with_timestamp: List[dict]) -> List[dict]: return list(filter(lambda kw: indirectes not in kw['theme'], keywords_with_timestamp)) -def count_keywords_duration_overlap(keywords_with_timestamp: List[dict], start: datetime, theme: str = None) -> int: +def count_keywords_duration_overlap(keywords_with_timestamp: List[dict], start: datetime, theme: List[str] = None) -> int: total_keywords = len(keywords_with_timestamp) if(total_keywords) == 0: return 0 else: if theme is not None: logging.debug(f"filter theme {theme}") - keywords_with_timestamp = list(filter(lambda kw: kw['theme'] == theme, keywords_with_timestamp)) + keywords_with_timestamp = list(filter(lambda kw: kw['theme'] in theme, keywords_with_timestamp)) length_filtered_items = len(keywords_with_timestamp) diff --git a/quotaclimat/data_processing/mediatree/update_pg_keywords.py b/quotaclimat/data_processing/mediatree/update_pg_keywords.py index 3f782d74..8bc78059 100644 --- a/quotaclimat/data_processing/mediatree/update_pg_keywords.py +++ b/quotaclimat/data_processing/mediatree/update_pg_keywords.py @@ -47,9 +47,9 @@ def update_keywords(session: Session, batch_size: int = 50000, start_date : str ,number_of_biodiversite_causes_directes \ ,number_of_biodiversite_consequences \ ,number_of_biodiversite_solutions_directes \ - ,new_number_of_keywords_20 \ - ,new_number_of_keywords_30 \ - ,new_number_of_keywords_40 = get_themes_keywords_duration(plaintext, srt, start) + ,new_number_of_keywords_climat \ + ,new_number_of_keywords_biodiversite \ + ,new_number_of_keywords_ressources = get_themes_keywords_duration(plaintext, srt, start) except Exception as err: logging.error(f"continuing loop but met error : 
{err}") continue @@ -83,9 +83,9 @@ def update_keywords(session: Session, batch_size: int = 50000, start_date : str ,number_of_biodiversite_consequences ,number_of_biodiversite_solutions_directes ,channel_title=channel_title - ,number_of_keywords_20=new_number_of_keywords_20 - ,number_of_keywords_30=new_number_of_keywords_30 - ,number_of_keywords_40=new_number_of_keywords_40 + ,number_of_keywords_climat=new_number_of_keywords_climat + ,number_of_keywords_biodiversite=new_number_of_keywords_biodiversite + ,number_of_keywords_ressources=new_number_of_keywords_ressources ) else: program_name, program_name_type = get_a_program_with_start_timestamp(df_programs, pd.Timestamp(start).tz_convert('Europe/Paris'), channel_name) @@ -145,9 +145,9 @@ def update_keyword_row(session: Session, number_of_biodiversite_consequences: int, number_of_biodiversite_solutions_directes: int, channel_title: str - ,number_of_keywords_20: int - ,number_of_keywords_30: int - ,number_of_keywords_40: int + ,number_of_keywords_climat: int + ,number_of_keywords_biodiversite: int + ,number_of_keywords_ressources: int ): if matching_themes is not None: session.query(Keywords).filter(Keywords.id == keyword_id).update( @@ -167,9 +167,9 @@ def update_keyword_row(session: Session, Keywords.number_of_biodiversite_consequences:number_of_biodiversite_consequences , Keywords.number_of_biodiversite_solutions_directes:number_of_biodiversite_solutions_directes, Keywords.channel_title: channel_title - ,Keywords.number_of_keywords_20: number_of_keywords_20 - ,Keywords.number_of_keywords_30: number_of_keywords_30 - ,Keywords.number_of_keywords_40: number_of_keywords_40 + ,Keywords.number_of_keywords_climat: number_of_keywords_climat + ,Keywords.number_of_keywords_biodiversite: number_of_keywords_biodiversite + ,Keywords.number_of_keywords_ressources: number_of_keywords_ressources }, synchronize_session=False ) diff --git a/test/sitemap/test_detect_keywords.py b/test/sitemap/test_detect_keywords.py index 
27d5b380..f345ccba 100644 --- a/test/sitemap/test_detect_keywords.py +++ b/test/sitemap/test_detect_keywords.py @@ -106,16 +106,16 @@ def test_one_theme_get_themes_keywords_duration(): number_of_biodiversite_causes_directes, number_of_biodiversite_consequences, number_of_biodiversite_solutions_directes - ,number_of_keywords_20, - number_of_keywords_30, - number_of_keywords_40) = get_themes_keywords_duration(plaintext_climat, subtitles, start) + ,number_of_keywords_climat, + number_of_keywords_biodiversite, + number_of_keywords_ressources) = get_themes_keywords_duration(plaintext_climat, subtitles, start) assert set(themes_output) == set(themes) # assert compare_unordered_lists_of_dicts(keywords_output, keywords) assert number_of_keywords == 1 - assert number_of_keywords_20 == 1 - assert number_of_keywords_30 == 1 - assert number_of_keywords_40 == 1 + assert number_of_keywords_climat == 1 + assert number_of_keywords_biodiversite == 0 + assert number_of_keywords_ressources == 1 assert number_of_changement_climatique_constat == 1 assert number_of_changement_climatique_causes_directes == 0 assert number_of_changement_climatique_consequences == 0 @@ -218,14 +218,15 @@ def test_long_sentence_theme_get_themes_keywords_duration(): number_of_biodiversite_causes_directes, number_of_biodiversite_consequences, number_of_biodiversite_solutions_directes - ,number_of_keywords_20, - number_of_keywords_30, - number_of_keywords_40) = get_themes_keywords_duration(plaintext_climat, subtitles, start) + ,number_of_keywords_climat, + number_of_keywords_biodiversite, + number_of_keywords_ressources) = get_themes_keywords_duration(plaintext_climat, subtitles, start) assert set(themes_output) == set(themes) # assert compare_unordered_lists_of_dicts(keywords_output, keywords) - assert number_of_keywords == 3 - assert number_of_changement_climatique_constat == 2 + assert number_of_keywords == 2 + assert number_of_keywords_climat == 2 + assert number_of_changement_climatique_constat == 1 
assert number_of_changement_climatique_causes_directes == 0 assert number_of_changement_climatique_consequences == 0 assert number_of_attenuation_climatique_solutions_directes == 0 @@ -277,9 +278,9 @@ def test_three_get_themes_keywords_duration(): number_of_biodiversite_causes_directes, number_of_biodiversite_consequences, number_of_biodiversite_solutions_directes - ,number_of_keywords_20, - number_of_keywords_30, - number_of_keywords_40) = get_themes_keywords_duration("record de température pizza adaptation au dérèglement climatique", subtitles, start) + ,number_of_keywords_climat, + number_of_keywords_biodiversite, + number_of_keywords_ressources) = get_themes_keywords_duration("record de température pizza adaptation au dérèglement climatique", subtitles, start) assert set(themes_output)== themes #assert keywords_output == keywords @@ -335,9 +336,9 @@ def test_long_get_themes_keywords_duration(): number_of_biodiversite_causes_directes, number_of_biodiversite_consequences, number_of_biodiversite_solutions_directes - ,number_of_keywords_20, - number_of_keywords_30, - number_of_keywords_40) = get_themes_keywords_duration("il rencontre aussi une crise majeure de la pénurie de l' offre laetitia jaoude des barrages sauvages", subtitles, start) + ,number_of_keywords_climat, + number_of_keywords_biodiversite, + number_of_keywords_ressources) = get_themes_keywords_duration("il rencontre aussi une crise majeure de la pénurie de l' offre laetitia jaoude des barrages sauvages", subtitles, start) assert set(themes_output) == set(themes) # too hard to maintain #assert compare_unordered_lists_of_dicts(keywords_output, keywords) @@ -494,9 +495,9 @@ def test_lower_case_filter_and_tag_by_theme(): "number_of_biodiversite_causes_directes": 0, "number_of_biodiversite_consequences": 0, "number_of_biodiversite_solutions_directes" :0 - ,'number_of_keywords_20':1, - 'number_of_keywords_30':1, - 'number_of_keywords_40':1 + ,'number_of_keywords_climat':1, + 
'number_of_keywords_biodiversite':0, + 'number_of_keywords_ressources':0 }]) # List of words to filter on @@ -547,9 +548,9 @@ def test_singular_plural_case_filter_and_tag_by_theme(): "number_of_biodiversite_causes_directes": 0, "number_of_biodiversite_consequences": 0, "number_of_biodiversite_solutions_directes" :0 - ,'number_of_keywords_20':1, - 'number_of_keywords_30':1, - 'number_of_keywords_40':1 + ,'number_of_keywords_climat':1, + 'number_of_keywords_biodiversite':0, + 'number_of_keywords_ressources':0 }]) # List of words to filter on @@ -631,7 +632,7 @@ def test_complexe_filter_and_tag_by_theme(): "theme":"changement_climatique_constat", } ], - "number_of_keywords": 2, + "number_of_keywords": 1, "number_of_changement_climatique_constat": 1, "number_of_changement_climatique_causes_directes": 0, "number_of_changement_climatique_consequences": 0, @@ -643,9 +644,9 @@ def test_complexe_filter_and_tag_by_theme(): "number_of_biodiversite_causes_directes": 0, "number_of_biodiversite_consequences": 0, "number_of_biodiversite_solutions_directes" :0 - ,'number_of_keywords_20':2, - 'number_of_keywords_30':2, - 'number_of_keywords_40':2 + ,'number_of_keywords_climat':1, + 'number_of_keywords_biodiversite':0, + 'number_of_keywords_ressources':0 }]) # List of words to filter on diff --git a/test/sitemap/test_main_import_api.py b/test/sitemap/test_main_import_api.py index 5d02332d..8dcb5b84 100644 --- a/test/sitemap/test_main_import_api.py +++ b/test/sitemap/test_main_import_api.py @@ -51,20 +51,17 @@ def test_second_row_api_import(): assert set(specific_keyword.theme) == set([ 'adaptation_climatique_solutions', 'attenuation_climatique_solutions', - 'attenuation_climatique_solutions_indirectes', - 'biodiversite_causes_indirectes', - 'biodiversite_concepts_generaux_indirectes', + 'biodiversite_causes', 'biodiversite_solutions', - 'biodiversite_solutions_indirectes', - 'changement_climatique_causes_indirectes', - 'changement_climatique_consequences_indirectes', + 
'biodiversite_concepts_generaux', + 'changement_climatique_causes', + 'changement_climatique_consequences', 'changement_climatique_constat', 'ressources', 'ressources_solutions', - ]) - assert specific_keyword.number_of_keywords == 6 + assert specific_keyword.number_of_keywords == 5 def test_third_row_api_import(): @@ -80,5 +77,5 @@ def test_third_row_api_import(): ]) - assert specific_keyword.number_of_keywords == 2 + assert specific_keyword.number_of_keywords == 1 diff --git a/test/sitemap/test_update_pg_keywords.py b/test/sitemap/test_update_pg_keywords.py index 9db538e5..1ccbbe60 100644 --- a/test/sitemap/test_update_pg_keywords.py +++ b/test/sitemap/test_update_pg_keywords.py @@ -45,9 +45,9 @@ def test_delete_keywords(): "channel_program_type": "to change", "channel_program":"to change" ,"channel_title":"channel_title" - ,"number_of_keywords_20": wrong_value - ,"number_of_keywords_30": wrong_value - ,"number_of_keywords_40": wrong_value + ,"number_of_keywords_climat": wrong_value + ,"number_of_keywords_biodiversite": wrong_value + ,"number_of_keywords_ressources": wrong_value }]) assert save_to_pg(df, keywords_table, conn) == 1 session = get_db_session(conn) @@ -181,9 +181,9 @@ def test_first_update_keywords(): "channel_program_type": "to change", "channel_program":"to change" ,"channel_title":None - ,"number_of_keywords_20": wrong_value - ,"number_of_keywords_30": wrong_value - ,"number_of_keywords_40": wrong_value + ,"number_of_keywords_climat": wrong_value + ,"number_of_keywords_biodiversite": wrong_value + ,"number_of_keywords_ressources": wrong_value }]) assert save_to_pg(df, keywords_table, conn) == 1 @@ -206,9 +206,9 @@ def test_first_update_keywords(): ,number_of_biodiversite_causes_directes \ ,number_of_biodiversite_consequences \ ,number_of_biodiversite_solutions_directes \ - ,number_of_keywords_20 \ - ,number_of_keywords_30 \ - ,number_of_keywords_40 = get_themes_keywords_duration(plaintext, srt, start) + ,number_of_keywords_climat \ + 
,number_of_keywords_biodiversite \ + ,number_of_keywords_ressources = get_themes_keywords_duration(plaintext, srt, start) expected_keywords_with_timestamp = [ {'category': 'Ecosystème', 'keyword': 'conditions de vie sur terre', 'timestamp': original_timestamp + 15000, 'theme': 'changement_climatique_constat'}, @@ -260,6 +260,6 @@ def test_first_update_keywords(): #channel_title assert result_after_update.channel_title == "M6" - # number_of_keywords_20 - assert result_after_update.number_of_keywords_20 == number_of_keywords_20 + # number_of_keywords_climat + assert result_after_update.number_of_keywords_climat == number_of_keywords_climat