Skip to content

Commit

Permalink
review: keywords category (industrie/general) (#231)
Browse files Browse the repository at this point in the history
  • Loading branch information
polomarcus authored Sep 6, 2024
1 parent f1ebcee commit 3341d0a
Show file tree
Hide file tree
Showing 5 changed files with 34 additions and 19 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
secrets/pwd_api.txt
secrets/username_api.txt
documents-experts/
cc-bio.json
*.xlsx
coverage_re
Expand Down
27 changes: 26 additions & 1 deletion poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ tenacity = "^8.2.3"
sentry-sdk = "^2.13.0"
modin = {extras = ["ray"], version = "^0.31.0"}
numpy = "1.26.4"
openpyxl = "^3.1.5"
[build-system]
requires = ["poetry-core>=1.1"]
build-backend = "poetry.core.masonry.api"
Expand Down
2 changes: 1 addition & 1 deletion quotaclimat/data_processing/mediatree/api_import.py
Original file line number Diff line number Diff line change
Expand Up @@ -230,7 +230,7 @@ def parse_reponse_subtitle(response_sub, channel = None, channel_program = "", c
logging.getLogger("modin.logging.default").setLevel(logging.WARNING)
if(total_results > 0):
logging.info(f"{total_results} 'total_results' field")
- new_df : pd.DataFrame = json_normalize(response_sub.get('data'))
+ new_df : pd.DataFrame = json_normalize(response_sub.get('data')) # TODO UserWarning: json_normalize is not currently supported by PandasOnRay, defaulting to pandas implementation.
logging.debug("Schema from API before formatting :\n%s", new_df.dtypes)
pd.set_option('display.max_columns', None)
logging.debug("head: :\n%s", new_df.head())
Expand Down
22 changes: 5 additions & 17 deletions quotaclimat/data_processing/mediatree/keyword/keyword.py
Original file line number Diff line number Diff line change
Expand Up @@ -1048,19 +1048,19 @@
},
{
"keyword": "polluer",
- "category": "general"
+ "category": "General"
},
{
"keyword": "pollution",
- "category": "general"
+ "category": "General"
},
{
"keyword": "pollution de l’air",
"category": "General"
},
{
"keyword": "rejets industriels",
- "category": "industrie"
+ "category": "Industrie"
},
{
"keyword": "site minier",
Expand Down Expand Up @@ -2986,7 +2986,7 @@
},
{
"keyword": "relocaliser la production",
- "category": "industrie"
+ "category": "Industrie"
},
{
"keyword": "rénovation",
Expand Down Expand Up @@ -3016,10 +3016,6 @@
"keyword": "restreindre",
"category": "General"
},
- {
- "keyword": "restreindre",
- "category": "general"
- },
{
"keyword": "restriction",
"category": "General"
Expand Down Expand Up @@ -3416,7 +3412,7 @@
},
{
"keyword": "usine",
- "category": "industrie"
+ "category": "Industrie"
},
{
"keyword": "vache",
Expand Down Expand Up @@ -4652,10 +4648,6 @@
"keyword": "norme",
"category": "General"
},
- {
- "keyword": "norme",
- "category": "General"
- },
{
"keyword": "parc",
"category": "Ecosystème"
Expand Down Expand Up @@ -4720,10 +4712,6 @@
"keyword": "restreindre",
"category": "General"
},
- {
- "keyword": "restreindre",
- "category": "general"
- },
{
"keyword": "restriction",
"category": "General"
Expand Down

1 comment on commit 3341d0a

@github-actions
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Coverage

Coverage Report
File | Stmts | Miss | Cover | Missing
postgres
   insert_data.py | 43 | 7 | 84% | 36–38, 56–58, 63
   insert_existing_data_example.py | 19 | 3 | 84% | 25–27
postgres/schemas
   models.py | 147 | 10 | 93% | 121–128, 140–141, 199–200, 214–215
quotaclimat/data_ingestion
   scrap_sitemap.py | 134 | 17 | 87% | 27–28, 33–34, 66–71, 95–97, 138–140, 202, 223–228
quotaclimat/data_ingestion/ingest_db
   ingest_sitemap_in_db.py | 55 | 37 | 33% | 21–42, 45–58, 62–73
quotaclimat/data_ingestion/scrap_html
   scrap_description_article.py | 36 | 3 | 92% | 19–20, 32
quotaclimat/data_processing/mediatree
   api_import.py | 211 | 132 | 37% | 44–48, 53–69, 73–76, 82, 85–126, 132–147, 151–152, 165–177, 181–187, 200–212, 215–219, 225, 261–262, 265–301, 304–306
   channel_program.py | 157 | 56 | 64% | 28–30, 41–43, 60–61, 64–66, 93, 105, 114, 154–195
   config.py | 15 | 2 | 87% | 7, 16
   detect_keywords.py | 214 | 12 | 94% | 169–172, 216, 266–273
   update_pg_keywords.py | 54 | 39 | 28% | 14–100, 125–129, 152–178, 184
   utils.py | 69 | 22 | 68% | 27–51, 54, 63, 84–85
quotaclimat/utils
   healthcheck_config.py | 29 | 14 | 52% | 22–24, 27–38
   logger.py | 24 | 11 | 54% | 22–24, 28–37
   sentry.py | 11 | 2 | 82% | 22–23
TOTAL | 1244 | 367 | 70% |

Tests Skipped Failures Errors Time
87 0 💤 0 ❌ 0 🔥 1m 39s ⏱️

Please sign in to comment.