From ad3749c0246c89dd2f2c14a2eea1272a9cce9084 Mon Sep 17 00:00:00 2001 From: Alexandre Quemy Date: Tue, 14 Nov 2023 16:30:33 +0100 Subject: [PATCH] fix: update database model and case creation (#200) --- echr/data_models/case.py | 2 -- echr/steps/cases_info.py | 6 +++--- echr/steps/generate_sqlite.py | 11 +++++++---- echr/steps/prepare_database.py | 9 +++++---- 4 files changed, 15 insertions(+), 13 deletions(-) diff --git a/echr/data_models/case.py b/echr/data_models/case.py index 3068fe2..f0ed69e 100644 --- a/echr/data_models/case.py +++ b/echr/data_models/case.py @@ -22,9 +22,7 @@ class Case(BaseModel): originatingbody_type = pw.CharField() rank = pw.CharField() respondent = pw.CharField() - respondentOrderEng = pw.CharField() separateopinion = pw.BooleanField() - sharepointid = pw.IntegerField() typedescription = pw.IntegerField() judgment = JSONField(null=True) diff --git a/echr/steps/cases_info.py b/echr/steps/cases_info.py index d847055..e20dee0 100644 --- a/echr/steps/cases_info.py +++ b/echr/steps/cases_info.py @@ -112,19 +112,19 @@ def get_cases_info_step(year, progress, task): progress.update(task, advance=0, error=error) else: failed_to_get_some_cases = True - progress.update(task, advance=1, to_be_completed=len(YEARS)) + progress.update(task, advance=1, to_be_completed=len(YEARS), year=year) return failed_to_get_some_cases with Progress( TAB + "> Downloading... [IN PROGRESS]\n", BarColumn(30), TimeRemainingColumn(), - "| ({task.completed}/{task.total}) Fetching cases information for year {task.completed}" + "| ({task.completed}/{task.total}) Fetching cases information for year {task.fields[year]}" "{task.fields[error]}", transient=True, console=console ) as progress: - task = progress.add_task("Downloading...", total=len(YEARS), to_be_completed=len(YEARS), error="") + task = progress.add_task("Downloading...", total=len(YEARS), to_be_completed=len(YEARS), year=YEARS[0], error="") f = lambda x: get_cases_info_step(x, progress, task) with ThreadPoolExecutor(16) as executor: results = list(executor.map(f, YEARS)) diff --git a/echr/steps/generate_sqlite.py b/echr/steps/generate_sqlite.py index 940bb2e..cd79dad 100644 --- a/echr/steps/generate_sqlite.py +++ b/echr/steps/generate_sqlite.py @@ -94,12 +94,15 @@ def populate_database(console, build, update, doc_ids): try: with db.atomic(): date_keys = ['decisiondate', 'introductiondate', 'judgementdate', 'kpdate'] + formats = ['%d/%m/%Y %H:%M:%S', '%d/%m/%YT%H:%M:%S', '%d/%m/%Y'] for k in date_keys: if case[k]: - try: - case[k] = datetime.strptime(case[k], '%d/%m/%Y %H:%M:%S') - except: - case[k] = datetime.strptime(case[k], '%d/%m/%Y') + for fmt in formats: + try: + case[k] = datetime.strptime(case[k], fmt) + break + except: + continue else: del case[k] parties = case.get('parties', []) diff --git a/echr/steps/prepare_database.py b/echr/steps/prepare_database.py index 4930368..313f0e4 100644 --- a/echr/steps/prepare_database.py +++ b/echr/steps/prepare_database.py @@ -61,15 +61,16 @@ def format_structured_json(cases_list): c['respondent'] = c['respondent'].split(';') # c['applicability'] = c['applicability'].strip().split(';') c['appno'] = c['appno'].split(';')[0] - c['decisiondate'] = c['decisiondate'].split(' ')[0] - c['judgementdate'] = c['judgementdate'].split(' ')[0] - c['introductiondate'] = c['introductiondate'].split(' ')[0] - c['kpdate'] = c['kpdate'].split(' ')[0] + c['decisiondate'] = c['decisiondate'].split(' ')[0].split('T')[0].replace('-', '/') + c['judgementdate'] = c['judgementdate'].split(' ')[0].split('T')[0].replace('-', '/') + c['introductiondate'] = c['introductiondate'].split(' ')[0].split('T')[0].replace('-', '/') + c['kpdate'] = c['kpdate'].split(' ')[0].split('T')[0] c['separateopinion'] = True if c['separateopinion'] == 'TRUE' else False c['country'] = c['country']['alpha2'] c['parties'] = c['parties'][0] c['decision_body'] = [e['name'] for e in c['decision_body']] + del c['docname'] del c['attachments'] del c['representedby']