def save_cases_list(build, cases_files):
    """Save the identifiers of the cases used by a build, for reproducibility.

    One identifier is written per line to ``build_cases.txt``, created
    directly inside the build folder. Any existing file is overwritten.

    :param build: path of the build folder.
    :param cases_files: iterable of paths to the case files. The identifier
        of a case is the part of its file name that precedes the first
        underscore; a file name without underscore is kept whole.
    """
    path = os.path.join(build, 'build_cases.txt')
    # The input paths are built with os.path.join, so take the file name with
    # os.path.basename instead of splitting on a hard-coded '/' separator.
    case_ids = (
        os.path.basename(case_file).split('_')[0] for case_file in cases_files
    )
    with open(path, 'w') as f:
        f.writelines('{}\n'.format(case_id) for case_id in case_ids)