From e453548fe69662b2e66dfd87d0a7ee94c270a995 Mon Sep 17 00:00:00 2001 From: Tamara Slosarek Date: Fri, 11 Oct 2024 17:52:34 +0200 Subject: [PATCH] feat(scripts): add any fallback guideline check --- scripts/README.md | 3 +- scripts/{run_analysis.py => analyze.py} | 33 +++++++++++-------- scripts/analyze/checks/consult.py | 4 --- .../checks/brand_name_whitespace.py | 3 +- scripts/analyze_functions/checks/consult.py | 5 +++ .../metabolization_before_consequence.py | 5 +-- .../checks/metabolization_severity.py | 6 ++-- .../checks/single_any_fallback_guideline.py | 23 +++++++++++++ .../checks/warning_levels.py | 14 +++++--- .../constants.py | 0 .../corrections/brand_name_whitespace.py | 0 .../corrections/consult.py | 4 +-- .../data_helpers.py | 0 13 files changed, 69 insertions(+), 31 deletions(-) rename scripts/{run_analysis.py => analyze.py} (83%) delete mode 100644 scripts/analyze/checks/consult.py rename scripts/{analyze => analyze_functions}/checks/brand_name_whitespace.py (75%) create mode 100644 scripts/analyze_functions/checks/consult.py rename scripts/{analyze => analyze_functions}/checks/metabolization_before_consequence.py (84%) rename scripts/{analyze => analyze_functions}/checks/metabolization_severity.py (87%) create mode 100644 scripts/analyze_functions/checks/single_any_fallback_guideline.py rename scripts/{analyze => analyze_functions}/checks/warning_levels.py (84%) rename scripts/{analyze => analyze_functions}/constants.py (100%) rename scripts/{analyze => analyze_functions}/corrections/brand_name_whitespace.py (100%) rename scripts/{analyze => analyze_functions}/corrections/consult.py (76%) rename scripts/{analyze => analyze_functions}/data_helpers.py (100%) diff --git a/scripts/README.md b/scripts/README.md index 4c0c99a3..8040dad1 100644 --- a/scripts/README.md +++ b/scripts/README.md @@ -85,7 +85,7 @@ Run `python clean.py` to remove the `scripts/temp` directory and all files in ## Analyze (and correct) annotations -Run `python run_analysis.py 
[--correct]` to analyze annotations +Run `python analyze.py [--correct]` to analyze annotations and optionally correct what can be corrected easily in `_corrected_.base64.json`. @@ -94,6 +94,7 @@ and optionally correct what can be corrected easily in | Check | Description | `--correct`ed | Only for single-gene results* | | ----- | ----------- | ------------- | ----------------------------- | | `brand_whitespace` | Drug brand names should not have leading or trailing white space. | ✅ | ❌ | +| `single_any_fallback` | If an "any fallback" guideline (`*`) is present, it should be the drug's only guideline (otherwise the other guidelines are ignored). | ❌ | ❌ | ### Guideline annotation checks diff --git a/scripts/run_analysis.py b/scripts/analyze.py similarity index 83% rename from scripts/run_analysis.py rename to scripts/analyze.py index 9a94fd1c..8a94f1e7 100644 --- a/scripts/run_analysis.py +++ b/scripts/analyze.py @@ -1,23 +1,25 @@ import sys -from analyze.checks.brand_name_whitespace import check_brand_name_whitespace -from analyze.checks.metabolization_before_consequence import check_metabolization_before_consequence -from analyze.checks.warning_levels import check_green_warning_level, \ +from analyze_functions.checks.brand_name_whitespace import check_brand_name_whitespace +from analyze_functions.checks.metabolization_before_consequence import check_metabolization_before_consequence +from analyze_functions.checks.single_any_fallback_guideline import check_single_any_fallback_guideline +from analyze_functions.checks.warning_levels import check_green_warning_level, \ check_none_warning_level, check_red_warning_level, \ check_yellow_warning_level -from analyze.checks.consult import has_consult -from analyze.checks.metabolization_severity import check_metabolization_severity +from analyze_functions.checks.consult import has_consult +from analyze_functions.checks.metabolization_severity import check_metabolization_severity -from analyze.corrections.consult import add_consult 
-from analyze.corrections.brand_name_whitespace import correct_brand_name_whitespace +from analyze_functions.corrections.consult import add_consult +from analyze_functions.corrections.brand_name_whitespace import correct_brand_name_whitespace -from analyze.data_helpers import get_drug_annotations, get_guideline_annotations, has_annotations +from analyze_functions.data_helpers import get_drug_annotations, get_guideline_annotations, has_annotations from common.constants import DRUG_COLLECTION_NAME, SCRIPT_POSTFIXES from common.get_data import get_data, get_guideline_by_id, get_phenotype_key from common.write_data import write_data, write_log DRUG_CHECKS = { 'brand_whitespace': check_brand_name_whitespace, + 'single_any_fallback': check_single_any_fallback_guideline, } DRUG_CORRECTIONS = { @@ -38,11 +40,14 @@ 'has_consult': add_consult, } - -def analyze_annotations(item, annotations, checks): +def analyze_annotations(item, annotations, checks, data = None): results = {} for check_name, check_function in checks.items(): - results[check_name] = check_function(item, annotations) + results[check_name] = check_function({ + 'item': item, + 'annotations': annotations, + 'data': data, + }) return results def correct_inconsistency(data, item, check_name, corrections): @@ -87,7 +92,7 @@ def handle_failed_checks( log_all_passed(log_content, postfix=skipped_checks_string) return len(skipped_checks), len(failed_checks) -def main(): +def run_analyses(): correct_inconsistencies = '--correct' in sys.argv data = get_data() missing_drug_annotation_count = 0 @@ -106,7 +111,7 @@ def main(): log_not_annotated(log_content) else: drug_result = analyze_annotations( - drug, drug_annotations, DRUG_CHECKS) + drug, drug_annotations, DRUG_CHECKS, data) if not all(drug_result.values()): skipped, failed = handle_failed_checks(data, drug, drug_result, DRUG_CORRECTIONS, correct_inconsistencies, @@ -154,4 +159,4 @@ def main(): write_data(data, postfix=SCRIPT_POSTFIXES['correct']) if __name__ == 
'__main__': - main() \ No newline at end of file + run_analyses() \ No newline at end of file diff --git a/scripts/analyze/checks/consult.py b/scripts/analyze/checks/consult.py deleted file mode 100644 index 259d904f..00000000 --- a/scripts/analyze/checks/consult.py +++ /dev/null @@ -1,4 +0,0 @@ -from analyze.constants import CONSULT_TEXT - -def has_consult(_, annotations): - return CONSULT_TEXT in annotations['recommendation'] \ No newline at end of file diff --git a/scripts/analyze/checks/brand_name_whitespace.py b/scripts/analyze_functions/checks/brand_name_whitespace.py similarity index 75% rename from scripts/analyze/checks/brand_name_whitespace.py rename to scripts/analyze_functions/checks/brand_name_whitespace.py index 3c1e41a8..751ecf5c 100644 --- a/scripts/analyze/checks/brand_name_whitespace.py +++ b/scripts/analyze_functions/checks/brand_name_whitespace.py @@ -1,4 +1,5 @@ -def check_brand_name_whitespace(_, annotations): +def check_brand_name_whitespace(args): + annotations = args['annotations'] check_applies = True for brand_name in annotations['brand_names']: trimmed_name = brand_name.strip() diff --git a/scripts/analyze_functions/checks/consult.py b/scripts/analyze_functions/checks/consult.py new file mode 100644 index 00000000..5bc87db3 --- /dev/null +++ b/scripts/analyze_functions/checks/consult.py @@ -0,0 +1,5 @@ +from analyze_functions.constants import CONSULT_TEXT + +def has_consult(args): + annotations = args['annotations'] + return CONSULT_TEXT in annotations['recommendation'] \ No newline at end of file diff --git a/scripts/analyze/checks/metabolization_before_consequence.py b/scripts/analyze_functions/checks/metabolization_before_consequence.py similarity index 84% rename from scripts/analyze/checks/metabolization_before_consequence.py rename to scripts/analyze_functions/checks/metabolization_before_consequence.py index 3c84ec19..7fdc4208 100644 --- a/scripts/analyze/checks/metabolization_before_consequence.py +++ 
b/scripts/analyze_functions/checks/metabolization_before_consequence.py @@ -1,4 +1,4 @@ -import analyze.constants as constants +import analyze_functions.constants as constants def _get_first_substring_position(string, substrings): positions = list(filter( @@ -11,7 +11,8 @@ def _get_first_substring_position(string, substrings): if (len(positions) == 0): return None return min(positions) -def check_metabolization_before_consequence(_, annotations): +def check_metabolization_before_consequence(args): + annotations = args['annotations'] implication = annotations['implication'] metabolization_position = _get_first_substring_position( implication, diff --git a/scripts/analyze/checks/metabolization_severity.py b/scripts/analyze_functions/checks/metabolization_severity.py similarity index 87% rename from scripts/analyze/checks/metabolization_severity.py rename to scripts/analyze_functions/checks/metabolization_severity.py index 2f26cacd..73691909 100644 --- a/scripts/analyze/checks/metabolization_severity.py +++ b/scripts/analyze_functions/checks/metabolization_severity.py @@ -1,6 +1,8 @@ -import analyze.constants as constants +import analyze_functions.constants as constants -def check_metabolization_severity(guideline, annotations): +def check_metabolization_severity(args): + guideline = args['item'] + annotations = args['annotations'] ignored_phenotypes = ['no result', 'indeterminate', 'normal metabolizer'] multiple_relevant_phenotypes = False relevant_gene = None diff --git a/scripts/analyze_functions/checks/single_any_fallback_guideline.py b/scripts/analyze_functions/checks/single_any_fallback_guideline.py new file mode 100644 index 00000000..4e7a8474 --- /dev/null +++ b/scripts/analyze_functions/checks/single_any_fallback_guideline.py @@ -0,0 +1,23 @@ +from common.get_data import get_guideline_by_id + +def check_single_any_fallback_guideline(args): + drug = args['item'] + data = args['data'] + guidelines = list(map( + lambda guideline_id: get_guideline_by_id(data, 
guideline_id), + drug['guidelines'], + )) + has_any_fallback = False + for guideline in guidelines: + for gene in guideline['lookupkey'].keys(): + for lookupValue in guideline['lookupkey'][gene]: + is_any_fallback = lookupValue == '*' + if is_any_fallback and len(guideline['lookupkey'][gene]) != 1: + print(gene) + print(guideline['lookupkey']) + print( + '[WARNING] Multiple lookupkeys with present "any ' + 'fallback", all other than * are ignored' + ) + has_any_fallback = has_any_fallback or is_any_fallback + return not has_any_fallback or len(guidelines) == 1 \ No newline at end of file diff --git a/scripts/analyze/checks/warning_levels.py b/scripts/analyze_functions/checks/warning_levels.py similarity index 84% rename from scripts/analyze/checks/warning_levels.py rename to scripts/analyze_functions/checks/warning_levels.py index 19f790a1..22e82c08 100644 --- a/scripts/analyze/checks/warning_levels.py +++ b/scripts/analyze_functions/checks/warning_levels.py @@ -1,4 +1,4 @@ -import analyze.constants as constants +import analyze_functions.constants as constants def should_be_red(annotations): return constants.RED_TEXT in annotations['recommendation'] and all(map( @@ -24,25 +24,29 @@ def should_be_green(annotations): constants.GREEN_TEXTS, )) -def check_red_warning_level(_, annotations): +def check_red_warning_level(args): + annotations = args['annotations'] has_warning_level = annotations['warning_level'] == 'red' should_have_warning_level = should_be_red(annotations) return has_warning_level == should_have_warning_level -def check_yellow_warning_level(_, annotations): +def check_yellow_warning_level(args): + annotations = args['annotations'] has_warning_level = annotations['warning_level'] == 'yellow' should_have_warning_level = not should_be_red(annotations) and \ should_be_yellow(annotations) return has_warning_level if should_have_warning_level else True -def check_green_warning_level(_, annotations): +def check_green_warning_level(args): + annotations = 
args['annotations'] has_warning_level = annotations['warning_level'] == 'green' should_have_warning_level = not should_be_red(annotations) and \ not should_be_yellow(annotations) and \ should_be_green(annotations) return has_warning_level == should_have_warning_level -def check_none_warning_level(_, annotations): +def check_none_warning_level(args): + annotations = args['annotations'] has_warning_level = annotations['warning_level'] == 'none' should_have_warning_level = not should_be_red(annotations) and \ not should_be_yellow(annotations) and \ diff --git a/scripts/analyze/constants.py b/scripts/analyze_functions/constants.py similarity index 100% rename from scripts/analyze/constants.py rename to scripts/analyze_functions/constants.py diff --git a/scripts/analyze/corrections/brand_name_whitespace.py b/scripts/analyze_functions/corrections/brand_name_whitespace.py similarity index 100% rename from scripts/analyze/corrections/brand_name_whitespace.py rename to scripts/analyze_functions/corrections/brand_name_whitespace.py diff --git a/scripts/analyze/corrections/consult.py b/scripts/analyze_functions/corrections/consult.py similarity index 76% rename from scripts/analyze/corrections/consult.py rename to scripts/analyze_functions/corrections/consult.py index c2deb542..096fef4a 100644 --- a/scripts/analyze/corrections/consult.py +++ b/scripts/analyze_functions/corrections/consult.py @@ -1,7 +1,7 @@ -from analyze.data_helpers import ensure_unique_item, get_english_text +from analyze_functions.data_helpers import ensure_unique_item, get_english_text from common.constants import BRICK_COLLECTION_NAME -from analyze.constants import CONSULT_TEXT +from analyze_functions.constants import CONSULT_TEXT def get_consult_brick(data): brick_filter = filter( diff --git a/scripts/analyze/data_helpers.py b/scripts/analyze_functions/data_helpers.py similarity index 100% rename from scripts/analyze/data_helpers.py rename to scripts/analyze_functions/data_helpers.py