feat(scripts): add any fallback guideline check

hpi-dhc · Oct 11, 2024 · e453548 · e453548
1 parent 72f9952
commit e453548
Show file tree

Hide file tree

Showing 13 changed files with 69 additions and 31 deletions.
diff --git a/scripts/README.md b/scripts/README.md
@@ -85,7 +85,7 @@ Run `python clean.py` to remove the `scripts/temp` directory and all files in
 
 ## Analyze (and correct) annotations
 
-Run `python run_analysis.py <PATH_TO_BACKUP> [--correct]` to analyze annotations
+Run `python analyze.py <PATH_TO_BACKUP> [--correct]` to analyze annotations
 and optionally correct what can be corrected easily in
 `<PATH_TO_BACKUP>_corrected_<TIMESTAMP>.base64.json`.
 
@@ -94,6 +94,7 @@ and optionally correct what can be corrected easily in
 | Check | Description | `--correct`ed | Only for single-gene results* |
 | ----- | ----------- | ------------- | ----------------------------- |
 | `brand_whitespace` | Drug brand names should not have leading or trailing white space. | ✅ | ❌ |
+| `single_any_fallback` | If a drug has an "any fallback" guideline (lookup value `*`), it should be the drug's only guideline; otherwise its other guidelines are ignored. | ❌ | ❌ |
 
 ### Guideline annotation checks
 

diff --git a/scripts/run_analysis.py → scripts/analyze.py b/scripts/run_analysis.py → scripts/analyze.py
@@ -1,23 +1,25 @@
 import sys
 
-from analyze.checks.brand_name_whitespace import check_brand_name_whitespace
-from analyze.checks.metabolization_before_consequence import check_metabolization_before_consequence
-from analyze.checks.warning_levels import check_green_warning_level, \
+from analyze_functions.checks.brand_name_whitespace import check_brand_name_whitespace
+from analyze_functions.checks.metabolization_before_consequence import check_metabolization_before_consequence
+from analyze_functions.checks.single_any_fallback_guideline import check_single_any_fallback_guideline
+from analyze_functions.checks.warning_levels import check_green_warning_level, \
     check_none_warning_level, check_red_warning_level, \
         check_yellow_warning_level
-from analyze.checks.consult import has_consult
-from analyze.checks.metabolization_severity import check_metabolization_severity
+from analyze_functions.checks.consult import has_consult
+from analyze_functions.checks.metabolization_severity import check_metabolization_severity
 
-from analyze.corrections.consult import add_consult
-from analyze.corrections.brand_name_whitespace import correct_brand_name_whitespace
+from analyze_functions.corrections.consult import add_consult
+from analyze_functions.corrections.brand_name_whitespace import correct_brand_name_whitespace
 
-from analyze.data_helpers import get_drug_annotations, get_guideline_annotations, has_annotations
+from analyze_functions.data_helpers import get_drug_annotations, get_guideline_annotations, has_annotations
 from common.constants import DRUG_COLLECTION_NAME, SCRIPT_POSTFIXES
 from common.get_data import get_data, get_guideline_by_id, get_phenotype_key
 from common.write_data import write_data, write_log
 
 DRUG_CHECKS = {
+    # Check name -> check function; analyze_annotations calls each function
+    # with one dict holding 'item', 'annotations' and 'data'.
     'brand_whitespace': check_brand_name_whitespace,
+    'single_any_fallback': check_single_any_fallback_guideline,
 }
 
 DRUG_CORRECTIONS = {
@@ -38,11 +40,14 @@
     'has_consult': add_consult,
 }
 
-
-def analyze_annotations(item, annotations, checks):
+def analyze_annotations(item, annotations, checks, data = None):
+    """Run every check in `checks` and collect the results by check name.
+
+    Each check function is called with a single dict holding `item`,
+    `annotations` and `data`; whatever it returns is stored under the
+    check's name in the returned dict.
+    """
     results = {}
     for check_name, check_function in checks.items():
-        results[check_name] = check_function(item, annotations)
+        results[check_name] = check_function({
+            'item': item,
+            'annotations': annotations,
+            'data': data,
+        })
     return results
 
 def correct_inconsistency(data, item, check_name, corrections):
@@ -87,7 +92,7 @@ def handle_failed_checks(
         log_all_passed(log_content, postfix=skipped_checks_string)
     return len(skipped_checks), len(failed_checks)
 
-def main():
+def run_analyses():
     correct_inconsistencies = '--correct' in sys.argv
     data = get_data()
     missing_drug_annotation_count = 0
@@ -106,7 +111,7 @@ def main():
             log_not_annotated(log_content)
         else:
             drug_result = analyze_annotations(
-                drug, drug_annotations, DRUG_CHECKS)
+                drug, drug_annotations, DRUG_CHECKS, data)
             if not all(drug_result.values()):
                 skipped, failed = handle_failed_checks(data, drug, drug_result,
                     DRUG_CORRECTIONS, correct_inconsistencies,
@@ -154,4 +159,4 @@ def main():
         write_data(data, postfix=SCRIPT_POSTFIXES['correct'])
 
 if __name__ == '__main__':
-    main()
+    run_analyses()
diff --git a/scripts/analyze/checks/consult.py b/scripts/analyze/checks/consult.py
diff --git a/...s/analyze/checks/brand_name_whitespace.py → ...functions/checks/brand_name_whitespace.py b/...s/analyze/checks/brand_name_whitespace.py → ...functions/checks/brand_name_whitespace.py
@@ -1,4 +1,5 @@
-def check_brand_name_whitespace(_, annotations):
+def check_brand_name_whitespace(args):
+    annotations = args['annotations']
     check_applies = True
     for brand_name in annotations['brand_names']:
         trimmed_name = brand_name.strip()

diff --git a/scripts/analyze_functions/checks/consult.py b/scripts/analyze_functions/checks/consult.py
@@ -0,0 +1,5 @@
+from analyze_functions.constants import CONSULT_TEXT
+
+def has_consult(args):
+    """Return whether the annotated recommendation contains CONSULT_TEXT."""
+    recommendation = args['annotations']['recommendation']
+    return CONSULT_TEXT in recommendation
diff --git a/...ecks/metabolization_before_consequence.py → ...ecks/metabolization_before_consequence.py b/...ecks/metabolization_before_consequence.py → ...ecks/metabolization_before_consequence.py
@@ -1,4 +1,4 @@
-import analyze.constants as constants
+import analyze_functions.constants as constants
 
 def _get_first_substring_position(string, substrings):
     positions = list(filter(
@@ -11,7 +11,8 @@ def _get_first_substring_position(string, substrings):
     if (len(positions) == 0): return None
     return min(positions)
 
-def check_metabolization_before_consequence(_, annotations):
+def check_metabolization_before_consequence(args):
+    annotations = args['annotations']
     implication = annotations['implication']
     metabolization_position = _get_first_substring_position(
         implication,

diff --git a/...analyze/checks/metabolization_severity.py → ...nctions/checks/metabolization_severity.py b/...analyze/checks/metabolization_severity.py → ...nctions/checks/metabolization_severity.py
@@ -1,6 +1,8 @@
-import analyze.constants as constants
+import analyze_functions.constants as constants
 
-def check_metabolization_severity(guideline, annotations):
+def check_metabolization_severity(args):
+    guideline = args['item']
+    annotations = args['annotations']
     ignored_phenotypes = ['no result', 'indeterminate', 'normal metabolizer']
     multiple_relevant_phenotypes = False
     relevant_gene = None

diff --git a/scripts/analyze_functions/checks/single_any_fallback_guideline.py b/scripts/analyze_functions/checks/single_any_fallback_guideline.py
@@ -0,0 +1,23 @@
+from common.get_data import get_guideline_by_id
+
+def check_single_any_fallback_guideline(args):
+    """Check that a drug with an "any fallback" guideline has no others.
+
+    An "any fallback" is a lookup value of `*` in a guideline's `lookupkey`.
+    The check passes when none of the drug's guidelines contains one, or
+    when the drug has exactly one guideline.
+
+    As a side effect, a warning is printed for every `*` that shares a
+    gene's lookup values with other entries.
+    """
+    drug = args['item']
+    data = args['data']
+    guidelines = [
+        get_guideline_by_id(data, guideline_id)
+        for guideline_id in drug['guidelines']
+    ]
+    found_any_fallback = False
+    for guideline in guidelines:
+        lookupkey = guideline['lookupkey']
+        for gene, lookup_values in lookupkey.items():
+            for lookup_value in lookup_values:
+                if lookup_value == '*':
+                    found_any_fallback = True
+                    if len(lookup_values) != 1:
+                        print(gene)
+                        print(lookupkey)
+                        print(
+                            '[WARNING] Multiple lookupkeys with present "any '
+                            'fallback", all other than * are ignored'
+                        )
+    if not found_any_fallback:
+        return True
+    return len(guidelines) == 1
diff --git a/scripts/analyze/checks/warning_levels.py → ...nalyze_functions/checks/warning_levels.py b/scripts/analyze/checks/warning_levels.py → ...nalyze_functions/checks/warning_levels.py
@@ -1,4 +1,4 @@
-import analyze.constants as constants
+import analyze_functions.constants as constants
 
 def should_be_red(annotations):
     return constants.RED_TEXT in annotations['recommendation'] and all(map(
@@ -24,25 +24,29 @@ def should_be_green(annotations):
         constants.GREEN_TEXTS,
     ))
 
-def check_red_warning_level(_, annotations):
+def check_red_warning_level(args):
+    """Pass when the warning level is 'red' exactly when should_be_red says so."""
+    annotations = args['annotations']
     has_warning_level = annotations['warning_level'] == 'red'
     should_have_warning_level = should_be_red(annotations)
     return has_warning_level == should_have_warning_level
 
-def check_yellow_warning_level(_, annotations):
+def check_yellow_warning_level(args):
+    """Pass unless 'yellow' is expected but not set.
+
+    Yellow is expected when should_be_red is false and should_be_yellow is
+    true. Unlike the red and green checks this one is one-sided: a 'yellow'
+    level that is not expected still passes.
+    """
+    annotations = args['annotations']
     has_warning_level = annotations['warning_level'] == 'yellow'
     should_have_warning_level = not should_be_red(annotations) and \
         should_be_yellow(annotations)
     return has_warning_level if should_have_warning_level else True
 
-def check_green_warning_level(_, annotations):
+def check_green_warning_level(args):
+    """Pass when the warning level is 'green' exactly when green is expected.
+
+    Green is expected when should_be_red and should_be_yellow are both false
+    and should_be_green is true.
+    """
+    annotations = args['annotations']
     has_warning_level = annotations['warning_level'] == 'green'
     should_have_warning_level = not should_be_red(annotations) and \
         not should_be_yellow(annotations) and \
         should_be_green(annotations)
     return has_warning_level == should_have_warning_level
 
-def check_none_warning_level(_, annotations):
+def check_none_warning_level(args):
+    annotations = args['annotations']
     has_warning_level = annotations['warning_level'] == 'none'
     should_have_warning_level = not should_be_red(annotations) and \
         not should_be_yellow(annotations) and \

diff --git a/scripts/analyze/constants.py → scripts/analyze_functions/constants.py b/scripts/analyze/constants.py → scripts/analyze_functions/constants.py
diff --git a/...lyze/corrections/brand_name_whitespace.py → ...ions/corrections/brand_name_whitespace.py b/...lyze/corrections/brand_name_whitespace.py → ...ions/corrections/brand_name_whitespace.py
diff --git a/scripts/analyze/corrections/consult.py → .../analyze_functions/corrections/consult.py b/scripts/analyze/corrections/consult.py → .../analyze_functions/corrections/consult.py
@@ -1,7 +1,7 @@
-from analyze.data_helpers import ensure_unique_item, get_english_text
+from analyze_functions.data_helpers import ensure_unique_item, get_english_text
 from common.constants import BRICK_COLLECTION_NAME
 
-from analyze.constants import CONSULT_TEXT
+from analyze_functions.constants import CONSULT_TEXT
 
 def get_consult_brick(data):
     brick_filter = filter(

diff --git a/scripts/analyze/data_helpers.py → scripts/analyze_functions/data_helpers.py b/scripts/analyze/data_helpers.py → scripts/analyze_functions/data_helpers.py