From e453548fe69662b2e66dfd87d0a7ee94c270a995 Mon Sep 17 00:00:00 2001
From: Tamara Slosarek <tamara.slosarek@hpi.de>
Date: Fri, 11 Oct 2024 17:52:34 +0200
Subject: [PATCH] feat(scripts): add any fallback guideline check

---
 scripts/README.md                             |  3 +-
 scripts/{run_analysis.py => analyze.py}       | 33 +++++++++++--------
 scripts/analyze/checks/consult.py             |  4 ---
 .../checks/brand_name_whitespace.py           |  3 +-
 scripts/analyze_functions/checks/consult.py   |  5 +++
 .../metabolization_before_consequence.py      |  5 +--
 .../checks/metabolization_severity.py         |  6 ++--
 .../checks/single_any_fallback_guideline.py   | 23 +++++++++++++
 .../checks/warning_levels.py                  | 14 +++++---
 .../constants.py                              |  0
 .../corrections/brand_name_whitespace.py      |  0
 .../corrections/consult.py                    |  4 +--
 .../data_helpers.py                           |  0
 13 files changed, 69 insertions(+), 31 deletions(-)
 rename scripts/{run_analysis.py => analyze.py} (83%)
 delete mode 100644 scripts/analyze/checks/consult.py
 rename scripts/{analyze => analyze_functions}/checks/brand_name_whitespace.py (75%)
 create mode 100644 scripts/analyze_functions/checks/consult.py
 rename scripts/{analyze => analyze_functions}/checks/metabolization_before_consequence.py (84%)
 rename scripts/{analyze => analyze_functions}/checks/metabolization_severity.py (87%)
 create mode 100644 scripts/analyze_functions/checks/single_any_fallback_guideline.py
 rename scripts/{analyze => analyze_functions}/checks/warning_levels.py (84%)
 rename scripts/{analyze => analyze_functions}/constants.py (100%)
 rename scripts/{analyze => analyze_functions}/corrections/brand_name_whitespace.py (100%)
 rename scripts/{analyze => analyze_functions}/corrections/consult.py (76%)
 rename scripts/{analyze => analyze_functions}/data_helpers.py (100%)
diff --git a/scripts/README.md b/scripts/README.md
index 4c0c99a3..8040dad1 100644
--- a/scripts/README.md
+++ b/scripts/README.md
@@ -85,7 +85,7 @@ Run `python clean.py` to remove the `scripts/temp` directory and all files in
 
 ## Analyze (and correct) annotations
 
-Run `python run_analysis.py <PATH_TO_BACKUP> [--correct]` to analyze annotations
+Run `python analyze.py <PATH_TO_BACKUP> [--correct]` to analyze annotations
 and optionally correct what can be corrected easily in
 `<PATH_TO_BACKUP>_corrected_<TIMESTAMP>.base64.json`.
 
@@ -94,6 +94,7 @@ and optionally correct what can be corrected easily in
 | Check | Description | `--correct`ed | Only for single-gene results* |
 | ----- | ----------- | ------------- | ----------------------------- |
 | `brand_whitespace` | Drug brand names should not have leading or trailing white space. | ✅ | ❌ |
+| `single_any_fallback` | If an "any fallback" guideline (`*`) is present, it should be the only guideline (otherwise the other guidelines are ignored). | ❌ | ❌ |
 
 ### Guideline annotation checks
 
diff --git a/scripts/run_analysis.py b/scripts/analyze.py
similarity index 83%
rename from scripts/run_analysis.py
rename to scripts/analyze.py
index 9a94fd1c..8a94f1e7 100644
--- a/scripts/run_analysis.py
+++ b/scripts/analyze.py
@@ -1,23 +1,25 @@
 import sys
 
-from analyze.checks.brand_name_whitespace import check_brand_name_whitespace
-from analyze.checks.metabolization_before_consequence import check_metabolization_before_consequence
-from analyze.checks.warning_levels import check_green_warning_level, \
+from analyze_functions.checks.brand_name_whitespace import check_brand_name_whitespace
+from analyze_functions.checks.metabolization_before_consequence import check_metabolization_before_consequence
+from analyze_functions.checks.single_any_fallback_guideline import check_single_any_fallback_guideline
+from analyze_functions.checks.warning_levels import check_green_warning_level, \
     check_none_warning_level, check_red_warning_level, \
         check_yellow_warning_level
-from analyze.checks.consult import has_consult
-from analyze.checks.metabolization_severity import check_metabolization_severity
+from analyze_functions.checks.consult import has_consult
+from analyze_functions.checks.metabolization_severity import check_metabolization_severity
 
-from analyze.corrections.consult import add_consult
-from analyze.corrections.brand_name_whitespace import correct_brand_name_whitespace
+from analyze_functions.corrections.consult import add_consult
+from analyze_functions.corrections.brand_name_whitespace import correct_brand_name_whitespace
 
-from analyze.data_helpers import get_drug_annotations, get_guideline_annotations, has_annotations
+from analyze_functions.data_helpers import get_drug_annotations, get_guideline_annotations, has_annotations
 from common.constants import DRUG_COLLECTION_NAME, SCRIPT_POSTFIXES
 from common.get_data import get_data, get_guideline_by_id, get_phenotype_key
 from common.write_data import write_data, write_log
 
 DRUG_CHECKS = {
     'brand_whitespace': check_brand_name_whitespace,
+    'single_any_fallback': check_single_any_fallback_guideline,
 }
 
 DRUG_CORRECTIONS = {
@@ -38,11 +40,14 @@
     'has_consult': add_consult,
 }
 
-
-def analyze_annotations(item, annotations, checks):
+def analyze_annotations(item, annotations, checks, data = None):  # data is optional; it is forwarded to every check as args['data']
     results = {}
     for check_name, check_function in checks.items():
-        results[check_name] = check_function(item, annotations)
+        results[check_name] = check_function({  # every check receives one dict instead of positional arguments
+            'item': item,
+            'annotations': annotations,
+            'data': data,
+        })
     return results
 
 def correct_inconsistency(data, item, check_name, corrections):
@@ -87,7 +92,7 @@ def handle_failed_checks(
         log_all_passed(log_content, postfix=skipped_checks_string)
     return len(skipped_checks), len(failed_checks)
 
-def main():
+def run_analyses():
     correct_inconsistencies = '--correct' in sys.argv
     data = get_data()
     missing_drug_annotation_count = 0
@@ -106,7 +111,7 @@ def main():
             log_not_annotated(log_content)
         else:
             drug_result = analyze_annotations(
-                drug, drug_annotations, DRUG_CHECKS)
+                drug, drug_annotations, DRUG_CHECKS, data)
             if not all(drug_result.values()):
                 skipped, failed = handle_failed_checks(data, drug, drug_result,
                     DRUG_CORRECTIONS, correct_inconsistencies,
@@ -154,4 +159,4 @@ def main():
         write_data(data, postfix=SCRIPT_POSTFIXES['correct'])
 
 if __name__ == '__main__':
-    main()
\ No newline at end of file
+    run_analyses()
\ No newline at end of file
diff --git a/scripts/analyze/checks/consult.py b/scripts/analyze/checks/consult.py
deleted file mode 100644
index 259d904f..00000000
--- a/scripts/analyze/checks/consult.py
+++ /dev/null
@@ -1,4 +0,0 @@
-from analyze.constants import CONSULT_TEXT
-
-def has_consult(_, annotations):
-    return CONSULT_TEXT in annotations['recommendation']
\ No newline at end of file
diff --git a/scripts/analyze/checks/brand_name_whitespace.py b/scripts/analyze_functions/checks/brand_name_whitespace.py
similarity index 75%
rename from scripts/analyze/checks/brand_name_whitespace.py
rename to scripts/analyze_functions/checks/brand_name_whitespace.py
index 3c1e41a8..751ecf5c 100644
--- a/scripts/analyze/checks/brand_name_whitespace.py
+++ b/scripts/analyze_functions/checks/brand_name_whitespace.py
@@ -1,4 +1,5 @@
-def check_brand_name_whitespace(_, annotations):
+def check_brand_name_whitespace(args):
+    annotations = args['annotations']
     check_applies = True
     for brand_name in annotations['brand_names']:
         trimmed_name = brand_name.strip()
diff --git a/scripts/analyze_functions/checks/consult.py b/scripts/analyze_functions/checks/consult.py
new file mode 100644
index 00000000..5bc87db3
--- /dev/null
+++ b/scripts/analyze_functions/checks/consult.py
@@ -0,0 +1,5 @@
+from analyze_functions.constants import CONSULT_TEXT
+
+def has_consult(args):  # check: only args['annotations'] is read here
+    annotations = args['annotations']
+    return CONSULT_TEXT in annotations['recommendation']  # True when the recommendation contains CONSULT_TEXT
\ No newline at end of file
diff --git a/scripts/analyze/checks/metabolization_before_consequence.py b/scripts/analyze_functions/checks/metabolization_before_consequence.py
similarity index 84%
rename from scripts/analyze/checks/metabolization_before_consequence.py
rename to scripts/analyze_functions/checks/metabolization_before_consequence.py
index 3c84ec19..7fdc4208 100644
--- a/scripts/analyze/checks/metabolization_before_consequence.py
+++ b/scripts/analyze_functions/checks/metabolization_before_consequence.py
@@ -1,4 +1,4 @@
-import analyze.constants as constants
+import analyze_functions.constants as constants
 
 def _get_first_substring_position(string, substrings):
     positions = list(filter(
@@ -11,7 +11,8 @@ def _get_first_substring_position(string, substrings):
     if (len(positions) == 0): return None
     return min(positions)
 
-def check_metabolization_before_consequence(_, annotations):
+def check_metabolization_before_consequence(args):
+    annotations = args['annotations']
     implication = annotations['implication']
     metabolization_position = _get_first_substring_position(
         implication,
diff --git a/scripts/analyze/checks/metabolization_severity.py b/scripts/analyze_functions/checks/metabolization_severity.py
similarity index 87%
rename from scripts/analyze/checks/metabolization_severity.py
rename to scripts/analyze_functions/checks/metabolization_severity.py
index 2f26cacd..73691909 100644
--- a/scripts/analyze/checks/metabolization_severity.py
+++ b/scripts/analyze_functions/checks/metabolization_severity.py
@@ -1,6 +1,8 @@
-import analyze.constants as constants
+import analyze_functions.constants as constants
 
-def check_metabolization_severity(guideline, annotations):
+def check_metabolization_severity(args):
+    guideline = args['item']
+    annotations = args['annotations']
     ignored_phenotypes = ['no result', 'indeterminate', 'normal metabolizer']
     multiple_relevant_phenotypes = False
     relevant_gene = None
diff --git a/scripts/analyze_functions/checks/single_any_fallback_guideline.py b/scripts/analyze_functions/checks/single_any_fallback_guideline.py
new file mode 100644
index 00000000..4e7a8474
--- /dev/null
+++ b/scripts/analyze_functions/checks/single_any_fallback_guideline.py
@@ -0,0 +1,23 @@
+from common.get_data import get_guideline_by_id
+
+def check_single_any_fallback_guideline(args):  # passes if no lookup value is '*' or the drug has exactly one guideline
+    drug = args['item']
+    data = args['data']  # passed on to get_guideline_by_id to resolve each entry of drug['guidelines']
+    guidelines = list(map(
+        lambda guideline_id: get_guideline_by_id(data, guideline_id),
+        drug['guidelines'],
+    ))
+    has_any_fallback = False  # becomes True once any lookup value of any guideline equals '*'
+    for guideline in guidelines:
+        for gene in guideline['lookupkey'].keys():
+            for lookupValue in guideline['lookupkey'][gene]:
+                is_any_fallback = lookupValue == '*'
+                if is_any_fallback and len(guideline['lookupkey'][gene]) != 1:  # '*' listed next to other values for this gene; only warns, result is unaffected
+                    print(gene)
+                    print(guideline['lookupkey'])
+                    print(
+                        '[WARNING] Multiple lookupkeys with present "any '
+                        'fallback", all other than * are ignored'    
+                    )
+                has_any_fallback = has_any_fallback or is_any_fallback  # sticky flag across all guidelines and genes
+    return not has_any_fallback or len(guidelines) == 1
\ No newline at end of file
diff --git a/scripts/analyze/checks/warning_levels.py b/scripts/analyze_functions/checks/warning_levels.py
similarity index 84%
rename from scripts/analyze/checks/warning_levels.py
rename to scripts/analyze_functions/checks/warning_levels.py
index 19f790a1..22e82c08 100644
--- a/scripts/analyze/checks/warning_levels.py
+++ b/scripts/analyze_functions/checks/warning_levels.py
@@ -1,4 +1,4 @@
-import analyze.constants as constants
+import analyze_functions.constants as constants
 
 def should_be_red(annotations):
     return constants.RED_TEXT in annotations['recommendation'] and all(map(
@@ -24,25 +24,29 @@ def should_be_green(annotations):
         constants.GREEN_TEXTS,
     ))
 
-def check_red_warning_level(_, annotations):
+def check_red_warning_level(args):  # passes when warning_level is 'red' exactly if should_be_red(annotations) holds
+    annotations = args['annotations']  # the only key of args this check reads
     has_warning_level = annotations['warning_level'] == 'red'
     should_have_warning_level = should_be_red(annotations)
     return has_warning_level == should_have_warning_level
 
-def check_yellow_warning_level(_, annotations):
+def check_yellow_warning_level(args):  # one-directional: requires 'yellow' when yellow (and not red) applies, otherwise always passes
+    annotations = args['annotations']  # the only key of args this check reads
     has_warning_level = annotations['warning_level'] == 'yellow'
     should_have_warning_level = not should_be_red(annotations) and \
         should_be_yellow(annotations)
     return has_warning_level if should_have_warning_level else True
 
-def check_green_warning_level(_, annotations):
+def check_green_warning_level(args):  # passes when warning_level is 'green' exactly if green applies and neither red nor yellow does
+    annotations = args['annotations']  # the only key of args this check reads
     has_warning_level = annotations['warning_level'] == 'green'
     should_have_warning_level = not should_be_red(annotations) and \
         not should_be_yellow(annotations) and \
         should_be_green(annotations)
     return has_warning_level == should_have_warning_level
 
-def check_none_warning_level(_, annotations):
+def check_none_warning_level(args):
+    annotations = args['annotations']
     has_warning_level = annotations['warning_level'] == 'none'
     should_have_warning_level = not should_be_red(annotations) and \
         not should_be_yellow(annotations) and \
diff --git a/scripts/analyze/constants.py b/scripts/analyze_functions/constants.py
similarity index 100%
rename from scripts/analyze/constants.py
rename to scripts/analyze_functions/constants.py
diff --git a/scripts/analyze/corrections/brand_name_whitespace.py b/scripts/analyze_functions/corrections/brand_name_whitespace.py
similarity index 100%
rename from scripts/analyze/corrections/brand_name_whitespace.py
rename to scripts/analyze_functions/corrections/brand_name_whitespace.py
diff --git a/scripts/analyze/corrections/consult.py b/scripts/analyze_functions/corrections/consult.py
similarity index 76%
rename from scripts/analyze/corrections/consult.py
rename to scripts/analyze_functions/corrections/consult.py
index c2deb542..096fef4a 100644
--- a/scripts/analyze/corrections/consult.py
+++ b/scripts/analyze_functions/corrections/consult.py
@@ -1,7 +1,7 @@
-from analyze.data_helpers import ensure_unique_item, get_english_text
+from analyze_functions.data_helpers import ensure_unique_item, get_english_text
 from common.constants import BRICK_COLLECTION_NAME
 
-from analyze.constants import CONSULT_TEXT
+from analyze_functions.constants import CONSULT_TEXT
 
 def get_consult_brick(data):
     brick_filter = filter(
diff --git a/scripts/analyze/data_helpers.py b/scripts/analyze_functions/data_helpers.py
similarity index 100%
rename from scripts/analyze/data_helpers.py
rename to scripts/analyze_functions/data_helpers.py