feat(#639): ignore inconsistency checks in specific cases

hpi-dhc · Oct 18, 2024 · c4ebf17
1 parent e7707a2
commit c4ebf17
Show file tree

Hide file tree

Showing 5 changed files with 53 additions and 4 deletions.
diff --git a/pharme.code-workspace b/pharme.code-workspace
@@ -38,6 +38,7 @@
       "amikacin",
       "anni",
       "aripiprazole",
+      "atomoxetine",
       "atorvastatin",
       "Backupper",
       "brandnames",
@@ -60,6 +61,7 @@
       "drugid",
       "drugrecommendation",
       "duloxetine",
+      "endoxifen",
       "Ezallor",
       "fluorouracil",
       "fullscreen",
@@ -113,6 +115,7 @@
       "plazomicin",
       "Proprinal",
       "pubspec",
+      "pytest",
       "RGBO",
       "rosuvastatin",
       "Roszet",
@@ -138,6 +141,7 @@
       "unstage",
       "unstaged",
       "userdata",
+      "venv",
       "Vicoprofen",
       "VKORC",
       "voriconazole",

diff --git a/scripts/README.md b/scripts/README.md
@@ -89,9 +89,12 @@ Run `python analyze.py <PATH_TO_BACKUP> [--correct]` to analyze annotations
 and optionally correct what can be corrected easily in
 `<PATH_TO_BACKUP>_corrected_<TIMESTAMP>.base64.json`.
 
-Also checks whether guidelines with same implications / recommendations were
-annotated consistently (although this check will not catch similar formulations)
-and which bricks are not used in guidelines.
+Also checks whether guidelines with the exact same implications /
+recommendations were annotated consistently (CPIC guidelines only; this check
+will not catch similar formulations). Cases to ignore can be defined in
+`IGNORED_GUIDELINE_INCONSISTENCIES` (see `analyze_functions/constants.py`).
+
+Additionally lists which bricks are not used in guidelines.
 
 ### Drug annotation checks
 

diff --git a/scripts/analyze.py b/scripts/analyze.py
@@ -187,7 +187,6 @@ def run_analyses():
         f'* Guidelines: {failed_guideline_annotation_count}\n\n',
         f'**Inconsistent guidelines**: {inconsistent_guidelines_count}\n\n',
         *guideline_inconsistency_log,
-        '\n',
         'Missing annotations (search for `_not annotated_`):\n\n',
         f'* Drugs: {missing_drug_annotation_count}\n',
         f'* Guidelines: {missing_guideline_annotation_count}\n\n',

diff --git a/scripts/analyze_functions/checks/guideline_consistency.py b/scripts/analyze_functions/checks/guideline_consistency.py
@@ -1,3 +1,4 @@
+from analyze_functions.constants import IGNORED_GUIDELINE_INCONSISTENCIES
 from analyze_functions.data_helpers import get_guideline_content, joint_implication_text
 from common.get_data import get_phenotype
 
@@ -49,12 +50,26 @@ def check_guideline_consistencies(guideline_check_args_list):
     inconsistent_guidelines_count = 0
     log_content = []
     for guideline_key, check_args_list in check_args_per_external_guideline.items():
+        if (guideline_key.startswith('fda-table-pharmacogenetic-associations')):
+            continue
         if (len(check_args_list) < 2): continue
         same_guideline_annotations = _group_annotations_by_guideline_content(
             check_args_list,
         )
         inconsistency_log_content = []
         for same_guideline_key, guideline_content in same_guideline_annotations.items():
+            skip_definitions = list(filter(
+                lambda ignored_case: ignored_case['guideline'] == guideline_key \
+                    and same_guideline_key.lower().startswith(ignored_case['type']) \
+                    and same_guideline_key.endswith(f'"{ignored_case["text"]}"'),
+                IGNORED_GUIDELINE_INCONSISTENCIES,
+            ))
+            if len(skip_definitions) > 1:
+                print('WARNING: Got multiple applying consistency check skip '
+                      'definitions, this should not happen'
+                )
+            if len(skip_definitions) > 0:
+                continue
             unique_guideline_content = set(guideline_content.keys())
             if len(unique_guideline_content) != 1:
                 inconsistency_log_content += f'  * {same_guideline_key} maps to:\n'
@@ -66,4 +81,6 @@ def check_guideline_consistencies(guideline_check_args_list):
             log_content += f'* {guideline_key}\n'
             for inconsistency in inconsistency_log_content:
                 log_content += inconsistency
+    if inconsistent_guidelines_count > 0:
+        log_content.append('\n')
     return inconsistent_guidelines_count, log_content
diff --git a/scripts/analyze_functions/constants.py b/scripts/analyze_functions/constants.py
@@ -11,6 +11,32 @@
     },
 ]
 
+IMPLICATION_TYPE = 'implication'
+RECOMMENDATION_TYPE = 'recommendation'
+
+IGNORED_GUIDELINE_INCONSISTENCIES = [
+    {
+        'guideline': 'guideline-for-tricyclic-antidepressants-and-cyp2d6-and-cyp2c19',
+        'type': RECOMMENDATION_TYPE,
+        'text': 'No recommendation',
+    },
+    {
+        'guideline': 'cpic-guideline-for-tamoxifen-based-on-cyp2d6-genotype',
+        'type': IMPLICATION_TYPE,
+        'text': 'therapeutic endoxifen concentrations',
+    },
+    {
+        'guideline': 'cpic-guideline-for-atomoxetine-based-on-cyp2d6-genotype',
+        'type': IMPLICATION_TYPE,
+        'text': 'normal metabolizers of #drug-name have a lower likelihood of response as compared to poor metabolizers. this is associated with increased discontinuation due to lack of efficacy as compared to poor metabolizers.',
+    },
+    {
+        'guideline': 'guideline-for-phenytoin-and-cyp2c9-and-hla-b',
+        'type': IMPLICATION_TYPE,
+        'text': 'n/a',
+    },
+]
+
 IGNORE_STAGED_CHECK = [
     'amikacin',
     'gentamicin',