diff --git a/pharme.code-workspace b/pharme.code-workspace index 2bd98956..6683765e 100644 --- a/pharme.code-workspace +++ b/pharme.code-workspace @@ -38,6 +38,7 @@ "amikacin", "anni", "aripiprazole", + "atomoxetine", "atorvastatin", "Backupper", "brandnames", @@ -60,6 +61,7 @@ "drugid", "drugrecommendation", "duloxetine", + "endoxifen", "Ezallor", "fluorouracil", "fullscreen", @@ -113,6 +115,7 @@ "plazomicin", "Proprinal", "pubspec", + "pytest", "RGBO", "rosuvastatin", "Roszet", @@ -138,6 +141,7 @@ "unstage", "unstaged", "userdata", + "venv", "Vicoprofen", "VKORC", "voriconazole", diff --git a/scripts/README.md b/scripts/README.md index aa995669..a6f61633 100644 --- a/scripts/README.md +++ b/scripts/README.md @@ -89,9 +89,12 @@ Run `python analyze.py [--correct]` to analyze annotations and optionally correct what can be corrected easily in `_corrected_.base64.json`. -Also checks whether guidelines with same implications / recommendations were -annotated consistently (although this check will not catch similar formulations) -and which bricks are not used in guidelines. +Also checks whether guidelines with the exact same implications / +recommendations were annotated consistently for CPIC guidelines (this check will +not catch similar formulations). +Ignored cases can be defined in `IGNORED_GUIDELINE_INCONSISTENCIES`. + +Additionally lists which bricks are not used in guidelines. ### Drug annotation checks diff --git a/scripts/analyze.py b/scripts/analyze.py index 11ded513..5832db6d 100644 --- a/scripts/analyze.py +++ b/scripts/analyze.py @@ -187,7 +187,6 @@ def run_analyses(): f'* Guidelines: {failed_guideline_annotation_count}\n\n', f'**Inconsistent guidelines**: {inconsistent_guidelines_count}\n\n', *guideline_inconsistency_log, - '\n', 'Missing annotations (search for `_not annotated_`):\n\n', f'* Drugs: {missing_drug_annotation_count}\n', f'* Guidelines: {missing_guideline_annotation_count}\n\n', diff --git a/scripts/analyze_functions/checks/guideline_consistency.py b/scripts/analyze_functions/checks/guideline_consistency.py index d4682b16..ac67b1ef 100644 --- a/scripts/analyze_functions/checks/guideline_consistency.py +++ b/scripts/analyze_functions/checks/guideline_consistency.py @@ -1,3 +1,4 @@ +from analyze_functions.constants import IGNORED_GUIDELINE_INCONSISTENCIES from analyze_functions.data_helpers import get_guideline_content, joint_implication_text from common.get_data import get_phenotype @@ -49,12 +50,26 @@ def check_guideline_consistencies(guideline_check_args_list): inconsistent_guidelines_count = 0 log_content = [] for guideline_key, check_args_list in check_args_per_external_guideline.items(): + if (guideline_key.startswith('fda-table-pharmacogenetic-associations')): + continue if (len(check_args_list) < 2): continue same_guideline_annotations = _group_annotations_by_guideline_content( check_args_list, ) inconsistency_log_content = [] for same_guideline_key, guideline_content in same_guideline_annotations.items(): + skip_definitions = list(filter( + lambda ignored_case: ignored_case['guideline'] == guideline_key \ + and same_guideline_key.lower().startswith(ignored_case['type']) \ + and same_guideline_key.endswith(f'"{ignored_case["text"]}"'), + IGNORED_GUIDELINE_INCONSISTENCIES, + )) + if len(skip_definitions) > 1: + print('WARNING: Got multiple applying consistency check skip ' + 'definitions, this should not happen' + ) + if len(skip_definitions) > 0: + continue unique_guideline_content = set(guideline_content.keys()) if len(unique_guideline_content) != 1: inconsistency_log_content += f' * {same_guideline_key} maps to:\n' @@ -66,4 +81,6 @@ def check_guideline_consistencies(guideline_check_args_list): log_content += f'* {guideline_key}\n' for inconsistency in inconsistency_log_content: log_content += inconsistency + if inconsistent_guidelines_count > 0: + log_content.append('\n') return inconsistent_guidelines_count, log_content \ No newline at end of file diff --git a/scripts/analyze_functions/constants.py b/scripts/analyze_functions/constants.py index 01c8f2d1..c9ee3a78 100644 --- a/scripts/analyze_functions/constants.py +++ b/scripts/analyze_functions/constants.py @@ -11,6 +11,32 @@ }, ] +IMPLICATION_TYPE = 'implication' +RECOMMENDATION_TYPE = 'recommendation' + +IGNORED_GUIDELINE_INCONSISTENCIES = [ + { + 'guideline': 'guideline-for-tricyclic-antidepressants-and-cyp2d6-and-cyp2c19', + 'type': RECOMMENDATION_TYPE, + 'text': 'No recommendation', + }, + { + 'guideline': 'cpic-guideline-for-tamoxifen-based-on-cyp2d6-genotype', + 'type': IMPLICATION_TYPE, + 'text': 'therapeutic endoxifen concentrations', + }, + { + 'guideline': 'cpic-guideline-for-atomoxetine-based-on-cyp2d6-genotype', + 'type': IMPLICATION_TYPE, + 'text': 'normal metabolizers of #drug-name have a lower likelihood of response as compared to poor metabolizers. this is associated with increased discontinuation due to lack of efficacy as compared to poor metabolizers.', + }, + { + 'guideline': 'guideline-for-phenytoin-and-cyp2c9-and-hla-b', + 'type': IMPLICATION_TYPE, + 'text': 'n/a', + }, +] + IGNORE_STAGED_CHECK = [ 'amikacin', 'gentamicin',