Skip to content

Commit

Permalink
feat(scripts): add any fallback guideline check
Browse files Browse the repository at this point in the history
  • Loading branch information
tamslo committed Oct 11, 2024
1 parent 72f9952 commit e453548
Show file tree
Hide file tree
Showing 13 changed files with 69 additions and 31 deletions.
3 changes: 2 additions & 1 deletion scripts/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,7 @@ Run `python clean.py` to remove the `scripts/temp` directory and all files in

## Analyze (and correct) annotations

Run `python run_analysis.py <PATH_TO_BACKUP> [--correct]` to analyze annotations
Run `python analyze.py <PATH_TO_BACKUP> [--correct]` to analyze annotations
and optionally correct what can be corrected easily in
`<PATH_TO_BACKUP>_corrected_<TIMESTAMP>.base64.json`.

Expand All @@ -94,6 +94,7 @@ and optionally correct what can be corrected easily in
| Check | Description | `--correct`ed | Only for single-gene results* |
| ----- | ----------- | ------------- | ----------------------------- |
| `brand_whitespace` | Drug brand names should not have leading or trailing white space. |||
| `single_any_fallback` | If an "any" fallback guideline (`*`) is present for a drug, it should be the drug's only guideline (otherwise the other guidelines are ignored). |||

### Guideline annotation checks

Expand Down
33 changes: 19 additions & 14 deletions scripts/run_analysis.py → scripts/analyze.py
Original file line number Diff line number Diff line change
@@ -1,23 +1,25 @@
import sys

from analyze.checks.brand_name_whitespace import check_brand_name_whitespace
from analyze.checks.metabolization_before_consequence import check_metabolization_before_consequence
from analyze.checks.warning_levels import check_green_warning_level, \
from analyze_functions.checks.brand_name_whitespace import check_brand_name_whitespace
from analyze_functions.checks.metabolization_before_consequence import check_metabolization_before_consequence
from analyze_functions.checks.single_any_fallback_guideline import check_single_any_fallback_guideline
from analyze_functions.checks.warning_levels import check_green_warning_level, \
check_none_warning_level, check_red_warning_level, \
check_yellow_warning_level
from analyze.checks.consult import has_consult
from analyze.checks.metabolization_severity import check_metabolization_severity
from analyze_functions.checks.consult import has_consult
from analyze_functions.checks.metabolization_severity import check_metabolization_severity

from analyze.corrections.consult import add_consult
from analyze.corrections.brand_name_whitespace import correct_brand_name_whitespace
from analyze_functions.corrections.consult import add_consult
from analyze_functions.corrections.brand_name_whitespace import correct_brand_name_whitespace

from analyze.data_helpers import get_drug_annotations, get_guideline_annotations, has_annotations
from analyze_functions.data_helpers import get_drug_annotations, get_guideline_annotations, has_annotations
from common.constants import DRUG_COLLECTION_NAME, SCRIPT_POSTFIXES
from common.get_data import get_data, get_guideline_by_id, get_phenotype_key
from common.write_data import write_data, write_log

DRUG_CHECKS = {
'brand_whitespace': check_brand_name_whitespace,
'single_any_fallback': check_single_any_fallback_guideline,
}

DRUG_CORRECTIONS = {
Expand All @@ -38,11 +40,14 @@
'has_consult': add_consult,
}


def analyze_annotations(item, annotations, checks):
def analyze_annotations(item, annotations, checks, data = None):
results = {}
for check_name, check_function in checks.items():
results[check_name] = check_function(item, annotations)
results[check_name] = check_function({
'item': item,
'annotations': annotations,
'data': data,
})
return results

def correct_inconsistency(data, item, check_name, corrections):
Expand Down Expand Up @@ -87,7 +92,7 @@ def handle_failed_checks(
log_all_passed(log_content, postfix=skipped_checks_string)
return len(skipped_checks), len(failed_checks)

def main():
def run_analyses():
correct_inconsistencies = '--correct' in sys.argv
data = get_data()
missing_drug_annotation_count = 0
Expand All @@ -106,7 +111,7 @@ def main():
log_not_annotated(log_content)
else:
drug_result = analyze_annotations(
drug, drug_annotations, DRUG_CHECKS)
drug, drug_annotations, DRUG_CHECKS, data)
if not all(drug_result.values()):
skipped, failed = handle_failed_checks(data, drug, drug_result,
DRUG_CORRECTIONS, correct_inconsistencies,
Expand Down Expand Up @@ -154,4 +159,4 @@ def main():
write_data(data, postfix=SCRIPT_POSTFIXES['correct'])

if __name__ == '__main__':
main()
run_analyses()
4 changes: 0 additions & 4 deletions scripts/analyze/checks/consult.py

This file was deleted.

Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
def check_brand_name_whitespace(_, annotations):
def check_brand_name_whitespace(args):
annotations = args['annotations']
check_applies = True
for brand_name in annotations['brand_names']:
trimmed_name = brand_name.strip()
Expand Down
5 changes: 5 additions & 0 deletions scripts/analyze_functions/checks/consult.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
from analyze_functions.constants import CONSULT_TEXT

def has_consult(args):
    """Return whether the consult text occurs in the recommendation.

    `args` is the check-argument dict; only its 'annotations' entry is
    read, and within it only the 'recommendation' value.
    """
    recommendation = args['annotations']['recommendation']
    return CONSULT_TEXT in recommendation
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
import analyze.constants as constants
import analyze_functions.constants as constants

def _get_first_substring_position(string, substrings):
positions = list(filter(
Expand All @@ -11,7 +11,8 @@ def _get_first_substring_position(string, substrings):
if (len(positions) == 0): return None
return min(positions)

def check_metabolization_before_consequence(_, annotations):
def check_metabolization_before_consequence(args):
annotations = args['annotations']
implication = annotations['implication']
metabolization_position = _get_first_substring_position(
implication,
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
import analyze.constants as constants
import analyze_functions.constants as constants

def check_metabolization_severity(guideline, annotations):
def check_metabolization_severity(args):
guideline = args['item']
annotations = args['annotations']
ignored_phenotypes = ['no result', 'indeterminate', 'normal metabolizer']
multiple_relevant_phenotypes = False
relevant_gene = None
Expand Down
23 changes: 23 additions & 0 deletions scripts/analyze_functions/checks/single_any_fallback_guideline.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
from common.get_data import get_guideline_by_id

def check_single_any_fallback_guideline(args):
    """Check that an "any" fallback (`*`) guideline is a drug's only guideline.

    Reads the drug from `args['item']` and the data set from `args['data']`,
    resolves every ID in `drug['guidelines']` with `get_guideline_by_id` and
    scans the lookup values of each guideline's 'lookupkey' for `*`.

    A warning is printed whenever `*` is listed next to other lookup values
    for the same gene.

    Returns True if no `*` lookup value was found at all, or if the drug has
    exactly one guideline; False otherwise.
    """
    drug = args['item']
    data = args['data']
    guidelines = [
        get_guideline_by_id(data, guideline_id)
        for guideline_id in drug['guidelines']
    ]

    found_any_fallback = False
    for guideline in guidelines:
        lookupkey = guideline['lookupkey']
        for gene, lookup_values in lookupkey.items():
            for lookup_value in lookup_values:
                if lookup_value != '*':
                    continue
                found_any_fallback = True
                # `*` together with further values for the same gene: report
                # it, the overall result is not affected by this.
                if len(lookup_values) != 1:
                    print(gene)
                    print(lookupkey)
                    print(
                        '[WARNING] Multiple lookupkeys with present "any '
                        'fallback", all other than * are ignored'
                    )

    if not found_any_fallback:
        return True
    return len(guidelines) == 1
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
import analyze.constants as constants
import analyze_functions.constants as constants

def should_be_red(annotations):
return constants.RED_TEXT in annotations['recommendation'] and all(map(
Expand All @@ -24,25 +24,29 @@ def should_be_green(annotations):
constants.GREEN_TEXTS,
))

def check_red_warning_level(_, annotations):
def check_red_warning_level(args):
annotations = args['annotations']
has_warning_level = annotations['warning_level'] == 'red'
should_have_warning_level = should_be_red(annotations)
return has_warning_level == should_have_warning_level

def check_yellow_warning_level(_, annotations):
def check_yellow_warning_level(args):
annotations = args['annotations']
has_warning_level = annotations['warning_level'] == 'yellow'
should_have_warning_level = not should_be_red(annotations) and \
should_be_yellow(annotations)
return has_warning_level if should_have_warning_level else True

def check_green_warning_level(_, annotations):
def check_green_warning_level(args):
annotations = args['annotations']
has_warning_level = annotations['warning_level'] == 'green'
should_have_warning_level = not should_be_red(annotations) and \
not should_be_yellow(annotations) and \
should_be_green(annotations)
return has_warning_level == should_have_warning_level

def check_none_warning_level(_, annotations):
def check_none_warning_level(args):
annotations = args['annotations']
has_warning_level = annotations['warning_level'] == 'none'
should_have_warning_level = not should_be_red(annotations) and \
not should_be_yellow(annotations) and \
Expand Down
File renamed without changes.
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from analyze.data_helpers import ensure_unique_item, get_english_text
from analyze_functions.data_helpers import ensure_unique_item, get_english_text
from common.constants import BRICK_COLLECTION_NAME

from analyze.constants import CONSULT_TEXT
from analyze_functions.constants import CONSULT_TEXT

def get_consult_brick(data):
brick_filter = filter(
Expand Down
File renamed without changes.

0 comments on commit e453548

Please sign in to comment.