Skip to content

Commit

Permalink
feat(#639): add unused brick check
Browse files Browse the repository at this point in the history
  • Loading branch information
tamslo committed Oct 15, 2024
1 parent a44c22c commit 4d9313c
Show file tree
Hide file tree
Showing 3 changed files with 31 additions and 1 deletion.
2 changes: 2 additions & 0 deletions scripts/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,8 @@ Run `python analyze.py <PATH_TO_BACKUP> [--correct]` to analyze annotations
and optionally correct what can be corrected easily in
`<PATH_TO_BACKUP>_corrected_<TIMESTAMP>.base64.json`.

Also reports which bricks are not used in any drug or guideline annotation.

### Drug annotation checks

| Check | Description | `--correct`ed | Only for single-gene results* |
Expand Down
18 changes: 17 additions & 1 deletion scripts/analyze.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
from analyze_functions.corrections.consult import add_consult
from analyze_functions.corrections.brand_name_whitespace import correct_brand_name_whitespace

from analyze_functions.data_helpers import get_drug_annotations, get_guideline_annotations, has_annotations
from analyze_functions.data_helpers import get_brick_ids, get_brick_meaning, get_drug_annotations, get_guideline_annotations, get_used_bricks, has_annotations
from common.constants import DRUG_COLLECTION_NAME, SCRIPT_POSTFIXES
from common.get_data import get_data, get_guideline_by_id, get_phenotype_key
from common.write_data import write_data, write_log
Expand Down Expand Up @@ -109,9 +109,11 @@ def run_analyses():
skipped_guideline_annotation_count = 0
failed_guideline_annotation_count = 0
log_content = []
used_bricks = []
for drug in data[DRUG_COLLECTION_NAME]:
drug_name = drug['name']
log_content.append(f'* {drug_name}')
used_bricks += get_used_bricks(drug)
drug_annotations = get_drug_annotations(data, drug)
if not has_annotations(drug_annotations):
missing_drug_annotation_count += 1
Expand All @@ -136,6 +138,7 @@ def run_analyses():
log_all_passed(log_content)
for guideline_id in drug['guidelines']:
guideline = get_guideline_by_id(data, guideline_id)
used_bricks += get_used_bricks(guideline)
phenotype = get_phenotype_key(guideline)
log_content.append(f' * {phenotype}')
guideline_annotations = get_guideline_annotations(data, guideline)
Expand Down Expand Up @@ -174,6 +177,19 @@ def run_analyses():
f'* Drugs: {skipped_drug_annotation_count}\n',
f'* Guidelines: {skipped_guideline_annotation_count}\n\n',
]
used_bricks = set(used_bricks)
unused_bricks = list(map(
lambda brick_id: get_brick_meaning(data, brick_id),
filter(
lambda brick_id: brick_id not in used_bricks,
get_brick_ids(data),
),
))
if (len(unused_bricks) > 0):
log_header.append('* Unused bricks:\n')
for unused_brick in unused_bricks:
log_header.append(f' * {unused_brick}\n')
log_header.append('\n')
write_log([*log_header, *log_content], postfix=SCRIPT_POSTFIXES['correct'])
if correct_inconsistencies:
write_data(data, postfix=SCRIPT_POSTFIXES['correct'])
Expand Down
12 changes: 12 additions & 0 deletions scripts/analyze_functions/data_helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,18 @@ def get_bricks_meaning(data, brick_ids):
lambda brick_id: get_brick_meaning(data, brick_id),
brick_ids))

def get_used_bricks(item):
    """Flatten the values of item['annotations'] into a single list.

    Each value of item['annotations'] is iterated and its entries are
    appended in dictionary order. Duplicates are kept, so callers that
    need unique entries have to deduplicate the result themselves.
    Presumably the entries are brick IDs -- confirm against the data schema.
    """
    return [
        brick_id
        for annotation_bricks in item['annotations'].values()
        for brick_id in annotation_bricks
    ]

def get_brick_ids(data):
    """Return the '_id' of every entry in data[BRICK_COLLECTION_NAME], in order."""
    return [brick['_id'] for brick in data[BRICK_COLLECTION_NAME]]

def get_annotation(data, item, key, resolve=True):
if not key in item['annotations']: return None
annotation = item['annotations'][key]
Expand Down

0 comments on commit 4d9313c

Please sign in to comment.