feat(#639): add unused brick check

hpi-dhc · Oct 15, 2024 · 4d9313c · 4d9313c
1 parent a44c22c
commit 4d9313c
Show file tree

Hide file tree

Showing 3 changed files with 31 additions and 1 deletion.
diff --git a/scripts/README.md b/scripts/README.md
@@ -89,6 +89,8 @@ Run `python analyze.py <PATH_TO_BACKUP> [--correct]` to analyze annotations
 and optionally correct what can be corrected easily in
 `<PATH_TO_BACKUP>_corrected_<TIMESTAMP>.base64.json`.
 
+Also reports which bricks are not used in any drug or guideline annotation.
+
 ### Drug annotation checks
 
 | Check | Description | `--correct`ed | Only for single-gene results* |

diff --git a/scripts/analyze.py b/scripts/analyze.py
@@ -16,7 +16,7 @@
 from analyze_functions.corrections.consult import add_consult
 from analyze_functions.corrections.brand_name_whitespace import correct_brand_name_whitespace
 
-from analyze_functions.data_helpers import get_drug_annotations, get_guideline_annotations, has_annotations
+from analyze_functions.data_helpers import get_brick_ids, get_brick_meaning, get_drug_annotations, get_guideline_annotations, get_used_bricks, has_annotations
 from common.constants import DRUG_COLLECTION_NAME, SCRIPT_POSTFIXES
 from common.get_data import get_data, get_guideline_by_id, get_phenotype_key
 from common.write_data import write_data, write_log
@@ -109,9 +109,11 @@ def run_analyses():
     skipped_guideline_annotation_count = 0
     failed_guideline_annotation_count = 0
     log_content = []
+    used_bricks = []
     for drug in data[DRUG_COLLECTION_NAME]:
         drug_name = drug['name']
         log_content.append(f'* {drug_name}')
+        used_bricks += get_used_bricks(drug)
         drug_annotations = get_drug_annotations(data, drug)
         if not has_annotations(drug_annotations):
             missing_drug_annotation_count += 1
@@ -136,6 +138,7 @@ def run_analyses():
                 log_all_passed(log_content)
         for guideline_id in drug['guidelines']:
             guideline = get_guideline_by_id(data, guideline_id)
+            used_bricks += get_used_bricks(guideline)
             phenotype = get_phenotype_key(guideline)
             log_content.append(f'  * {phenotype}')
             guideline_annotations = get_guideline_annotations(data, guideline)
@@ -174,6 +177,19 @@ def run_analyses():
         f'* Drugs: {skipped_drug_annotation_count}\n',
         f'* Guidelines: {skipped_guideline_annotation_count}\n\n',
     ]
+    used_bricks = set(used_bricks)
+    unused_bricks = list(map(
+        lambda brick_id: get_brick_meaning(data, brick_id),
+        filter(
+            lambda brick_id: brick_id not in used_bricks,
+            get_brick_ids(data),
+        ),
+    ))
+    if (len(unused_bricks) > 0):
+        log_header.append('* Unused bricks:\n')
+        for unused_brick in unused_bricks:
+            log_header.append(f'  * {unused_brick}\n')
+        log_header.append('\n')
     write_log([*log_header, *log_content], postfix=SCRIPT_POSTFIXES['correct'])
     if correct_inconsistencies:
         write_data(data, postfix=SCRIPT_POSTFIXES['correct'])

diff --git a/scripts/analyze_functions/data_helpers.py b/scripts/analyze_functions/data_helpers.py
@@ -32,6 +32,18 @@ def get_bricks_meaning(data, brick_ids):
         lambda brick_id: get_brick_meaning(data, brick_id),
         brick_ids))
 
+def get_used_bricks(item):
+    """Flatten all annotation lists of an item into a single list.
+
+    Walks every value stored in ``item['annotations']`` and concatenates
+    their elements in iteration order. Duplicates are kept; callers that
+    need unique entries have to deduplicate themselves. The elements are
+    presumably brick IDs (they are compared against brick ``_id`` values
+    by the caller) -- confirm against the data schema.
+    """
+    return [
+        brick_id
+        for annotation_bricks in item['annotations'].values()
+        for brick_id in annotation_bricks
+    ]
+
+def get_brick_ids(data):
+    """Return the ``_id`` of every entry in ``data[BRICK_COLLECTION_NAME]``.
+
+    The result is a new list that preserves the order of the collection.
+    """
+    return [brick['_id'] for brick in data[BRICK_COLLECTION_NAME]]
+
 def get_annotation(data, item, key, resolve=True):
     if not key in item['annotations']: return None
     annotation = item['annotations'][key]