Merge pull request #85 from bcgsc/release/v1.10.0

Release/v1.10.0
bcgsc · May 8, 2023 · 18a0140 · 18a0140
2 parents 60445de + 72ca612
commit 18a0140
Show file tree

Hide file tree

Showing 6 changed files with 465 additions and 14 deletions.
diff --git a/graphkb/constants.py b/graphkb/constants.py
@@ -1,5 +1,7 @@
 import argparse
 
+from typing import Dict
+
 from .types import CategoryBaseTermMapping
 
 DEFAULT_LIMIT = 1000
@@ -137,3 +139,37 @@ def __getitem__(self, key):
 INPUT_EXPRESSION_CATEGORIES = IterableNamespace(
     UP='increased expression', DOWN='reduced expression'
 )
+
+# From: https://github.com/bcgsc/pori_graphkb_parser/blob/ae3738842a4c208ab30f58c08ae987594d632504/src/constants.ts#L33-L80
+TYPES_TO_NOTATION: Dict[str, str] = {
+    'acetylation': 'ac',
+    'copy gain': 'copygain',
+    'copy loss': 'copyloss',
+    'deletion': 'del',
+    'duplication': 'dup',
+    'extension': 'ext',
+    'frameshift': 'fs',
+    'fusion': 'fusion',
+    'indel': 'delins',
+    'insertion': 'ins',
+    'inversion': 'inv',
+    'inverted translocation': 'itrans',
+    'methylation': 'me',
+    'missense mutation': 'mis',
+    'mutation': 'mut',
+    'nonsense mutation': '>',
+    'phosphorylation': 'phos',
+    'splice-site': 'spl',
+    'substitution': '>',
+    'translocation': 'trans',
+    'truncating frameshift mutation': 'fs',
+    'ubiquitination': 'ub',
+    # deprecated forms and aliases
+    'frameshift mutation': 'fs',
+    'frameshift truncation': 'fs',
+    'missense variant': 'mis',
+    'truncating frameshift': 'fs',
+    'missense': 'mis',
+    'mutations': 'mut',
+    'nonsense': '>',
+}
diff --git a/graphkb/match.py b/graphkb/match.py
@@ -12,7 +12,13 @@
     VARIANT_RETURN_PROPERTIES,
 )
 from .types import BasicPosition, Ontology, ParsedVariant, PositionalVariant, Record, Variant
-from .util import FeatureNotFoundError, convert_to_rid_list, logger, looks_like_rid
+from .util import (
+    convert_to_rid_list,
+    FeatureNotFoundError,
+    logger,
+    looks_like_rid,
+    stringifyVariant,
+)
 from .vocab import get_term_tree
 
 FEATURES_CACHE: Set[str] = set()
@@ -165,6 +171,8 @@ def match_category_variant(
                     ],
                 },
                 'queryType': 'similarTo',
+                'edges': ['AliasOf', 'DeprecatedBy', 'CrossReferenceOf', 'GeneralizationOf'],
+                'treeEdges': ['Infers'],
                 'returnProperties': VARIANT_RETURN_PROPERTIES,
             },
             ignore_cache=ignore_cache,
@@ -258,17 +266,37 @@ def positions_overlap(
 def compare_positional_variants(
     variant: Union[PositionalVariant, ParsedVariant],
     reference_variant: Union[PositionalVariant, ParsedVariant],
+    generic: bool = True,
 ) -> bool:
     """
     Compare 2 variant records from GraphKB to determine if they are equivalent
 
     Args:
         variant: the input variant
         reference_variant: the reference (matched) variant record
+        generic (bool, optional): also include the more generic variants
 
     Returns:
         bool: True if the records are equivalent
     """
+
+    # If specific and more-generic variants are not to be considered equivalent,
+    # check whether their stringified representations match and return the result right away.
+    if not generic:
+        variant_str: str = stringifyVariant(
+            variant,
+            withRef=False,  # Reference(s) will not be included in the string repr.
+            withRefSeq=False,  # Reference sequence will not be included in the string repr.
+        )
+        reference_variant_str: str = stringifyVariant(
+            reference_variant,
+            withRef=False,  # Reference(s) will not be included in the string repr.
+            withRefSeq=False,  # Reference sequence will not be included in the string repr.
+        )
+        return variant_str == reference_variant_str
+
+    # For break1, check whether the positions of the variant and the reference overlap.
+    # Continue only if they do.
     if not positions_overlap(
         cast(BasicPosition, variant['break1Start']),
         cast(BasicPosition, reference_variant['break1Start']),
@@ -278,6 +306,9 @@ def compare_positional_variants(
     ):
         return False
 
+    # For break2, check whether the positions of the variant and the reference overlap.
+    # Continue only if they do, or if the variant has no break2.
+    # TODO: check for variant without break2 but reference_variant with one.
     if variant.get('break2Start'):
         if not reference_variant.get('break2Start'):
             return False
@@ -290,6 +321,8 @@ def compare_positional_variants(
         ):
             return False
 
+    # If both variants have an untemplated sequence,
+    # compare their sizes and contents.
     if (
         variant.get('untemplatedSeq', None) is not None
         and reference_variant.get('untemplatedSeq', None) is not None
@@ -314,6 +347,8 @@ def compare_positional_variants(
             elif len(variant['untemplatedSeq']) != len(reference_variant['untemplatedSeq']):
                 return False
 
+    # If both variants have a reference sequence,
+    # check if they are the same.
     if (
         variant.get('refSeq', None) is not None
         and reference_variant.get('refSeq', None) is not None
@@ -464,24 +499,36 @@ def match_positional_variant(
         {'break1Start.@class': parsed['break1Start']['@class']},
     ]
 
-    filtered: List[Record] = []
+    filtered_similarOnly: List[Record] = []  # For post filter match use
+    filtered_similarAndGeneric: List[Record] = []  # To be added to the matches at the very end
 
     for row in cast(
         List[Record],
         conn.query(
             {'target': 'PositionalVariant', 'filters': query_filters}, ignore_cache=ignore_cache
         ),
     ):
-        if compare_positional_variants(parsed, cast(PositionalVariant, row)):
-            filtered.append(row)
+        # TODO: Check if variant and reference_variant should be interchanged
+        if compare_positional_variants(
+            variant=parsed,
+            reference_variant=cast(PositionalVariant, row),
+            generic=True,
+        ):
+            filtered_similarAndGeneric.append(row)
+            if compare_positional_variants(
+                variant=parsed,
+                reference_variant=cast(PositionalVariant, row),
+                generic=False,  # Similar variants only
+            ):
+                filtered_similarOnly.append(row)
 
     # post filter matches
     matches: List[Record] = []
-    if filtered:
+    if filtered_similarOnly:
         matches.extend(
             conn.query(
                 {
-                    'target': convert_to_rid_list(filtered),
+                    'target': convert_to_rid_list(filtered_similarOnly),
                     'queryType': 'similarTo',
                     'edges': ['AliasOf', 'DeprecatedBy', 'CrossReferenceOf', 'GeneralizationOf'],
                     'treeEdges': ['Infers'],
@@ -543,6 +590,20 @@ def cat_variant_query(
         cat_variant_query(features, types, None)
         cat_variant_query(secondary_features, types, None)
 
+    # Add the generic PositionalVariant records back to the matches
+    if filtered_similarAndGeneric:
+        matches.extend(
+            conn.query(
+                {
+                    'target': convert_to_rid_list(filtered_similarAndGeneric),
+                    'queryType': 'descendants',
+                    'edges': [],
+                    'returnProperties': POS_VARIANT_RETURN_PROPERTIES,
+                },
+                ignore_cache=ignore_cache,
+            ),
+        )
+
     result: Dict[str, Variant] = {}
     for row in matches:
         result[row['@rid']] = cast(Variant, row)

diff --git a/graphkb/types.py b/graphkb/types.py
@@ -9,8 +9,8 @@
 except ImportError:
     from typing_extensions import TypedDict
 
-Record: TypedDict = TypedDict('Record', {'@rid': str, '@class': str})
-EmbeddedRecord: TypedDict = TypedDict('EmbeddedRecord', {'@class': str})
+Record: TypedDict = TypedDict("Record", {"@rid": str, "@class": str})
+EmbeddedRecord: TypedDict = TypedDict("EmbeddedRecord", {"@class": str})
 
 RecordLink = Union[str, Record]
 
@@ -22,6 +22,13 @@ class Ontology(Record):
     displayName: str
 
 
+class OntologyTerm(Record):
+    name: Optional[str]
+    sourceId: Optional[str]
+    sourceIdVersion: Optional[str]
+    displayName: Optional[str]
+
+
 OntologyLink = Union[str, Ontology]