Skip to content

Commit

Permalink
Merge pull request #85 from bcgsc/release/v1.10.0
Browse files Browse the repository at this point in the history
Release/v1.10.0
  • Loading branch information
mathieulemieux authored May 8, 2023
2 parents 60445de + 72ca612 commit 18a0140
Show file tree
Hide file tree
Showing 6 changed files with 465 additions and 14 deletions.
36 changes: 36 additions & 0 deletions graphkb/constants.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
import argparse

from typing import Dict

from .types import CategoryBaseTermMapping

DEFAULT_LIMIT = 1000
Expand Down Expand Up @@ -137,3 +139,37 @@ def __getitem__(self, key):
# Namespace of expression categories: UP holds the 'increased expression'
# term and DOWN holds the 'reduced expression' term.
INPUT_EXPRESSION_CATEGORIES = IterableNamespace(
UP='increased expression', DOWN='reduced expression'
)

# From: https://github.com/bcgsc/pori_graphkb_parser/blob/ae3738842a4c208ab30f58c08ae987594d632504/src/constants.ts#L33-L80
# Lookup table from a variant type name to the short notation token used when
# writing that variant type as a string. Several type names intentionally share
# one token (e.g. every frameshift flavour maps to 'fs', and both substitution
# and nonsense types map to '>').
TYPES_TO_NOTATION: Dict[str, str] = {
    # current type names
    'acetylation': 'ac',
    'copy gain': 'copygain',
    'copy loss': 'copyloss',
    'deletion': 'del',
    'duplication': 'dup',
    'extension': 'ext',
    'frameshift': 'fs',
    'fusion': 'fusion',
    'indel': 'delins',
    'insertion': 'ins',
    'inversion': 'inv',
    'inverted translocation': 'itrans',
    'methylation': 'me',
    'missense mutation': 'mis',
    'mutation': 'mut',
    'nonsense mutation': '>',
    'phosphorylation': 'phos',
    'splice-site': 'spl',
    'substitution': '>',
    'translocation': 'trans',
    'truncating frameshift mutation': 'fs',
    'ubiquitination': 'ub',
    # deprecated forms and aliases (each resolves to the same token as its
    # current counterpart above)
    'frameshift mutation': 'fs',
    'frameshift truncation': 'fs',
    'missense variant': 'mis',
    'truncating frameshift': 'fs',
    'missense': 'mis',
    'mutations': 'mut',
    'nonsense': '>',
}
73 changes: 67 additions & 6 deletions graphkb/match.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,13 @@
VARIANT_RETURN_PROPERTIES,
)
from .types import BasicPosition, Ontology, ParsedVariant, PositionalVariant, Record, Variant
from .util import FeatureNotFoundError, convert_to_rid_list, logger, looks_like_rid
from .util import (
convert_to_rid_list,
FeatureNotFoundError,
logger,
looks_like_rid,
stringifyVariant,
)
from .vocab import get_term_tree

FEATURES_CACHE: Set[str] = set()
Expand Down Expand Up @@ -165,6 +171,8 @@ def match_category_variant(
],
},
'queryType': 'similarTo',
'edges': ['AliasOf', 'DeprecatedBy', 'CrossReferenceOf', 'GeneralizationOf'],
'treeEdges': ['Infers'],
'returnProperties': VARIANT_RETURN_PROPERTIES,
},
ignore_cache=ignore_cache,
Expand Down Expand Up @@ -258,17 +266,37 @@ def positions_overlap(
def compare_positional_variants(
variant: Union[PositionalVariant, ParsedVariant],
reference_variant: Union[PositionalVariant, ParsedVariant],
generic: bool = True,
) -> bool:
"""
Compare 2 variant records from GraphKB to determine if they are equivalent
Args:
variant: the input variant
reference_variant: the reference (matched) variant record
generic (bool, optional): also include the more generic variants
Returns:
bool: True if the records are equivalent
"""

# If specific vs more-generic variants are not to be considered as equivalent,
# check whether their stringified representations match and return True or False right away.
if not generic:
variant_str: str = stringifyVariant(
variant,
withRef=False, # Reference(s) will not be included in the string repr.
withRefSeq=False, # Reference sequence will not be included in the string repr.
)
reference_variant_str: str = stringifyVariant(
reference_variant,
withRef=False, # Reference(s) will not be included in the string repr.
withRefSeq=False, # Reference sequence will not be included in the string repr.
)
return variant_str == reference_variant_str

# For break1, check if positions are overlapping between the variant and the reference.
# Continue only if True.
if not positions_overlap(
cast(BasicPosition, variant['break1Start']),
cast(BasicPosition, reference_variant['break1Start']),
Expand All @@ -278,6 +306,9 @@ def compare_positional_variants(
):
return False

# For break2, check if positions are overlapping between the variant and the reference.
# Continue only if True or no break2.
# TODO: check for variant without break2 but reference_variant with one.
if variant.get('break2Start'):
if not reference_variant.get('break2Start'):
return False
Expand All @@ -290,6 +321,8 @@ def compare_positional_variants(
):
return False

# If both variants have untemplated sequence,
# check for size and content.
if (
variant.get('untemplatedSeq', None) is not None
and reference_variant.get('untemplatedSeq', None) is not None
Expand All @@ -314,6 +347,8 @@ def compare_positional_variants(
elif len(variant['untemplatedSeq']) != len(reference_variant['untemplatedSeq']):
return False

# If both variants have a reference sequence,
# check if they are the same.
if (
variant.get('refSeq', None) is not None
and reference_variant.get('refSeq', None) is not None
Expand Down Expand Up @@ -464,24 +499,36 @@ def match_positional_variant(
{'break1Start.@class': parsed['break1Start']['@class']},
]

filtered: List[Record] = []
filtered_similarOnly: List[Record] = [] # For post filter match use
filtered_similarAndGeneric: List[Record] = [] # To be added to the matches at the very end

for row in cast(
List[Record],
conn.query(
{'target': 'PositionalVariant', 'filters': query_filters}, ignore_cache=ignore_cache
),
):
if compare_positional_variants(parsed, cast(PositionalVariant, row)):
filtered.append(row)
# TODO: Check if variant and reference_variant should be interchanged
if compare_positional_variants(
variant=parsed,
reference_variant=cast(PositionalVariant, row),
generic=True,
):
filtered_similarAndGeneric.append(row)
if compare_positional_variants(
variant=parsed,
reference_variant=cast(PositionalVariant, row),
generic=False, # Similar variants only
):
filtered_similarOnly.append(row)

# post filter matches
matches: List[Record] = []
if filtered:
if filtered_similarOnly:
matches.extend(
conn.query(
{
'target': convert_to_rid_list(filtered),
'target': convert_to_rid_list(filtered_similarOnly),
'queryType': 'similarTo',
'edges': ['AliasOf', 'DeprecatedBy', 'CrossReferenceOf', 'GeneralizationOf'],
'treeEdges': ['Infers'],
Expand Down Expand Up @@ -543,6 +590,20 @@ def cat_variant_query(
cat_variant_query(features, types, None)
cat_variant_query(secondary_features, types, None)

# Adding back generic PositionalVariant to the matches
if filtered_similarAndGeneric:
matches.extend(
conn.query(
{
'target': convert_to_rid_list(filtered_similarAndGeneric),
'queryType': 'descendants',
'edges': [],
'returnProperties': POS_VARIANT_RETURN_PROPERTIES,
},
ignore_cache=ignore_cache,
),
)

result: Dict[str, Variant] = {}
for row in matches:
result[row['@rid']] = cast(Variant, row)
Expand Down
11 changes: 9 additions & 2 deletions graphkb/types.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,8 @@
except ImportError:
from typing_extensions import TypedDict

Record: TypedDict = TypedDict('Record', {'@rid': str, '@class': str})
EmbeddedRecord: TypedDict = TypedDict('EmbeddedRecord', {'@class': str})
# Base record shape: a dict carrying the string keys '@rid' and '@class'.
Record: TypedDict = TypedDict("Record", {"@rid": str, "@class": str})
# Embedded record shape: carries only the '@class' string key (no '@rid').
EmbeddedRecord: TypedDict = TypedDict("EmbeddedRecord", {"@class": str})

# A link to a record: either a plain string or the Record dict itself.
# NOTE(review): the string form is presumably the record's '@rid' - confirm.
RecordLink = Union[str, Record]

Expand All @@ -22,6 +22,13 @@ class Ontology(Record):
displayName: str


# Ontology term record: extends Record with four descriptive fields
# (name, sourceId, sourceIdVersion, displayName), each typed as a string or None.
class OntologyTerm(Record):
name: Optional[str]
sourceId: Optional[str]
sourceIdVersion: Optional[str]
displayName: Optional[str]


OntologyLink = Union[str, Ontology]


Expand Down
Loading

0 comments on commit 18a0140

Please sign in to comment.