Skip to content

Commit

Permalink
Merge pull request #116 from bcgsc/release/v3.9.0_high_mutation_burde…
Browse files Browse the repository at this point in the history
…n_variant_matches

Release/v3.9.0 high mutation burden variant matches
  • Loading branch information
dustinbleile authored May 17, 2023
2 parents 1494305 + 15691d1 commit 7139981
Show file tree
Hide file tree
Showing 8 changed files with 128 additions and 26 deletions.
71 changes: 66 additions & 5 deletions ipr/annotate.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,10 +11,11 @@
from graphkb.match import INPUT_COPY_CATEGORIES
from graphkb.types import Variant
from graphkb.util import FeatureNotFoundError, convert_to_rid_list
from pandas import isnull
from progressbar import progressbar
from typing import Any, Dict, List, Sequence, Set, cast

from .constants import FAILED_REVIEW_STATUS
from .constants import FAILED_REVIEW_STATUS, TMB_HIGH_CATEGORY
from .ipr import convert_statements_to_alterations
from .types import (
GkbStatement,
Expand Down Expand Up @@ -328,18 +329,38 @@ def annotate_positional_variants(

for var_key in VARIANT_KEYS:
variant = row.get(var_key)
if not variant:
matches = []
if not variant or isnull(variant):
continue
try:
matches = gkb_match.match_positional_variant(graphkb_conn, variant)
try:
matches = gkb_match.match_positional_variant(graphkb_conn, variant)
except HTTPError as parse_err:
# DEVSU-1885 - fix malformed single deletion described as substitution of blank
# eg. deletion described as substitution with nothing: 'chr1:g.150951027T>'
if (
variant[-1] == '>'
and 'g.' in variant
and variant[-2].isalpha()
and variant[-3].isnumeric()
):
logger.warning(
f"Assuming malformed deletion variant {variant} is {variant[:-2] + 'del'}"
)
variant = variant[:-2] + 'del'
matches = gkb_match.match_positional_variant(graphkb_conn, variant)
else:
raise parse_err

# GERO-299 - check for conflicting nonsense and missense categories

missense = [
m for m in matches if 'missense' in m.get('type', m).get('displayName', '')
]
nonsense = [
m for m in matches if 'nonsense' in m.get('type', m).get('displayName', '')
]

missense_cat = [m for m in missense if m.get('@class', '') == 'CategoryVariant']
nonsense_cat = [m for m in nonsense if m.get('@class', '') == 'CategoryVariant']
if missense_cat and nonsense_cat:
Expand Down Expand Up @@ -429,8 +450,8 @@ def annotate_positional_variants(

def annotate_msi(
graphkb_conn: GraphKBConnection,
msi_category: str,
disease_name: str,
disease_name: str = 'cancer',
msi_category: str = 'microsatellite instability',
) -> List[KbMatch]:
"""Annotate microsatellite instability from GraphKB in the IPR alterations format.
Expand Down Expand Up @@ -461,3 +482,43 @@ def annotate_msi(
ipr_row['variantType'] = 'msi'
gkb_matches.append(ipr_row)
return gkb_matches


def annotate_tmb(
    graphkb_conn: GraphKBConnection,
    disease_name: str = 'cancer',
    category: str = TMB_HIGH_CATEGORY,
) -> List[KbMatch]:
    """Annotate a tumour mutation burden (tmb) category using GraphKB.

    Looks up GraphKB CategoryVariant records whose reference1 is a Signature
    named (or displayed as) ``category``, then converts the statements found
    for those variants into IPR alteration rows.

    Args:
        graphkb_conn: the graphkb api connection object
        disease_name: oncotree disease name for graphkb matching.
        category: signature category to look up, such as 'high mutation burden'

    Returns:
        list of kbMatches records for IPR; empty when no category variant matched
    """
    # The signature may be stored under either its name or its display name.
    signature_query = {
        'target': 'Signature',
        'filters': {'OR': [{'name': category}, {'displayName': category}]},
    }
    category_variant_query = {
        'target': 'CategoryVariant',
        'filters': {'reference1': signature_query},
    }
    categories = graphkb_conn.query(
        {
            'target': category_variant_query,
            'queryType': 'similarTo',
            'returnProperties': ['@rid', 'displayName'],
        },
    )
    if not categories:
        return []

    gkb_matches = []
    statements = get_ipr_statements_from_variants(graphkb_conn, categories, disease_name)
    for ipr_row in statements:
        # Tag every row with the requested category so IPR can link it to the tmb variant.
        ipr_row['variant'] = category
        ipr_row['variantType'] = 'tmb'
        gkb_matches.append(ipr_row)
    return gkb_matches
3 changes: 3 additions & 0 deletions ipr/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,3 +4,6 @@

# all possible values for review status are: ['pending', 'not required', 'passed', 'failed', 'initial']
FAILED_REVIEW_STATUS = 'failed'

# Threshold at or above which the combined genome SNV + indel TMB is treated as high.
TMB_HIGH = 10.0 # genomic mutations per mb - https://www.bcgsc.ca/jira/browse/GERO-296
# GraphKB signature category name that is matched once the TMB_HIGH threshold is reached.
TMB_HIGH_CATEGORY = 'high mutation burden'
16 changes: 16 additions & 0 deletions ipr/content.spec.json
Original file line number Diff line number Diff line change
Expand Up @@ -1197,6 +1197,22 @@
"null"
]
},
"rnaAltCount": {
"description": "the number of alternate reads in the rna supporting the mutation",
"example": 1,
"type": [
"integer",
"null"
]
},
"rnaDepth": {
"description": "the total number of reads at this position in the rna",
"example": 2,
"type": [
"integer",
"null"
]
},
"svg": {
"description": "svg image file content for this SV",
"type": [
Expand Down
3 changes: 2 additions & 1 deletion ipr/inputs.py
Original file line number Diff line number Diff line change
Expand Up @@ -139,7 +139,8 @@
'highQuality',
'comments',
'library',
# GERO-307 - tumourAltCount and tumourDepth are available but not rnaAltCount and rnaDepth
'rnaAltCount',
'rnaDepth',
'tumourAltCount',
'tumourDepth',
'germline',
Expand Down
3 changes: 1 addition & 2 deletions ipr/ipr.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,12 +5,11 @@
from graphkb import GraphKBConnection
from graphkb import statement as gkb_statement
from graphkb import vocab as gkb_vocab
from graphkb.types import Record
from typing import Dict, Iterable, List, Sequence, Set, Tuple

from .constants import GERMLINE_BASE_TERMS, VARIANT_CLASSES
from .types import GkbStatement, ImageDefinition, IprFusionVariant, IprGene, IprVariant, KbMatch
from .util import convert_to_rid_set, find_variant, logger
from .util import find_variant, logger


def display_evidence_levels(statement: GkbStatement) -> str:
Expand Down
41 changes: 36 additions & 5 deletions ipr/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,10 +12,11 @@
annotate_expression_variants,
annotate_msi,
annotate_positional_variants,
annotate_tmb,
get_gene_information,
)
from .connection import IprConnection
from .constants import DEFAULT_URL
from .constants import DEFAULT_URL, TMB_HIGH, TMB_HIGH_CATEGORY
from .inputs import (
check_comparators,
check_variant_links,
Expand Down Expand Up @@ -198,10 +199,38 @@ def create_report(
gkb_matches: List[KbMatch] = []

# Signature category variants
tmb_variant: IprVariant = {}
tmb_matches = []
if 'tmburMutationBurden' in content.keys():
logger.warning(
'GERO-296 - not yet implemented - high tumour mutation burden category matching.'
)
tmb_val = 0.0
tmb = {}
try:
tmb = content.get('tmburMutationBurden', {})
tmb_val = tmb['genomeIndelTmb'] + tmb['genomeSnvTmb']
except Exception as err:
logger.error(f"tmburMutationBurden parsing failure: {err}")

if tmb_val >= TMB_HIGH:
logger.warning(
f'GERO-296 - tmburMutationBurden high -checking graphkb matches for {TMB_HIGH_CATEGORY}'
)
if not tmb.get('key'):
tmb['key'] = TMB_HIGH_CATEGORY
if not tmb.get('kbCategory'):
tmb['kbCategory'] = TMB_HIGH_CATEGORY

# GERO-296 - try matching to graphkb
tmb_matches = annotate_tmb(graphkb_conn, kb_disease_match, TMB_HIGH_CATEGORY)
if tmb_matches:
tmb_variant['kbCategory'] = TMB_HIGH_CATEGORY # type: ignore
tmb_variant['variant'] = TMB_HIGH_CATEGORY
tmb_variant['key'] = tmb['key']
tmb_variant['variantType'] = 'tmb'
logger.info(
f"GERO-296 '{TMB_HIGH_CATEGORY}' matches {len(tmb_matches)} statements."
)
gkb_matches.extend(tmb_matches)
logger.debug(f"\tgkb_matches: {len(gkb_matches)}")

msi = content.get('msi', [])
msi_matches = []
Expand All @@ -216,7 +245,7 @@ def create_report(
msi_cat = msi.get('kbCategory')
msi_variant = msi.copy()
logger.info(f'Matching GKB msi {msi_cat}')
msi_matches = annotate_msi(graphkb_conn, msi_cat, kb_disease_match)
msi_matches = annotate_msi(graphkb_conn, kb_disease_match, msi_cat)
if msi_matches:
msi_variant['kbCategory'] = msi_cat # type: ignore
msi_variant['variant'] = msi_cat
Expand Down Expand Up @@ -262,6 +291,8 @@ def create_report(
all_variants = expression_variants + copy_variants + structural_variants + small_mutations # type: ignore
if msi_matches:
all_variants.append(msi_variant) # type: ignore
if tmb_matches:
all_variants.append(tmb_variant) # type: ignore

if match_germline: # verify germline kb statements matched germline observed variants
gkb_matches = germline_kb_matches(gkb_matches, all_variants)
Expand Down
4 changes: 2 additions & 2 deletions setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ known_standard_library = requests

[metadata]
name = ipr
version = 3.8.0
version = 3.9.0
author_email = [email protected]
author = ipr
maintainer_email = [email protected]
Expand All @@ -32,7 +32,7 @@ python_requires = >=3.6
dependency_links = []
include_package_data = True
install_requires =
graphkb>=1.8.0, <2
graphkb>=1.10.1
biopython==1.76
progressbar2>=3.51.0, <4
pandas>=1.1.0, <2
Expand Down
13 changes: 2 additions & 11 deletions tests/test_annotate.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,6 @@ def graphkb_conn():
return graphkb_conn


@pytest.mark.skipif(EXCLUDE_INTEGRATION_TESTS, reason="SDEV-3381 - github workflow failures.")
def test_annotate_nonsense_vs_missense(graphkb_conn):
"""Verify missense (point mutation) is not mistaken for a nonsense (stop codon) mutation."""
disease = 'cancer'
Expand All @@ -84,7 +83,6 @@ def test_annotate_nonsense_vs_missense(graphkb_conn):
assert matched, f"should have matched in {key}: {TP53_MUT_DICT[key]}"


@pytest.mark.skipif(EXCLUDE_INTEGRATION_TESTS, reason="SDEV-3381 - github workflow failures.")
def test_annotate_nonsense_vs_missense_protein(graphkb_conn):
"""Verify missense (point mutation) is not mistaken for a nonsense (stop codon) mutation."""
disease = 'cancer'
Expand All @@ -96,13 +94,11 @@ def test_annotate_nonsense_vs_missense_protein(graphkb_conn):
assert matched, f"should have matched in {key}: {TP53_MUT_DICT[key]}"


@pytest.mark.skipif(EXCLUDE_INTEGRATION_TESTS, reason="SDEV-3381 - github workflow failures.")
def test_annotate_structural_variants_tp53(graphkb_conn):
"""Verify alternate TP53 variants match."""
disease = 'cancer'
ref_key = 'prot_only'
pref = annotate_positional_variants(graphkb_conn, [TP53_MUT_DICT[ref_key]], disease)
known_issues = set(['TP53:p.M237X']) # SDEV-3122 -
# GERO-299 - nonsense - stop codon - should not match. This is missense not nonsense (#164:933).
nonsense = [a for a in pref if a['kbVariant'] == 'TP53 nonsense']
assert not nonsense
Expand All @@ -117,21 +113,16 @@ def test_annotate_structural_variants_tp53(graphkb_conn):
diff = pref_vars.symmetric_difference(alt_vars)
missing = pref_vars.difference(alt_vars)

known_issues = set([])
if 'hgvsCds' in alt_rep:
known_issues.add('TP53 nonsense') # GERO-299
if 'p.M237' not in alt_rep:
known_issues.add('TP53:p.M237X') # SDEV-3122 - not matching imprecise mutations
known_issues = set()
if key == 'genome_only':
# genome_only matched to more precise type 'TP53 deleterious mutation' but not 'TP53 mutation'
known_issues.add('TP53 mutation')

# strangely genome_only matched to more precise type 'TP53 deleterious mutation' but not 'TP53 mutation'
missing = pref_vars.difference(alt_vars).difference(known_issues)
print(alt_vars)
assert not missing, f"{key} missing{missing}: {diff}"


@pytest.mark.skipif(EXCLUDE_INTEGRATION_TESTS, reason="SDEV-3381 - github workflow failures.")
def test_get_therapeutic_associated_genes(graphkb_conn):
gene_list = get_therapeutic_associated_genes(graphkb_conn=graphkb_conn)
assert gene_list, 'No get_therapeutic_associated_genes found'
Expand Down

0 comments on commit 7139981

Please sign in to comment.