Skip to content

Commit

Permalink
Fix: #20
Browse files Browse the repository at this point in the history
# Fixed Issues:
- GQ always  showed the default score of the variant. This has been changed to show a  phread like  score. This score is based on a Z-score which is used to determine how likely it is that the variant is true  variant.
- The SVSUPPORT flag was always visible even if no SNFJ file was used. This has been updated to be only present in the VCF if a SNFJ file has been used.
  • Loading branch information
Philippe Sanio committed Apr 22, 2024
1 parent 0e96275 commit 06135b8
Show file tree
Hide file tree
Showing 3 changed files with 42 additions and 35 deletions.
5 changes: 4 additions & 1 deletion spectre/analysis/analysis.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,8 @@ def __init__(self, coverage_file, outbed, outvcf, sample_id, spectre_args, metad
self.merged_candidates = None
# output
self.chromosome_names_out = None
# SNFJ
self.snfj_breakpoints = False
# DEV
self.min_chr_length = 1e6
self.dist_min_overwrite = 10000 # 10kb TODO
Expand Down Expand Up @@ -353,6 +355,7 @@ def refine_cnv_calls(self):
def check_snfj_breakpoints(self, snfspc_file):
bp = Breakpoints(snfspc_file, self.as_dev)
bp.correlate_cnvs_with_breakpoints(cnv_candidates=self.cnv_calls_list, bin_size=self.bin_size)
self.snfj_breakpoints = True

# Candidate CNV merge by distance and CNV type
def merge_candidates(self, candidates_cnv_list, chromosome_name):
Expand Down Expand Up @@ -742,7 +745,7 @@ def cnv_result_vcf(self, method=""):
# TODO add LoH if self.snv_loh is not None
# TODO modify make_vcf
output_vcf = os.path.join(os.path.join(self.output_directory, f'{method}{self.sample_id}.vcf.gz'))
vcf_output = outputWriter.VCFOutput(output_vcf, self.genome_info)
vcf_output = outputWriter.VCFOutput(output_vcf, self.genome_info, self.snfj_breakpoints)
vcf_output.make_vcf(self.chromosome_names_out, self.merged_candidates, self.sample_id)

# Plots
Expand Down
52 changes: 26 additions & 26 deletions spectre/analysis/cnv_metrics.py
Original file line number Diff line number Diff line change
Expand Up @@ -449,7 +449,7 @@ def check_del_dup_borders(self, del_threshold: float, dup_threshold: float) -> t
dup_threshold = self.ploidy + 1
self.logger.warning(f"Using fallback to threshold of: {dup_threshold}")
return del_threshold, dup_threshold
def evaluate_cnvs(self, cnv_calls=None, refined_cnvs: bool = False) -> dict:
def evaluate_cnvs(self, refined_cnvs: bool = False) -> dict:
"""
Evaluating all submitted CNV calls, by applying statistical tests. Optionally, plotting the location of the CNVs
on the global coverage samples per chr.
Expand Down Expand Up @@ -483,31 +483,31 @@ def evaluate_cnvs(self, cnv_calls=None, refined_cnvs: bool = False) -> dict:
"pvalue": cnv_metrics_Z["pvalue"],
"sample_score": cnv_metrics_Z["sample_score"]}

if self.as_dev and False: # BUG line 385
if not np.isnan(cnv_metrics_Z["cnv_call_mean"]):

# pre check --> avoide div by 0
if cnv_metrics_Z["cnv_call_mean"] == 0:
x_index = 0
else:
if not np.isinf(cnv_metrics_Z["cnv_call_mean"]):
x_index = int(cnv_metrics_Z["cnv_call_mean"] / width)
else:
x_index = int(x[-1])
# check if out of bounds
if x_index >= len(x):
cnv_y = x[-1]
else:
cnv_y = int(x[x_index])
cnv_y = int(cnv_y)
annotation_endpoint_offset = 5 * (100 - x_index) * (
1 - (cnv_y / 150)) + (max(x) / 10) # np.random.randint(100,300)

# Add
ax.annotate(f"{cnv.id}", xy=(cnv_metrics_Z["cnv_call_mean"], cnv_y),
xytext=(cnv_metrics_Z["cnv_call_mean"], cnv_y + annotation_endpoint_offset),
rotation=60,
fontsize=12, arrowprops=dict(facecolor='green', shrink=0.05))
# if self.as_dev and False: # BUG line 385
# if not np.isnan(cnv_metrics_Z["cnv_call_mean"]):
#
# # pre check --> avoide div by 0
# if cnv_metrics_Z["cnv_call_mean"] == 0:
# x_index = 0
# else:
# if not np.isinf(cnv_metrics_Z["cnv_call_mean"]):
# x_index = int(cnv_metrics_Z["cnv_call_mean"] / width)
# else:
# x_index = int(x[-1])
# # check if out of bounds
# if x_index >= len(x):
# cnv_y = x[-1]
# else:
# cnv_y = int(x[x_index])
# cnv_y = int(cnv_y)
# annotation_endpoint_offset = 5 * (100 - x_index) * (
# 1 - (cnv_y / 150)) + (max(x) / 10) # np.random.randint(100,300)
#
# # Add
# ax.annotate(f"{cnv.id}", xy=(cnv_metrics_Z["cnv_call_mean"], cnv_y),
# xytext=(cnv_metrics_Z["cnv_call_mean"], cnv_y + annotation_endpoint_offset),
# rotation=60,
# fontsize=12, arrowprops=dict(facecolor='green', shrink=0.05))
if self.as_dev:
self.logger.debug(f"Creating CNV distribution plot")
fig, ax = plt.subplots()
Expand Down
20 changes: 12 additions & 8 deletions spectre/util/outputWriter.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,11 +67,12 @@ def format_vcf_line(self):


class VCFOutput(object):
def __init__(self, output_file, genome_info):
def __init__(self, output_file, genome_info, sv_support: bool = False):
self.supp_vec = {}
self.output_vcf = output_file
self.population_sample_ids = []
self.genome_info = genome_info
self.sv_support = sv_support
# example {'chromosomes': ['chr6'], 'chr_lengths': {'chr6': 170805979}}

@staticmethod
Expand Down Expand Up @@ -105,7 +106,8 @@ def make_vcf_header(self):
'##INFO=<ID=SVLEN,Number=1,Type=Integer,Description="Length of the SV">',
'##INFO=<ID=SVTYPE,Number=1,Type=String,Description="Type of copy number variant">',
'##INFO=<ID=CN,Number=1,Type=Integer,Description="Estimated copy number status">',
'##INFO=<ID=SVSUPPORT,Number=1,Type=String,Description="SV support">']
'##INFO=<ID=SVSUPPORT,Number=1,Type=String,Description="Indicator if a SV support was found in a provided SNFJ file">',
]
if population_mode:
vcf_header += ['##INFO=<ID=SUPP_VEC,Number=1,Type=String,Description="Support vector">']

Expand Down Expand Up @@ -151,14 +153,15 @@ def vcf_result(self, chromosome_list, cnv_candidate_list):
vcf_line.QUAL = str(each_candidate.quality)
vcf_line.INFO = f"END={each_candidate.end};SVLEN={each_candidate.size};SVTYPE={each_candidate.type};" \
f"CN={each_candidate.cn_status};"
vcf_line.INFO += f"SVSUPPORT={'TRUE' if each_candidate.sv_support else 'FALSE'};"
if self.sv_support:
vcf_line.INFO += f"SVSUPPORT={'TRUE' if each_candidate.sv_support else 'FALSE'};"

# checking if any other CNV through merging supported the given CNV
vcf_line.supp_vec = self.supp_vec.copy()
if not each_candidate.support_cnv_calls:
vcf_line.format_data = [each_candidate.gt, f'{round(each_candidate.het_score, 2)}',
f"{int(each_candidate.statistics['z-score']['sample_score'])}",
f'{round(each_candidate.median_raw_cov,2)}']
f'{round(each_candidate.median_raw_cov, 2)}']
else:
vcf_line.format_data = []
vcf_line.FORMAT += ":ID" # ADD ID tag in format as everything that is following are IDs
Expand All @@ -169,7 +172,7 @@ def vcf_result(self, chromosome_list, cnv_candidate_list):
ids = []
scores = []
gts = []
dps = [] # raw read depth
dps = [] # raw read depth
# sample_id =""
for candidate in list(sample_value):
ids.append(candidate.id)
Expand All @@ -189,10 +192,10 @@ def vcf_result(self, chromosome_list, cnv_candidate_list):
dp = np.nan
else:
dp = np.nanmean(dps)
vcf_line.sample_format_data[sample_key] = [gt, "0.0", score,round(dp,2), ",".join(ids)]
vcf_line.sample_format_data[sample_key] = [gt, "0.0", score, round(dp, 2), ",".join(ids)]
vcf_line.supp_vec[sample_key] = 1
else:
vcf_line.sample_format_data[sample_key] = ["./.", "0.0", 0, 0.0,"NULL"]
vcf_line.sample_format_data[sample_key] = ["./.", "0.0", 0, 0.0, "NULL"]
# add support vector only if population mode is active
vcf_line.INFO += ";SUPP_VEC=" + "".join([str(s) for s in vcf_line.supp_vec.values()])
vcf_lines.append(vcf_line.format_vcf_line())
Expand Down Expand Up @@ -226,11 +229,12 @@ def make_vcf(self, chromosome_list, cnv_candidate_list, sample_id, population_sa

# clean up
if os.path.isfile(self.output_vcf) and os.stat(self.output_vcf).st_size != 0:
os.remove(tmp_out) # remove the original unsorted vcf
os.remove(tmp_out) # remove the original unsorted vcf
# sorting file
if os.path.exists(tmp_out + ".sort.tmp"):
os.remove(tmp_out + ".sort.tmp")


class IntermediateFile(object):
def __init__(self, output_dir: str, as_dev=False):
self.output_dir = output_dir
Expand Down

0 comments on commit 06135b8

Please sign in to comment.