Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

GA4GHTT-276: v4.4 test file update and minor fixes #255

Merged
merged 2 commits into from
Sep 12, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 2 additions & 3 deletions src/vcf/record.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1160,7 +1160,7 @@ namespace ebi
return;
}
if (util::contains(format, CICN) && !util::contains(format, CN)) {
throw new FormatBodyError{line, "Format field CICN must be used only with CN field."};
throw new FormatBodyError{line, "Format field CICN must be used only with CN field"};
}
}

Expand Down Expand Up @@ -1196,7 +1196,7 @@ namespace ebi
if (svlen_val != values[i]) {
//must be same as earlier value
std::stringstream message;
message << "INFO " << SVLEN << " must be same for all CNV, DEL, DUP alleles.";
message << "INFO " << SVLEN << " must be same for all CNV, DEL, DUP alleles";
throw new InfoBodyError{line, message.str()};
}
}
Expand Down Expand Up @@ -1238,7 +1238,6 @@ namespace ebi
//infer phasing based on other alleles phasing
allele->insert(0, anyphased ? "|" : "/");
}
//alleles.insert(alleles.begin(), values.begin(), values.end());
alleles.swap(values);
}
}
Expand Down
5 changes: 3 additions & 2 deletions src/vcf/validate_optional_policy.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -289,7 +289,8 @@ namespace ebi
void ValidateOptionalPolicy::check_body_entry_info_rb_ruc(ParsingState & state, Record const & record) const
{
std::vector<std::string> valRB, valRUC, valLen;
int rb = 0, ruc = 0 , rul = 0;
int rb = 0, rul = 0;
float ruc = 0;
const float limit = 0.05; //5% variation

if (record.source->version < Version::v44) {
Expand Down Expand Up @@ -318,7 +319,7 @@ namespace ebi
continue;
}
rb = std::stoi(valRB[i]);
ruc = std::stoi(valRUC[i]);
ruc = std::stod(valRUC[i]);
rul = itRUL != record.info.end()? std::stoi(valLen[i]) : valLen[i].size();
//RB ~= RUL * RUC
if ( (abs(rb - rul * ruc) / (float)rb) > limit) {
Expand Down
25 changes: 25 additions & 0 deletions test/input_files/v4.4/failed/failed_body_invalid_CNVTR_1.vcf
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
##fileformat=VCFv4.4
##CauseOfFailure=invalid info field on CNV:TR RN-RUS-RUL
##reference=testval
##contig=<ID=1,length=6243,assembly=B3,md5=f121111111618ff66beb2da,species="Homo sapiens",taxonomy=x>
##INFO=<ID=SVCLAIM,Number=A,Type=String,Description="Claim made by the structural variant call. Valid values are D, J, DJ...">
##INFO=<ID=SVLEN,Number=A,Type=Integer,Description="Length of structural variant">
##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
##INFO=<ID=CN,Number=A,Type=Float,Description="Copy number of CNV/breakpoint">
##INFO=<ID=RN,Number=A,Type=Integer,Description="Total number of repeat sequences in this allele">
##INFO=<ID=RUS,Number=.,Type=String,Description="Repeat unit sequence of the corresponding repeat sequence">
##INFO=<ID=RUC,Number=.,Type=Float,Description="Repeat unit count of corresponding repeat sequence">
##INFO=<ID=RB,Number=.,Type=Integer,Description="Total number of bases in the corresponding repeat sequence">
##INFO=<ID=RUB,Number=.,Type=Integer,Description="Number of bases in each individual repeat unit">
##INFO=<ID=RUL,Number=.,Type=Integer,Description="Repeat unit length of the corresponding repeat sequence">
##ALT=<ID=CNV:TR,Description="Tandem repeat determined based on DNA abundance">
##FORMAT=<ID=CN,Number=1,Type=Float,Description="Copy number">
##FORMAT=<ID=CICN,Number=2,Type=Float,Description="Confidence interval around copy number">
#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT HG00096
1 70 rs180734498 C <CNV:TR> 100 PASS SVLEN=10 GT 0|0
1 80 rs180734498 C <CNV:TR> 100 PASS SVLEN=10;RN=2;RUL=3 GT 0|0
1 90 rs180734498 C <CNV:TR> 100 PASS SVLEN=10;RUS=TG,TA GT 0|0
1 90 rs180734498 C <CNV:TR> 100 PASS SVLEN=10;RN=2;RUL=2 GT 0|0
1 100 rs180734498 C <CNV:TR> 100 PASS SVLEN=10;RN=1;RUS=T,A GT 0|0
1 110 rs180734498 C <CNV:TR> 100 PASS SVLEN=10;RN=1;RUL=2;RUS=T,A GT 0|0
1 120 rs180734498 C <CNV:TR> 100 PASS SVLEN=10;RN=1;RUL=2;RUS=. GT 0|0
39 changes: 39 additions & 0 deletions test/input_files/v4.4/failed/failed_body_invalid_CNVTR_2.vcf
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
##fileformat=VCFv4.4
##CauseOfFailure=invalid info field on CNV:TR RN - RUC, RB, CIRUC, CIRB
##reference=testval
##contig=<ID=1,length=6243,assembly=B3,md5=f121111111618ff66beb2da,species="Homo sapiens",taxonomy=x>
##INFO=<ID=SVCLAIM,Number=A,Type=String,Description="Claim made by the structural variant call. Valid values are D, J, DJ...">
##INFO=<ID=SVLEN,Number=A,Type=Integer,Description="Length of structural variant">
##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
##INFO=<ID=CN,Number=A,Type=Float,Description="Copy number of CNV/breakpoint">
##INFO=<ID=RN,Number=A,Type=Integer,Description="Total number of repeat sequences in this allele">
##INFO=<ID=RUS,Number=.,Type=String,Description="Repeat unit sequence of the corresponding repeat sequence">
##INFO=<ID=RUC,Number=.,Type=Float,Description="Repeat unit count of corresponding repeat sequence">
##INFO=<ID=RB,Number=.,Type=Integer,Description="Total number of bases in the corresponding repeat sequence">
##INFO=<ID=RUB,Number=.,Type=Integer,Description="Number of bases in each individual repeat unit">
##INFO=<ID=RUL,Number=.,Type=Integer,Description="Repeat unit length of the corresponding repeat sequence">
##INFO=<ID=CIRUC,Number=.,Type=Float,Description="Confidence interval around RUC">
##INFO=<ID=CIRB,Number=.,Type=Integer,Description="Confidence interval around RB">
##ALT=<ID=CNV:TR,Description="Tandem repeat determined based on DNA abundance">
##FORMAT=<ID=CN,Number=1,Type=Float,Description="Copy number">
##FORMAT=<ID=CICN,Number=2,Type=Float,Description="Confidence interval around copy number">
#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT HG00096
1 70 rs180734498 C <CNV:TR> 100 PASS SVLEN=10;RUS=TT;RUC=2,2 GT 0|0
1 80 rs180734498 C <CNV:TR> 100 PASS SVLEN=10;RN=2;RUL=2,10;RUC=3 GT 0|0
1 90 rs180734498 C <CNV:TR> 100 PASS SVLEN=10;RN=2;RUL=2,10;RB=3 GT 0|0
1 100 rs180734498 C <CNV:TR> 100 PASS SVLEN=10;RUS=AT;RB=30;CIRB=1 GT 0|0
1 110 rs180734498 C <CNV:TR> 100 PASS SVLEN=10;RUS=AT;RB=30;CIRB=1,. GT 0|0
1 120 rs180734498 C <CNV:TR> 100 PASS SVLEN=10;RUS=AT;RB=30;CIRB=.,-1 GT 0|0
1 130 rs180734498 C <CNV:TR> 100 PASS SVLEN=10;RUS=AT;RB=30;CIRB=1,-1 GT 0|0
1 140 rs180734498 C <CNV:TR> 100 PASS SVLEN=10;RUS=AT;RB=30;CIRB=-1,-1 GT 0|0
1 150 rs180734498 C <CNV:TR> 100 PASS SVLEN=10;RUS=AT;RB=30;CIRB=-8,8,9 GT 0|0
1 160 rs180734498 C <CNV:TR> 100 PASS SVLEN=10;RUS=AT,TT;RB=.,20;CIRB=-8,8,-9,0 GT 0|0
1 170 rs180734498 C <CNV:TR> 100 PASS SVLEN=10;RUS=AT,TT;CIRB=-8,8 GT 0|0
1 200 rs180734498 C <CNV:TR> 100 PASS SVLEN=10;RUS=AT;RUC=3.2;CIRUC=1 GT 0|0
1 210 rs180734498 C <CNV:TR> 100 PASS SVLEN=10;RUS=AT;RUC=3.2;CIRUC=1,. GT 0|0
1 220 rs180734498 C <CNV:TR> 100 PASS SVLEN=10;RUS=AT;RUC=3.2;CIRUC=.,-1 GT 0|0
1 230 rs180734498 C <CNV:TR> 100 PASS SVLEN=10;RUS=AT;RUC=3.2;CIRUC=1,-1 GT 0|0
1 240 rs180734498 C <CNV:TR> 100 PASS SVLEN=10;RUS=AT;RUC=3.2;CIRUC=-1,-1 GT 0|0
1 250 rs180734498 C <CNV:TR> 100 PASS SVLEN=10;RUS=AT;RUC=3;CIRUC=-8,8,9 GT 0|0
1 260 rs180734498 C <CNV:TR> 100 PASS SVLEN=10;RUS=AT,TT;RUC=.,2;CIRUC=-8,8,-9,0 GT 0|0
1 270 rs180734498 C <CNV:TR> 100 PASS SVLEN=10;RUS=AT,TT;CIRUC=-8,8,-9,0 GT 0|0
23 changes: 23 additions & 0 deletions test/input_files/v4.4/failed/failed_body_invalid_CNVTR_3.vcf
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
##fileformat=VCFv4.4
##CauseOfFailure=invalid info field on CNV:TR RN - RUC, RB; warning: RB ~= RUC * RUL; error to make this fail: RUC not integer with RUB
##reference=testval
##contig=<ID=1,length=6243,assembly=B3,md5=f121111111618ff66beb2da,species="Homo sapiens",taxonomy=x>
##INFO=<ID=SVCLAIM,Number=A,Type=String,Description="Claim made by the structural variant call. Valid values are D, J, DJ...">
##INFO=<ID=SVLEN,Number=A,Type=Integer,Description="Length of structural variant">
##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
##INFO=<ID=CN,Number=A,Type=Float,Description="Copy number of CNV/breakpoint">
##INFO=<ID=RN,Number=A,Type=Integer,Description="Total number of repeat sequences in this allele">
##INFO=<ID=RUS,Number=.,Type=String,Description="Repeat unit sequence of the corresponding repeat sequence">
##INFO=<ID=RUC,Number=.,Type=Float,Description="Repeat unit count of corresponding repeat sequence">
##INFO=<ID=RB,Number=.,Type=Integer,Description="Total number of bases in the corresponding repeat sequence">
##INFO=<ID=RUB,Number=.,Type=Integer,Description="Number of bases in each individual repeat unit">
##INFO=<ID=RUL,Number=.,Type=Integer,Description="Repeat unit length of the corresponding repeat sequence">
##INFO=<ID=CIRUC,Number=.,Type=Float,Description="Confidence interval around RUC">
##INFO=<ID=CIRB,Number=.,Type=Integer,Description="Confidence interval around RB">
##ALT=<ID=CNV:TR,Description="Tandem repeat determined based on DNA abundance">
##FORMAT=<ID=CN,Number=1,Type=Float,Description="Copy number">
##FORMAT=<ID=CICN,Number=2,Type=Float,Description="Confidence interval around copy number">
#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT HG00096
1 70 rs180734498 C <CNV:TR> 100 PASS SVLEN=10;RUS=TT;RUC=2.3;RB=5 GT 0|0
1 80 rs180734498 C <CNV:TR> 100 PASS SVLEN=10;RUL=2;RUC=2.3;RB=5 GT 0|0
1 90 rs180734498 C <CNV:TR> 100 PASS SVLEN=10;RUL=2;RUC=2.3;RB=5;RUB=10 GT 0|0
24 changes: 24 additions & 0 deletions test/input_files/v4.4/failed/failed_body_invalid_CNVTR_4.vcf
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
##fileformat=VCFv4.4
##CauseOfFailure=invalid info field on CNV:TR RN - RUB, RUC
##reference=testval
##contig=<ID=1,length=6243,assembly=B3,md5=f121111111618ff66beb2da,species="Homo sapiens",taxonomy=x>
##INFO=<ID=SVCLAIM,Number=A,Type=String,Description="Claim made by the structural variant call. Valid values are D, J, DJ...">
##INFO=<ID=SVLEN,Number=A,Type=Integer,Description="Length of structural variant">
##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
##INFO=<ID=CN,Number=A,Type=Float,Description="Copy number of CNV/breakpoint">
##INFO=<ID=RN,Number=A,Type=Integer,Description="Total number of repeat sequences in this allele">
##INFO=<ID=RUS,Number=.,Type=String,Description="Repeat unit sequence of the corresponding repeat sequence">
##INFO=<ID=RUC,Number=.,Type=Float,Description="Repeat unit count of corresponding repeat sequence">
##INFO=<ID=RB,Number=.,Type=Integer,Description="Total number of bases in the corresponding repeat sequence">
##INFO=<ID=RUB,Number=.,Type=Integer,Description="Number of bases in each individual repeat unit">
##INFO=<ID=RUL,Number=.,Type=Integer,Description="Repeat unit length of the corresponding repeat sequence">
##INFO=<ID=CIRUC,Number=.,Type=Float,Description="Confidence interval around RUC">
##INFO=<ID=CIRB,Number=.,Type=Integer,Description="Confidence interval around RB">
##ALT=<ID=CNV:TR,Description="Tandem repeat determined based on DNA abundance">
##FORMAT=<ID=CN,Number=1,Type=Float,Description="Copy number">
##FORMAT=<ID=CICN,Number=2,Type=Float,Description="Confidence interval around copy number">
#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT HG00096
1 70 rs180734498 C <CNV:TR> 100 PASS SVLEN=10;RUS=TT;RUC=2.3;RUB=2 GT 0|0
1 80 rs180734498 C <CNV:TR> 100 PASS SVLEN=10;RN=2;RUS=TT,AA;RUC=.,2;RUB=2,2 GT 0|0
1 90 rs180734498 C <CNV:TR> 100 PASS SVLEN=10;RUS=TT;RUC=2;RUB=2 GT 0|0
1 100 rs180734498 C <CNV:TR> 100 PASS SVLEN=10;RUS=TT;RUB=2 GT 0|0
13 changes: 13 additions & 0 deletions test/input_files/v4.4/failed/failed_body_invalid_SV_1.vcf
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
##fileformat=VCFv4.4
##CauseOfFailure=invalid SV
##reference=testval
##contig=<ID=1,length=6243,assembly=B3,md5=f121111111618ff66beb2da,species="Homo sapiens",taxonomy=x>
##INFO=<ID=SVCLAIM,Number=A,Type=String,Description="Claim made by the structural variant call. Valid values are D, J, DJ...">
##INFO=<ID=SVLEN,Number=A,Type=Integer,Description="Length of structural variant">
##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
##ALT=<ID=DEL,Description="Deletion">
##ALT=<ID=INS>
##FILTER=<ID=F1,Description="Filter1">
##FILTER=<ID=F2>
#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT HG00096
1 700 rs180734498 C <DAL:TRE> 100 PASS SVLEN=1;SVCLAIM=D GT 0|0
13 changes: 13 additions & 0 deletions test/input_files/v4.4/failed/failed_body_invalid_SV_2.vcf
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
##fileformat=VCFv4.4
##CauseOfFailure=invalid SV - SVLEN not present
##reference=testval
##contig=<ID=1,length=6243,assembly=B3,md5=f121111111618ff66beb2da,species="Homo sapiens",taxonomy=x>
##INFO=<ID=SVCLAIM,Number=A,Type=String,Description="Claim made by the structural variant call. Valid values are D, J, DJ...">
##INFO=<ID=SVLEN,Number=A,Type=Integer,Description="Length of structural variant">
##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
##ALT=<ID=DEL,Description="Deletion">
##ALT=<ID=INS>
##FILTER=<ID=F1,Description="Filter1">
##FILTER=<ID=F2>
#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT HG00096
1 700 rs180734498 C <DEL> 100 PASS SVCLAIM=D GT 0|0
17 changes: 17 additions & 0 deletions test/input_files/v4.4/failed/failed_body_invalid_SV_3.vcf
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
##fileformat=VCFv4.4
##CauseOfFailure=invalid SV - SVCLAIM not present for DEL/DUP
##reference=testval
##contig=<ID=1,length=6243,assembly=B3,md5=f121111111618ff66beb2da,species="Homo sapiens",taxonomy=x>
##INFO=<ID=SVCLAIM,Number=A,Type=String,Description="Claim made by the structural variant call. Valid values are D, J, DJ...">
##INFO=<ID=SVLEN,Number=A,Type=Integer,Description="Length of structural variant">
##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
##ALT=<ID=DEL,Description="Deletion">
##ALT=<ID=DUP,Description="Duplication">
##ALT=<ID=INV,Description="Inversion">
##ALT=<ID=INS>
##FILTER=<ID=F1,Description="Filter1">
##FILTER=<ID=F2>
#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT HG00096
1 700 rs180734498 C <INV> 100 PASS SVLEN=10 GT 0|0
1 700 rs180734498 C <DEL> 100 PASS SVLEN=10 GT 0|0
1 700 rs180734498 C <DUP> 100 PASS SVLEN=10 GT 0|0
21 changes: 21 additions & 0 deletions test/input_files/v4.4/failed/failed_body_invalid_SV_4.vcf
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
##fileformat=VCFv4.4
##CauseOfFailure=invalid SV - CNV:TR must have RUS/RUL
##reference=testval
##contig=<ID=1,length=6243,assembly=B3,md5=f121111111618ff66beb2da,species="Homo sapiens",taxonomy=x>
##INFO=<ID=SVCLAIM,Number=A,Type=String,Description="Claim made by the structural variant call. Valid values are D, J, DJ...">
##INFO=<ID=SVLEN,Number=A,Type=Integer,Description="Length of structural variant">
##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
##INFO=<ID=CN,Number=A,Type=Float,Description="Copy number of CNV/breakpoint">
##INFO=<ID=RN,Number=A,Type=Integer,Description="Total number of repeat sequences in this allele">
##INFO=<ID=RUS,Number=.,Type=String,Description="Repeat unit sequence of the corresponding repeat sequence">
##INFO=<ID=RUC,Number=.,Type=Float,Description="Repeat unit count of corresponding repeat sequence">
##INFO=<ID=RB,Number=.,Type=Integer,Description="Total number of bases in the corresponding repeat sequence">
##INFO=<ID=RUB,Number=.,Type=Integer,Description="Number of bases in each individual repeat unit">
##INFO=<ID=RUL,Number=.,Type=Integer,Description="Repeat unit length of the corresponding repeat sequence">
##ALT=<ID=CNV:TR,Description="Tandem repeat determined based on DNA abundance">
##FORMAT=<ID=CN,Number=1,Type=Float,Description="Copy number">
##FORMAT=<ID=CICN,Number=2,Type=Float,Description="Confidence interval around copy number">
#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT HG00096
1 700 rs180734498 C <CNV:TR> 100 PASS CN=100;SVLEN=1;SVCLAIM=D;RUS=TTT GT 0|0
1 800 rs180734498 C <CNV:TR> 100 PASS CN=100;SVLEN=1;SVCLAIM=D;RUL=3 GT 0|0
1 900 rs180734498 C <CNV:TR> 100 PASS CN=100;SVLEN=1;SVCLAIM=D GT 0|0
22 changes: 22 additions & 0 deletions test/input_files/v4.4/failed/failed_body_invalid_SV_5.vcf
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
##fileformat=VCFv4.4
##CauseOfFailure=invalid SV - CNV:TR - RB ~= RUL * RUC
##reference=testval
##contig=<ID=1,length=6243,assembly=B3,md5=f121111111618ff66beb2da,species="Homo sapiens",taxonomy=x>
##INFO=<ID=SVCLAIM,Number=A,Type=String,Description="Claim made by the structural variant call. Valid values are D, J, DJ...">
##INFO=<ID=SVLEN,Number=A,Type=Integer,Description="Length of structural variant">
##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
##INFO=<ID=CN,Number=A,Type=Float,Description="Copy number of CNV/breakpoint">
##INFO=<ID=RN,Number=A,Type=Integer,Description="Total number of repeat sequences in this allele">
##INFO=<ID=RUS,Number=.,Type=String,Description="Repeat unit sequence of the corresponding repeat sequence">
##INFO=<ID=RUC,Number=.,Type=Float,Description="Repeat unit count of corresponding repeat sequence">
##INFO=<ID=RB,Number=.,Type=Integer,Description="Total number of bases in the corresponding repeat sequence">
##INFO=<ID=RUB,Number=.,Type=Integer,Description="Number of bases in each individual repeat unit">
##INFO=<ID=RUL,Number=.,Type=Integer,Description="Repeat unit length of the corresponding repeat sequence">
##ALT=<ID=CNV:TR,Description="Tandem repeat determined based on DNA abundance">
##FORMAT=<ID=CN,Number=1,Type=Float,Description="Copy number">
##FORMAT=<ID=CICN,Number=2,Type=Float,Description="Confidence interval around copy number">
#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT HG00096
1 700 rs180734498 C <CNV:TR> 100 PASS CN=100;SVLEN=1;SVCLAIM=D;RUS=TTT;RUC=30;RB=100 GT 0|0
1 800 rs180734498 C <CNV:TR> 100 PASS CN=100;SVLEN=1;SVCLAIM=D;RUL=3;RUC=30;RB=100 GT 0|0
1 900 rs180734498 C <CNV:TR> 100 PASS CN=100;SVLEN=1;SVCLAIM=D;RUS=TTT;RUL=2 GT 0|0
1 1000 rs180734498 C <CNV:TR> 100 PASS CN=100;SVLEN=1;SVCLAIM=D;RUS=TTT;RUL=3 GT 0|0
10 changes: 10 additions & 0 deletions test/input_files/v4.4/failed/failed_body_invalid_SV_6.vcf
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
##fileformat=VCFv4.4
##CauseOfFailure=invalid SV - BND
##reference=testval
##contig=<ID=1,length=6243,assembly=B3,md5=f121111111618ff66beb2da,species="Homo sapiens",taxonomy=x>
##INFO=<ID=SVLEN,Number=A,Type=Integer,Description="Length of structural variant">
##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
##ALT=<ID=BND:Test,Description="Breakend SV acceptable in v4.3">
#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT HG00096
1 700 rs180734498 C <BND:Test> 100 PASS CN=100;SVLEN=1 GT 0|0

21 changes: 21 additions & 0 deletions test/input_files/v4.4/failed/failed_body_invalid_format_1.vcf
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
##fileformat=VCFv4.4
##CauseOfFailure=invalid format fields, with CICN, format CN must be present
##reference=testval
##contig=<ID=1,length=6243,assembly=B3,md5=f121111111618ff66beb2da,species="Homo sapiens",taxonomy=x>
##INFO=<ID=SVCLAIM,Number=A,Type=String,Description="Claim made by the structural variant call. Valid values are D, J, DJ...">
##INFO=<ID=SVLEN,Number=A,Type=Integer,Description="Length of structural variant">
##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
##INFO=<ID=CN,Number=A,Type=Float,Description="Copy number of CNV/breakpoint">
##INFO=<ID=RN,Number=A,Type=Integer,Description="Total number of repeat sequences in this allele">
##INFO=<ID=RUS,Number=.,Type=String,Description="Repeat unit sequence of the corresponding repeat sequence">
##INFO=<ID=RUC,Number=.,Type=Float,Description="Repeat unit count of corresponding repeat sequence">
##INFO=<ID=RB,Number=.,Type=Integer,Description="Total number of bases in the corresponding repeat sequence">
##INFO=<ID=RUB,Number=.,Type=Integer,Description="Number of bases in each individual repeat unit">
##INFO=<ID=RUL,Number=.,Type=Integer,Description="Repeat unit length of the corresponding repeat sequence">
##ALT=<ID=CNV:TR,Description="Tandem repeat determined based on DNA abundance">
##FORMAT=<ID=CN,Number=1,Type=Float,Description="Copy number">
##FORMAT=<ID=CICN,Number=2,Type=Float,Description="Confidence interval around copy number">
#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT HG00096
1 700 rs180734498 C <CNV:TR> 100 PASS CN=100;SVLEN=1;SVCLAIM=D;RUS=TTT GT:CN 0|0:2
1 800 rs180734498 C <CNV:TR> 100 PASS CN=100;SVLEN=1;SVCLAIM=D;RUL=3 GT:CN:CICN 0|0:2:-1,1
1 900 rs180734498 C <CNV:TR> 100 PASS CN=100;SVLEN=1;SVCLAIM=D;RUL=3 GT:CICN 0|0:-1,2
17 changes: 17 additions & 0 deletions test/input_files/v4.4/failed/failed_body_invalid_format_2.vcf
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
##fileformat=VCFv4.4
##CauseOfFailure=invalid format fields, SVLEN must be same for DEL/DUP/CNV when format CN is present
##reference=testval
##contig=<ID=1,length=6243,assembly=B3,md5=f121111111618ff66beb2da,species="Homo sapiens",taxonomy=x>
##INFO=<ID=SVCLAIM,Number=A,Type=String,Description="Claim made by the structural variant call. Valid values are D, J, DJ...">
##INFO=<ID=SVLEN,Number=A,Type=Integer,Description="Length of structural variant">
##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
##INFO=<ID=CN,Number=A,Type=Float,Description="Copy number of CNV/breakpoint">
##ALT=<ID=CNV,Description="CNV">
##ALT=<ID=DEL,Description="DEL">
##ALT=<ID=DUP,Description="DUP">
##FORMAT=<ID=CN,Number=1,Type=Float,Description="Copy number">
##FORMAT=<ID=CICN,Number=2,Type=Float,Description="Confidence interval around copy number">
#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT HG00096
1 700 rs180734498 C <DEL>,T,<DUP> 100 PASS SVLEN=1,.,2;SVCLAIM=D,.,D GT:CN 0|0:2
1 800 rs180734498 C <CNV>,T,<DEL> 100 PASS SVLEN=2,.,1;SVCLAIM=D,.,DJ GT:CN:CICN 0|0:2:-1,1
1 900 rs180734498 C <CNV>,T,<DUP> 100 PASS SVLEN=1,.,21;SVCLAIM=D,.,J GT:CN:CICN 0|0:2:-1,2
Loading