generated from sib-swiss/course_website_template
-
Notifications
You must be signed in to change notification settings - Fork 2
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Geert van Geest
committed
Nov 29, 2024
1 parent
3058976
commit 1956fcb
Showing
9 changed files
with
159 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,62 @@ | ||
#!/usr/bin/env bash
#
# Download and prepare the course data below ./data (relative to the directory
# the script is started from):
#   data/resources  - panel of normals and gnomAD germline resource, plus
#                     chr6/chr17 subsets of both
#   data/reference  - exome intervals and the chr6/chr17 reference genome
#   data/reads      - tumor/normal exome reads as <sample>_R{1,2}.fastq.gz
#
# Requires: aws, wget, tar, bcftools.

# Abort on the first failing command: a failed cd or download must not let the
# later mv / rm -r steps run in the wrong directory.
set -euo pipefail

readonly BUNDLE=s3://ngi-igenomes/igenomes/Homo_sapiens/GATK/GRCh38/Annotation/GATKBundle
readonly PMBIO=https://genomedata.org/pmbio-workshop

# Copy one file ($1) from the GATK bundle bucket into the current directory,
# using an unsigned (anonymous) request.
fetch_bundle_file() {
  aws s3 \
    --no-sign-request --region eu-west-1 \
    cp \
    "$BUNDLE/$1" \
    .
}

# Write a bgzipped chr6+chr17 subset of VCF $1 to $2 and tabix-index it.
subset_vcf() {
  local source_vcf=$1
  local subset=$2
  bcftools view -Oz -r chr6,chr17 -o "$subset" "$source_vcf"
  # -f: replace an index left over from an earlier run instead of failing.
  bcftools index -f --tbi "$subset"
}

# -p so that directories from an earlier run do not abort the script.
mkdir -p data/resources data/reference data/reads
data_dir=$PWD/data

cd "$data_dir/resources"

# panel of normals
fetch_bundle_file 1000g_pon.hg38.vcf.gz
fetch_bundle_file 1000g_pon.hg38.vcf.gz.tbi

# germline resource
# The subsetting step at the bottom reads af-only-gnomad.hg38.vcf.gz, which
# nothing else in this script provides. Fetch it (and its index, needed for
# region queries) unless a copy is already present.
# NOTE(review): assumes the file lives in the same bundle directory as the
# panel of normals - confirm.
for resource in af-only-gnomad.hg38.vcf.gz af-only-gnomad.hg38.vcf.gz.tbi; do
  [[ -e "$resource" ]] || fetch_bundle_file "$resource"
done

cd "$data_dir/reference"

# exome intervals
wget "$PMBIO/references/exome/chr6_and_chr17/exome_regions.bed"
wget "$PMBIO/references/exome/chr6_and_chr17/exome_regions.bed.interval_list"

# reference genome
wget "$PMBIO/references/genome/chr6_and_chr17/ref_genome.tar"
tar xvf ref_genome.tar
rm ref_genome.tar

# reads
cd "$data_dir/reads"

# Each entry is <archive name>:<sample name used by the later scripts>.
for entry in Exome_Norm:normal Exome_Tumor:tumor; do
  archive=${entry%%:*}
  sample=${entry##*:}

  wget "$PMBIO/fastqs/chr6_and_chr17/$archive.tar"
  tar xvf "$archive.tar"
  rm "$archive.tar"

  for mate in R1 R2; do
    mv "$archive/${archive}_$mate.fastq.gz" "${sample}_$mate.fastq.gz"
  done

  rm -r "$archive"
done

# subset vcf
cd "$data_dir/resources"
subset_vcf af-only-gnomad.hg38.vcf.gz af-only-gnomad.hg38.subset.vcf.gz
subset_vcf 1000g_pon.hg38.vcf.gz 1000g_pon.hg38.subset.vcf.gz
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,6 @@ | ||
#!/usr/bin/env bash
#
# Build the BWA index for the reference genome in REFDIR.

set -euo pipefail

REFDIR=/config/data/reference

# The former `mkdir -p "$ALIGNDIR"` was dropped: ALIGNDIR is never set in this
# script, so it expanded to an empty path and mkdir failed. The alignment
# script creates its own output directory.
bwa index "$REFDIR"/ref_genome.fa
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,18 @@ | ||
#!/usr/bin/env bash
#
# Align the paired reads of the tumor and normal sample against the reference
# with bwa mem, sort the alignments and write them to ALIGNDIR/<sample>.bam.
# bwa's stderr goes to ALIGNDIR/<sample>.bwa.log.

# pipefail: without it the pipeline's status is that of the last samtools
# stage only, so a failing bwa mem would go unnoticed.
set -euo pipefail

REFDIR=/config/data/reference
READDIR=/config/data/reads
ALIGNDIR=/config/data/alignments

mkdir -p "$ALIGNDIR"

for sample in tumor normal; do
  bwa mem \
    "$REFDIR"/ref_genome.fa \
    "$READDIR"/"$sample"_R1.fastq.gz \
    "$READDIR"/"$sample"_R2.fastq.gz \
    2> "$ALIGNDIR"/"$sample".bwa.log \
    | samtools sort \
    | samtools view -bh \
    > "$ALIGNDIR"/"$sample".bam
done
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,15 @@ | ||
#!/usr/bin/env bash
#
# Run gatk AddOrReplaceReadGroups on the tumor and normal alignments, reading
# ALIGNDIR/<sample>.bam and writing ALIGNDIR/<sample>.rg.bam.

ALIGNDIR=/config/data/alignments

# Value passed as RGPU and used as the prefix of each sample's RGID.
unit=HWI-ST466.C1TD1ACXX

for name in tumor normal; do
  # Library (RGLB) and sample (RGSM) are both set to the sample name.
  rg_args=(
    --INPUT "$ALIGNDIR/$name.bam"
    --OUTPUT "$ALIGNDIR/$name.rg.bam"
    --RGLB "$name"
    --RGPU "$unit"
    --RGPL ILLUMINA
    --RGSM "$name"
    --RGID "$unit.$name"
  )
  gatk AddOrReplaceReadGroups "${rg_args[@]}"
done
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,13 @@ | ||
#!/usr/bin/env bash
#
# Run gatk MarkDuplicates on the read-group-tagged BAM of each sample and index
# the result. Per sample this reads ALIGNDIR/<sample>.rg.bam and writes
# ALIGNDIR/<sample>.rg.md.bam, its index, and
# ALIGNDIR/marked_dup_metrics_<sample>.txt.

# Stop at the first failure so that samtools index never runs on a BAM that
# MarkDuplicates failed to produce, and a failure for the first sample is not
# hidden by a successful second one.
set -euo pipefail

ALIGNDIR=/config/data/alignments

for sample in tumor normal; do
  gatk MarkDuplicates \
    --INPUT "$ALIGNDIR"/"$sample".rg.bam \
    --OUTPUT "$ALIGNDIR"/"$sample".rg.md.bam \
    --METRICS_FILE "$ALIGNDIR"/marked_dup_metrics_"$sample".txt

  samtools index "$ALIGNDIR"/"$sample".rg.md.bam
done
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,18 @@ | ||
#!/usr/bin/env bash
#
# Call somatic variants with gatk Mutect2 on the tumor/normal pair and write
# them to VARIANTDIR/somatic.vcf.gz.

ALIGNDIR=/config/data/alignments
REFDIR=/config/data/reference
RESOURCEDIR=/config/data/resources
VARIANTDIR=/config/data/variants

mkdir -p "$VARIANTDIR"

mutect_args=(
  -R "$REFDIR/ref_genome.fa"
  --intervals "$REFDIR/exome_regions.bed.interval_list"
  -I "$ALIGNDIR/tumor.rg.md.bam"
  -I "$ALIGNDIR/normal.rg.md.bam"
  # "normal" is the sample name (RGSM) given to the normal BAM's read group.
  -normal normal
  --germline-resource "$RESOURCEDIR/af-only-gnomad.hg38.subset.vcf.gz"
  --panel-of-normals "$RESOURCEDIR/1000g_pon.hg38.subset.vcf.gz"
  -O "$VARIANTDIR/somatic.vcf.gz"
)

gatk Mutect2 "${mutect_args[@]}"
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,14 @@ | ||
#!/usr/bin/env bash
#
# Run gatk GetPileupSummaries on the duplicate-marked BAM of each sample and
# write VARIANTDIR/<sample>.pileups.table.

set -euo pipefail

ALIGNDIR=/config/data/alignments
RESOURCEDIR=/config/data/resources
VARIANTDIR=/config/data/variants

# Ensure the output directory exists so this script does not depend on the
# Mutect2 script having created it.
mkdir -p "$VARIANTDIR"

# Use the chr6/chr17 subset of the gnomAD resource, the same file the Mutect2
# step reads as germline resource. The unsubsetted VCF was passed here before.
# NOTE(review): presumably sites on other contigs are useless (or rejected)
# because the reference only covers chr6 and chr17 - confirm.
SITES="$RESOURCEDIR"/af-only-gnomad.hg38.subset.vcf.gz

for sample in tumor normal; do
  # The same VCF supplies both the sites (-V) and the intervals (-L).
  gatk GetPileupSummaries \
    -I "$ALIGNDIR"/"$sample".rg.md.bam \
    -V "$SITES" \
    -L "$SITES" \
    -O "$VARIANTDIR"/"$sample".pileups.table
done
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,10 @@ | ||
#!/usr/bin/env bash
#
# Run gatk CalculateContamination on the tumor pileup table, with the normal
# pileup table as matched control, and write VARIANTDIR/contamination.table.

# -u turns any use of an unset variable into an error instead of silently
# producing paths such as "/tumor.pileups.table".
set -euo pipefail

# The pileup tables are written to this directory by the GetPileupSummaries
# script. This script previously used VARIANTDIR without defining it.
VARIANTDIR=/config/data/variants

gatk CalculateContamination \
  -I "$VARIANTDIR"/tumor.pileups.table \
  -matched "$VARIANTDIR"/normal.pileups.table \
  -O "$VARIANTDIR"/contamination.table
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
#!/usr/bin/env bash
#
# Start the course container with the repository checkout mounted at /config
# and port 8443 published on the host.
#
# Environment:
#   REPO_DIR  checkout to mount (defaults to the original author's path)

# Abort if the cd fails; otherwise docker would mount whatever directory the
# script happened to be started from.
set -euo pipefail

REPO_DIR=${REPO_DIR:-/Users/geertvangeest/Documents/repositories/cancer-variants-training}

cd "$REPO_DIR"
docker run --rm -v "$PWD":/config -p 8443:8443 geertvangeest/cancer-variants-vscode:latest