Skip to content

Commit

Permalink
Working DAG with proper grouping (fixed to 001 for now)
Browse files Browse the repository at this point in the history
  • Loading branch information
Ulthran committed Feb 5, 2024
1 parent 43799ae commit e441906
Show file tree
Hide file tree
Showing 5 changed files with 200 additions and 151 deletions.
3 changes: 1 addition & 2 deletions config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -4,5 +4,4 @@ sbx_demic:
demic_threads: 4
single_genome: false # Switch to true if your samples only contain a single species
extras: "" # Parameters passed to DEMIC.pl
ref_fp: "" # Path to reference genome
operams_db_fp: "" # Path to Opera-MS database
ref_fp: "" # Path to reference genome
9 changes: 1 addition & 8 deletions envs/demic_ref_env.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,5 @@ name: demic_ref_env
channels:
- conda-forge
- bioconda
#- compbiocore
dependencies:
- python =3.9
- perl-app-cpanminus # for installing perl modules in Opera-MS
- perl #=5.26.2
#- perl-switch
#- bioconda::perl-file-which
#- bioconda::perl-statistics-basic
#- bioconda::perl-statistics-r
- spades
6 changes: 2 additions & 4 deletions sbx_demic.smk
Original file line number Diff line number Diff line change
Expand Up @@ -213,7 +213,7 @@ rule maxbin:

rule bowtie2_build:
input:
COASSEMBLY_DEMIC_FP / "split",
COASSEMBLY_DEMIC_FP / "max_bin" / "max_bin",
output:
touch(COASSEMBLY_DEMIC_FP / ".indexed"),
threads: Cfg["sbx_demic"]["demic_threads"]
Expand All @@ -229,7 +229,7 @@ rule bowtie2_build:

rule bowtie2:
input:
bin_dir=COASSEMBLY_DEMIC_FP / "split",
bin_dir=COASSEMBLY_DEMIC_FP / "max_bin" / "max_bin",
reads=expand(
QC_FP / "decontam" / "{sample}_{rp}.fastq.gz",
sample=Samples.keys(),
Expand All @@ -255,8 +255,6 @@ rule samtools_sort:
threads: Cfg["sbx_demic"]["demic_threads"]
conda:
"envs/demic_bio_env.yml"
log:
str(DEMIC_FP / "logs" / "samtools.error"),
script:
"scripts/samtools_sort.py"

Expand Down
325 changes: 188 additions & 137 deletions sbx_demic_ref.smk
Original file line number Diff line number Diff line change
@@ -1,137 +1,188 @@
try:
BENCHMARK_FP
except NameError:
BENCHMARK_FP = output_subdir(Cfg, "benchmarks")
try:
LOG_FP
except NameError:
LOG_FP = output_subdir(Cfg, "logs")


DEMIC_FP = MAPPING_FP / "demic"
DEMIC_REF_FP = DEMIC_FP / "ref"


def get_demic_path() -> Path:
for fp in sys.path:
if fp.split("/")[-1] == "sbx_demic":
return Path(fp)
raise Error(
"Filepath for demic not found, are you sure it's installed under extensions/sbx_demic?"
)


localrules:
all_demic_ref,


rule all_demic_ref:
input:
#DEMIC_REF_FP / "all_PTR.txt"
expand(DEMIC_REF_FP / "OPERA_MS" / "{sample}" / "contigs.fasta", sample=Samples.keys())


rule install_opera_ms:
output:
DEMIC_REF_FP / ".installed"
params:
demic_fp=str(get_demic_path()),
conda:
"envs/demic_ref_env.yml"
shell:
"""
cd {params.demic_fp}
if [ ! -d "OPERA-MS" ]; then
git clone https://github.com/CSB5/OPERA-MS.git
fi
cd OPERA-MS
make
perl tools_opera_ms/install_perl_module.pl
perl OPERA-MS.pl check-dependency
"""


rule run_opera_ms:
input:
#DEMIC_REF_FP / ".installed",
reads=expand(QC_FP / "decontam" / "{{sample}}_{rp}.fastq.gz", rp=Pairs),
contigs=Cfg["sbx_demic"]["ref_fp"],
output:
DEMIC_REF_FP / "OPERA_MS" / "{sample}" / "contigs.fasta",
params:
demic_fp=str(get_demic_path()),
threads=Cfg["sbx_demic"]["demic_threads"],
#conda:
# "envs/demic_ref_env.yml"
container:
"docker://lorenzgerber/operams"
threads: 8
shell:
"""
cd {params.demic_fp}/OPERA-MS
perl OPERA-MS.pl --num-processors {params.threads} --long-read {input.contigs} --short-read1 {input.reads[0]} --short-read2 {input.reads[1]} --out-dir {output} --no-polishing
"""







rule run_pycov3_ref:
input:
DEMIC_FP / "sorted",
#COASSEMBLY_DEMIC_FP / "max_bin" / "max_bin",
output:
directory(DEMIC_FP / "pycov3"),
params:
sam_dir=str(DEMIC_FP / "sorted"),
#fasta_dir=str(COASSEMBLY_DEMIC_FP / "max_bin"),
extras=Cfg["sbx_demic"]["extras"],
threads: Cfg["sbx_demic"]["demic_threads"]
resources:
mem_mb=20000,
runtime=720,
conda:
"envs/demic_bio_env.yml"
log:
LOG_FP / "run_pycov3.log",
benchmark:
BENCHMARK_FP / "run_pycov3_ref.txt",
shell:
"""
pycov3 -S {params.sam_dir} -F {params.fasta_dir} -O {output} -X {params.extras} 2>&1 | tee {log}
"""
rule run_demic_ref:
input:
input=DEMIC_FP / "pycov3",
installed=DEMIC_FP / ".installed",
output:
out=directory(DEMIC_FP / "DEMIC_OUT"),
threads: Cfg["sbx_demic"]["demic_threads"]
resources:
mem_mb=20000,
runtime=720,
conda:
"envs/demic_env.yml"
log:
LOG_FP / "run_demic.log",
script:
"scripts/run_demic.R"
rule aggregate_demic_ref:
input:
DEMIC_FP / "DEMIC_OUT",
output:
DEMIC_REF_FP / "all_PTR.txt",
shell:
"""
echo "sample\testPTR\tcoefficient\tpValue\tcor\tcorrectY" > {output}
tail -n +1 {input}/*.ptr >> {output}
"""
try:
BENCHMARK_FP
except NameError:
BENCHMARK_FP = output_subdir(Cfg, "benchmarks")
try:
LOG_FP
except NameError:
LOG_FP = output_subdir(Cfg, "logs")


DEMIC_FP = MAPPING_FP / "demic"
DEMIC_REF_FP = DEMIC_FP / "ref"


def get_demic_path() -> Path:
for fp in sys.path:
if fp.split("/")[-1] == "sbx_demic":
return Path(fp)
raise Error(
"Filepath for demic not found, are you sure it's installed under extensions/sbx_demic?"
)


localrules:
all_demic_ref,


rule all_demic_ref:
input:
all=DEMIC_REF_FP / "all_PTR.txt",
contig=DEMIC_REF_FP / "contig_PTR.txt",
sample=DEMIC_REF_FP / "sample_PTR.txt",


rule spades_assemble_with_ref:
input:
r1=expand(QC_FP / "decontam" / "{sample}_1.fastq.gz", sample=Samples.keys()),
r2=expand(QC_FP / "decontam" / "{sample}_2.fastq.gz", sample=Samples.keys()),
contigs=Cfg["sbx_demic"]["ref_fp"],
output:
r1=temp(ASSEMBLY_FP / "demic_ref" / "contigs" / "spades.001.1.fastq"),
r2=temp(ASSEMBLY_FP / "demic_ref" / "contigs" / "spades.001.2.fastq"),
r1s=temp(expand(QC_FP / "decontam" / "{sample}_1.fastq", sample=Samples.keys())),
r2s=temp(expand(QC_FP / "decontam" / "{sample}_2.fastq", sample=Samples.keys())),
contigs=ASSEMBLY_FP / "demic_ref" / "contigs" / "spades.001.fasta",
params:
output_dir=str(ASSEMBLY_FP / "demic_ref"),
threads: 8
conda:
"envs/demic_ref_env.yml"
shell:
"""
gzip -dk {input.r1}
gzip -dk {input.r2}
cat {output.r1s} > {output.r1}
cat {output.r2s} > {output.r2}
spades.py -1 {output.r1} -2 {output.r2} -o {params.output_dir} -t {threads} --trusted-contigs {input.contigs}
mv {params.output_dir}/contigs.fasta {output.contigs}
"""


rule bowtie2_build_demic_ref:
input:
ASSEMBLY_FP / "demic_ref" / "contigs" / "spades.001.fasta",
output:
ASSEMBLY_FP / "demic_ref" / "contigs" / "spades.001.fasta.1.bt2",
threads: Cfg["sbx_demic"]["demic_threads"]
conda:
"envs/demic_bio_env.yml"
shell:
"""
bowtie2-build --threads {threads} {input} {input}
"""


rule bowtie2_demic_ref:
input:
contigs=ASSEMBLY_FP / "demic_ref" / "contigs" / "spades.001.fasta",
indexes=ASSEMBLY_FP / "demic_ref" / "contigs" / "spades.001.fasta.1.bt2",
reads=expand(
QC_FP / "decontam" / "{{sample}}_{rp}.fastq.gz",
rp=Pairs,
),
output:
DEMIC_REF_FP / "raw" / "{sample}_001.sam",
threads: Cfg["sbx_demic"]["demic_threads"]
params:
reads_dir=str(QC_FP / "decontam"),
conda:
"envs/demic_bio_env.yml"
shell:
"""
bowtie2 -q -x {input.contigs} -1 {input.reads[0]} -2 {input.reads[1]} -p {threads} -S {output}
"""


rule samtools_sort_demic_ref:
input:
DEMIC_REF_FP / "raw" / "{sample}_001.sam",
output:
temp_files=temp(DEMIC_REF_FP / "sorted" / "{sample}_001.bam"),
sorted_files=DEMIC_REF_FP / "sorted" / "{sample}_001.sam",
threads: Cfg["sbx_demic"]["demic_threads"]
conda:
"envs/demic_bio_env.yml"
shell:
"""
samtools view -@ {threads} -bS {input} | samtools sort -@ {threads} - -o {output.temp_files}
samtools view -@ {threads} -h {output.temp_files} > {output.sorted_files}
"""


rule run_pycov3_ref:
input:
sams=expand(DEMIC_REF_FP / "sorted" / "{sample}_001.sam", sample=Samples.keys()),
contigs=ASSEMBLY_FP / "demic_ref" / "contigs" / "spades.001.fasta",
output:
DEMIC_REF_FP / "pycov3" / "spades.001.cov3",
params:
sam_dir=str(DEMIC_REF_FP / "sorted"),
fasta_dir=str(ASSEMBLY_FP / "demic_ref" / "contigs"),
output_dir=str(DEMIC_REF_FP / "pycov3"),
extras=Cfg["sbx_demic"]["extras"],
threads: Cfg["sbx_demic"]["demic_threads"]
resources:
mem_mb=20000,
runtime=720,
conda:
"envs/demic_bio_env.yml"
log:
LOG_FP / "run_pycov3_ref.log",
benchmark:
BENCHMARK_FP / "run_pycov3_ref.txt"
shell:
"""
pycov3 -S {params.sam_dir} -F {params.fasta_dir} -O {params.output_dir} -X {params.extras} 2>&1 | tee {log}
"""


rule run_demic_ref:
input:
input=DEMIC_REF_FP / "pycov3" / "spades.001.cov3",
#installed=DEMIC_FP / ".installed",
output:
all=DEMIC_REF_FP / "DEMIC_OUT" / "spades.001.all.ptr",
contig=DEMIC_REF_FP / "DEMIC_OUT" / "spades.001.contig.ptr",
sample=DEMIC_REF_FP / "DEMIC_OUT" / "spades.001.sample.ptr",
params:
output_dir=str(DEMIC_REF_FP / "DEMIC_OUT"),
threads: Cfg["sbx_demic"]["demic_threads"]
resources:
mem_mb=20000,
runtime=720,
conda:
"envs/demic_env.yml"
log:
LOG_FP / "run_demic.log",
script:
"scripts/run_demic_ref.R"


rule aggregate_demic_ref:
input:
all=expand(DEMIC_REF_FP / "DEMIC_OUT" / "spades.001.all.ptr", sample=Samples.keys()),
contig=expand(
DEMIC_REF_FP / "DEMIC_OUT" / "spades.001.contig.ptr", sample=Samples.keys()
),
sample=expand(
DEMIC_REF_FP / "DEMIC_OUT" / "spades.001.sample.ptr", sample=Samples.keys()
),
output:
all=DEMIC_REF_FP / "all_PTR.txt",
contig=DEMIC_REF_FP / "contig_PTR.txt",
sample=DEMIC_REF_FP / "sample_PTR.txt",
params:
dir=DEMIC_REF_FP / "DEMIC_OUT",
shell:
"""
echo "sample\testPTR\tcoefficient\tpValue\tcor\tcorrectY" | tee {output.all} {output.contig} {output.sample} > /dev/null
tail -n +1 {params.dir}/*.all.ptr >> {output.all}
tail -n +1 {params.dir}/*.contig.ptr >> {output.contig}
tail -n +1 {params.dir}/*.sample.ptr >> {output.sample}
"""
8 changes: 8 additions & 0 deletions scripts/run_demic_ref.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
dir.create(snakemake@params[['output_dir']])

X <- read.csv(snakemake@input[['input']], stringsAsFactors = TRUE)
O <- demic::est_ptr(X)

write.table(O$all_ptr, snakemake@output[['all']], sep="\t", quote=FALSE, col.names=FALSE)
write.table(O$contigs_ptr, snakemake@output[['contig']], sep="\t", quote=FALSE, col.names=FALSE)
write.table(O$samples_ptr, snakemake@output[['sample']], sep="\t", quote=FALSE, col.names=FALSE)

0 comments on commit e441906

Please sign in to comment.