Skip to content

Commit

Permalink
fix: applying testing fixes for disambiguate feat
Browse files Browse the repository at this point in the history
  • Loading branch information
Ryan Routsong committed Jan 11, 2024
1 parent e857d0f commit 539ed71
Show file tree
Hide file tree
Showing 5 changed files with 61 additions and 36 deletions.
30 changes: 28 additions & 2 deletions scripts/files.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,12 @@
# ~~~~~~~~~~~~~~~
# file system helper functions for the Dmux software package
# ~~~~~~~~~~~~~~~
from pathlib import Path
import xml.etree.ElementTree as ET
from pathlib import Path
from os import access as check_access, R_OK, W_OK
from functools import partial
from .samplesheet import IllumniaSampleSheet
from .config import get_current_server, LABKEY_CONFIGS, DIRECTORY_CONFIGS
from .config import get_current_server, GENOME_CONFIGS, DIRECTORY_CONFIGS


def get_all_seq_dirs(top_dir, server):
Expand Down Expand Up @@ -48,6 +48,32 @@ def valid_run_output(output_directory, dry_run=False):
return output_directory


def valid_fasta(suspect):
    """Resolve a genome argument to a genome alias target or a FASTA path.

    The value is first looked up, case-insensitively, in the genome table
    configured for the current server
    (``GENOME_CONFIGS[get_current_server()]``). When it is not a known
    alias it is treated as a file path and accepted if its file name
    carries a FASTA extension (``.fna``, ``.fa`` or ``.fasta``), which also
    covers gzipped names such as ``genome.fa.gz``.

    Note that only the name is inspected here; this function does not
    check that the file exists on the file system.

    Args:
        suspect: Genome alias or path to a FASTA file, as a string.

    Returns:
        The configured value for a known alias, otherwise the absolute
        path of ``suspect`` as a string.

    Raises:
        ValueError: If ``suspect`` is neither a configured alias nor a
            path with a recognised FASTA extension.
    """
    server_genomes = GENOME_CONFIGS[get_current_server()]

    alias = suspect.lower()
    if alias in server_genomes:
        return server_genomes[alias]

    # Path.suffixes lists every extension of the final path component
    # ("genome.fa.gz" -> ['.fa', '.gz']), so a single membership test
    # covers both plain and compressed FASTA file names.
    fasta_extensions = {'.fna', '.fa', '.fasta'}
    candidate = Path(suspect)
    if fasta_extensions.intersection(candidate.suffixes):
        return str(candidate.absolute())

    raise ValueError(
        f"'{suspect}' is not a configured genome alias and does not have a "
        "FASTA extension (.fna, .fa, .fasta; may be gzipped)."
    )


def get_all_staged_dirs(top_dir, server):
    """Return the sequencing directories under ``top_dir`` that are staged.

    Every directory produced by ``get_all_seq_dirs(top_dir, server)`` is
    kept when ``is_dir_staged(server, directory)`` is truthy; the result
    is a list in the order the directories were produced.
    """
    return [
        seq_dir
        for seq_dir in get_all_seq_dirs(top_dir, server)
        if is_dir_staged(server, seq_dir)
    ]

Expand Down
14 changes: 4 additions & 10 deletions scripts/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -246,7 +246,7 @@ def exec_pipeline(configs, dry_run=False, local=False):
if not bclcon_log_dir.exists():
bclcon_log_dir.mkdir(mode=0o755, parents=True)
extra_to_mount.append(str(bclcon_log_dir) + ":" + "/var/log/bcl-convert:rw")
if this_config['disambiguate']:
if this_config.get('disambiguate', False):
extra_to_mount.append(Path(this_config['host_genome']).parent)
extra_to_mount.append(Path(this_config['pathogen_genome']).parent)
singularity_binds = get_mounts(*extra_to_mount)
Expand Down Expand Up @@ -301,21 +301,15 @@ def valid_host_pathogen_genomes(host, pathogen):
if Path(host).absolute().exists():
g1 = True
host = str(Path(host).absolute())
elif host.lower() in genomes:
g1 = True
host = genomes[host.lower()]

if Path(g2).absolute().exists():
if Path(pathogen).absolute().exists():
g2 = True
pathogen = str(Path(pathogen).absolute())
elif pathogen.lower() in genomes:
g2 = True
pathogen = genomes[pathogen.lower()]

if not all([g1, g2]):
if not g1:
raise ValueError('Host genome does not exist on the file system or in the aliases.')
raise ValueError('Host genome does not exist on the file system.')
if not g2:
raise ValueError('Pathogen genome does not exist on the file system or in the aliases.')
raise ValueError('Pathogen genome does not exist on the file system.')

return host, pathogen
17 changes: 11 additions & 6 deletions weave
Original file line number Diff line number Diff line change
Expand Up @@ -34,13 +34,18 @@ def run(args):
exec_config['demux_data'].append(files.check_if_demuxed(rundir))

# ~~~ disambiguate genome configuration ~~~
exec_config['host_genome'] = None
exec_config['pathogen_genome'] = None
if all([args.host, args.pathogen]):
if 'disambiguate' not in exec_config:
exec_config['disambiguate'] = []
if 'host_genome' not in exec_config:
exec_config['host_genome'] = []
if 'pathogen_genome' not in exec_config:
exec_config['pathogen_genome'] = []

utils.valid_host_pathogen_genomes(args.host, args.pathogen)
exec_config['disambiguate'] = True
exec_config['host_genome'] = config.get_genome_path(args.host)
exec_config['pathogen_genome'] = config.get_genome_path(args.pathogen)
exec_config['disambiguate'].append(True)
exec_config['host_genome'].append(args.host)
exec_config['pathogen_genome'].append(args.pathogen)
else:
assert(not any([args.host, args.pathogen])), 'Must specify both host and pathogen genometype!'

Expand Down Expand Up @@ -99,7 +104,7 @@ if __name__ == '__main__':
help='Execute pipeline locally without a dispatching executor')

# disambiguate arguments
parser_run.add_argument('-h', '--host', type=files.valid_fasta, default=None,
parser_run.add_argument('-t', '--host', type=files.valid_fasta, default=None,
help='Execute pipeline locally without a dispatching executor.')
alias_table = "\n" + utils.get_alias_table() + "\n"
parser_run.add_argument('-p', '--pathogen', type=files.valid_fasta, default=None,
Expand Down
2 changes: 1 addition & 1 deletion workflow/Snakefile
Original file line number Diff line number Diff line change
Expand Up @@ -128,7 +128,7 @@ else:
if config["runqc"]:
all_outputs.extend(qa_qc_outputs)

if config['disambiguate']:
if config.get('disambiguate', False):
all_outputs.extend(flatten([
expand("{out_to}/{project}/{sids}/disambiguate/{sids}.ambiguousSpeciesA.bam", **demux_expand_args),
expand("{out_to}/{project}/{sids}/disambiguate/{sids}.ambiguousSpeciesB.bam", **demux_expand_args),
Expand Down
34 changes: 17 additions & 17 deletions workflow/qc.smk
Original file line number Diff line number Diff line change
Expand Up @@ -55,14 +55,14 @@ rule fastqc_trimmed:

rule bwa:
input:
in_read1 = config["out_to"] + "/" + config["project"] + "/{sids}/fastp/{sids}_trimmed_R1.fastq.gz" if config['disambiguate'] else [],
in_read2 = config["out_to"] + "/" + config["project"] + "/{sids}/fastp/{sids}_trimmed_R2.fastq.gz" if config['disambiguate'] and len(config['rnums']) == 2 else [],
in_read1 = config["out_to"] + "/" + config["project"] + "/{sids}/fastp/{sids}_trimmed_R1.fastq.gz" if config.get('disambiguate', False) else [],
in_read2 = config["out_to"] + "/" + config["project"] + "/{sids}/fastp/{sids}_trimmed_R2.fastq.gz" if config.get('disambiguate', False) and len(config['rnums']) == 2 else [],
output:
aligntoA = config["out_to"] + "/" + config["project"] + "/{sids}/disambiguate/{sids}.AligntoGenomeA.bam"
aligntoB = config["out_to"] + "/" + config["project"] + "/{sids}/disambiguate/{sids}.AligntoGenomeB.bam"
aligntoA = config["out_to"] + "/" + config["project"] + "/{sids}/disambiguate/{sids}.AligntoGenomeA.bam",
aligntoB = config["out_to"] + "/" + config["project"] + "/{sids}/disambiguate/{sids}.AligntoGenomeB.bam",
params:
host_genome = config['host_genome']
path_genome = config['pathogen_genome']
host_genome = config.get('host_genome', ''),
path_genome = config.get('pathogen_genome', ''),
threads: 32
resources: mem_mb = 64768
containerized: config["resources"]["sif"] + "weave_ngsqc_0.0.2.sif"
Expand All @@ -76,19 +76,19 @@ rule bwa:

rule disambiguate:
input:
aligntoA = config["out_to"] + "/" + config["project"] + "/{sids}/disambiguate/{sids}.AligntoGenomeA.bam"
aligntoB = config["out_to"] + "/" + config["project"] + "/{sids}/disambiguate/{sids}.AligntoGenomeB.bam"
aligntoA = config["out_to"] + "/" + config["project"] + "/{sids}/disambiguate/{sids}.AligntoGenomeA.bam",
aligntoB = config["out_to"] + "/" + config["project"] + "/{sids}/disambiguate/{sids}.AligntoGenomeB.bam",
output:
ambiguousA = config["out_to"] + "/" + config["project"] + "/{sids}/disambiguate/{sids}.ambiguousSpeciesA.bam"
ambiguousB = config["out_to"] + "/" + config["project"] + "/{sids}/disambiguate/{sids}.ambiguousSpeciesB.bam"
disambiguousA = config["out_to"] + "/" + config["project"] + "/{sids}/disambiguate/{sids}.disambiguatedSpeciesA.bam"
disambiguousA = config["out_to"] + "/" + config["project"] + "/{sids}/disambiguate/{sids}.disambiguatedSpeciesB.bam"
ambiguousA = config["out_to"] + "/" + config["project"] + "/{sids}/disambiguate/{sids}_summary.txt"
ambiguousA = config["out_to"] + "/" + config["project"] + "/{sids}/disambiguate/{sids}.ambiguousSpeciesA.bam",
ambiguousB = config["out_to"] + "/" + config["project"] + "/{sids}/disambiguate/{sids}.ambiguousSpeciesB.bam",
disambiguousA = config["out_to"] + "/" + config["project"] + "/{sids}/disambiguate/{sids}.disambiguatedSpeciesA.bam",
disambiguousB = config["out_to"] + "/" + config["project"] + "/{sids}/disambiguate/{sids}.disambiguatedSpeciesB.bam",
dis_summary = config["out_to"] + "/" + config["project"] + "/{sids}/disambiguate/{sids}_summary.txt",
params:
host_genome = config['host_genome']
path_genome = config['pathogen_genome']
this_sid = lambda wc: wc.sids
out_dir = config["out_to"] + "/" + config["project"] + "/{sids}/disambiguate/"
host_genome = config.get('host_genome', ''),
path_genome = config.get('pathogen_genome', ''),
this_sid = lambda wc: wc.sids,
out_dir = config["out_to"] + "/" + config["project"] + "/{sids}/disambiguate/",
containerized: config["resources"]["sif"] + "ngs_disambiguate_2018.05.03.sif"
log: config['out_to'] + "/logs/" + config["project"] + "/disambiguate/{sids}.log"
threads: 32
Expand Down

0 comments on commit 539ed71

Please sign in to comment.