Skip to content

Commit

Permalink
Merge pull request #13 from Eco-Flow/simon-dev
Browse files Browse the repository at this point in the history
Merge major overhaul to main branch
  • Loading branch information
SimonDMurray authored Jan 10, 2024
2 parents e8e6143 + c50e781 commit 934e980
Show file tree
Hide file tree
Showing 21 changed files with 142 additions and 259 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -9,3 +9,4 @@ nextflow
Go/
*fna
*.gff
reports/
17 changes: 0 additions & 17 deletions .gitpod.yml
Original file line number Diff line number Diff line change
@@ -1,20 +1,3 @@
github:
prebuilds:
# enable for the master/default branch (defaults to true)
master: true
# enable for all branches in this repo (defaults to false)
branches: true
# enable for pull requests coming from this repo (defaults to true)
pullRequests: true
# enable for pull requests coming from forks (defaults to false)
pullRequestsFromForks: true
# add a "Review in Gitpod" button as a comment to pull requests (defaults to true)
addComment: true
# add a "Review in Gitpod" button to pull requests (defaults to false)
addBadge: false
# add a label once the prebuild is ready to pull requests (defaults to false)
addLabel: prebuilt-in-gitpod

# List the start up tasks. Learn more https://www.gitpod.io/docs/config-start-tasks/
tasks:
- name: Download Nextflow Tutorial
Expand Down
1 change: 0 additions & 1 deletion bin/Trans_location_Inversion_score.pl
Original file line number Diff line number Diff line change
Expand Up @@ -265,4 +265,3 @@
}


`plotting-inversions.R > R_output.txt`
1 change: 0 additions & 1 deletion bin/Trans_location_Inversion_score_treeSort.pl
Original file line number Diff line number Diff line change
Expand Up @@ -265,4 +265,3 @@
}


`plotting-inversions-treeSort.R > R_output.txt`
2 changes: 0 additions & 2 deletions bin/plotting-inversions-treeSort.R
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@
#!/opt/conda/bin/Rscript --vanilla

# import data
Bee_inver_trans_prot <- read.delim("Trans_location_version.out.txt", stringsAsFactors=FALSE)

Expand Down
2 changes: 0 additions & 2 deletions bin/plotting-inversions.R
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@
#!/opt/conda/bin/Rscript --vanilla

# import data
Bee_inver_trans_prot <- read.delim("Trans_location_version.out.txt", stringsAsFactors=FALSE)

Expand Down
2 changes: 0 additions & 2 deletions bin/plotting-synteny_go.R
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@
#!/opt/conda/bin/Rscript --vanilla

library(pheatmap)

erefd<-read.table("Go_summary_topSynteny.tsv", h=T, sep="\t")
Expand Down
21 changes: 4 additions & 17 deletions conf/docker.config
Original file line number Diff line number Diff line change
@@ -1,44 +1,31 @@
docker.runOptions='-u $(id -u):$(id -g)'

process {

errorStrategy = 'retry'
maxRetries = 5

withLabel: 'jcvi' {
container = 'chriswyatt/jcvi'
cpus = 1
echo = true
}

withLabel: 'chromo' {
container = 'chriswyatt/jcvi'
cpus = 1
echo = true
}

withLabel: 'gffread' {
container = 'chriswyatt/gffread_python3'
cpus = 1
echo = true
}

withLabel: 'syn' {
container = 'chriswyatt/jcvi'
cpus = 1
echo = true
}

withLabel: 'config' {
container = 'chriswyatt/jcvi'
cpus = 1
echo = true
}

withLabel: 'macro' {
container = 'chriswyatt/jcvi'
cpus = 1
echo = true
}

withLabel: 'download' {
container = 'chriswyatt/ncbi_datasets:version3.0'
cpus = 1
echo = true
}
Expand Down
71 changes: 17 additions & 54 deletions main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -4,22 +4,14 @@

/*
* Authors:
* - Chris Wyatt <[email protected]>
* - Chris Wyatt <[email protected]>
* - Simon Murray <[email protected]>
*/

/*
 * Default pipeline parameters (on test data). They can be overridden on the command line, e.g.
* given `params.name` specify on the run command line `--name My_run_v1`.
*/

params.outdir = "Results"
params.input = "data/Example.csv"
params.seqids = "./data/default1"
params.layout = "./data/default2"
params.hex = "data/unique_hex2"
params.go = null
params.test=0
params.tree= false

log.info """\
===================================
Expand All @@ -40,15 +32,12 @@ include { GFFREAD } from './modules/gffread.nf'
include { JCVI } from './modules/jcvi.nf'
include { SYNTENY } from './modules/synteny.nf'
include { DOWNLOAD_NCBI } from './modules/download_ncbi.nf'
include { DOWNLOAD_NCBI as DOWNLOAD_NCBI2 } from './modules/download_ncbi.nf'
include { CHROMOPAINT } from './modules/chromo.nf'
include { SCORE } from './modules/score.nf'
include { LONGEST } from './modules/longest_orf.nf'
include { GO } from './modules/go.nf'
include { SCORE_TREE } from './modules/score_tree.nf'
include { GO_SUMMARISE } from './modules/go_summarise.nf'


Channel
.fromPath(params.input)
.splitCsv()
Expand All @@ -66,74 +55,48 @@ Channel

Channel
.fromPath(params.layout)
.set { in_layout }
.set { in_layout }

Channel
.fromPath(params.hex)
.set { in_hex }
.set { in_hex }

Channel
.fromPath(params.input)
.splitCsv()
.branch {
ncbi: it.size() == 2
.branch {
ncbi: it.size() == 2
local: it.size() == 3
}
.set { input_type }


// input_type.ncbi.view { "$it is small" }
// input_type.local.view { "$it is large" }



workflow {

    // Download genomes for samples given as NCBI accessions (2-column CSV rows),
    // then merge with genomes supplied locally (3-column CSV rows).
    DOWNLOAD_NCBI ( input_type.ncbi )
    GFFREAD ( DOWNLOAD_NCBI.out.genome.mix(input_type.local) )

    // Reformat annotations for JCVI, then run all-vs-all pairwise synteny,
    // excluding self-vs-self comparisons.
    JCVI ( GFFREAD.out.proteins )
    SYNTENY ( JCVI.out.new_format.combine(JCVI.out.new_format).filter{ it[0] != it[3] } )

    // Chromosome paintings for every species pair.
    CHROMOPAINT ( in_hex , SYNTENY.out.anchors , JCVI.out.beds.collect() )

    // Score synteny either guided by a user-supplied species tree, or unguided.
    if (params.tree) {
        tree_in = Channel.fromPath(params.tree)
        SCORE_TREE ( SYNTENY.out.anchors.collect() , SYNTENY.out.percsim.collect() , GFFREAD.out.gff.collect() , tree_in )
    }
    else {
        SCORE ( SYNTENY.out.anchors.collect() , SYNTENY.out.percsim.collect() , GFFREAD.out.gff.collect() )
    }

    // Optional GO-term enrichment of syntenic gene sets.
    if (params.go) {
        // FIX: go_datasets was referenced here but never defined in this scope
        // (the definition was lost when the branch was refactored).
        go_datasets = Channel.fromPath(params.go)
        // FIX: params.tree defaults to `false`, so the previous test
        // `params.tree != null` was always true and wrongly selected
        // SCORE_TREE output even on tree-less runs; test truthiness instead.
        // FIX: SCORE.speciesSummary -> SCORE.out.speciesSummary (missing .out).
        ch_go = params.tree ? SCORE_TREE.out.speciesSummary : SCORE.out.speciesSummary
        ch_go.view()
        GO ( go_datasets.collect() , ch_go.flatten(), JCVI.out.beds.collect() )
        GO_SUMMARISE ( GO.out.go_table.collect() )
    }
}

workflow.onComplete {
    // Final status report: point the user at the results directory on
    // success, otherwise print a generic failure notice.
    if ( workflow.success ) {
        println ( "\nDone! Check results in $params.outdir/ \n" )
    }
    else {
        println ( "Hmmm .. something went wrong\n" )
    }
}

29 changes: 13 additions & 16 deletions modules/chromo.nf
Original file line number Diff line number Diff line change
@@ -1,28 +1,25 @@
process CHROMOPAINT {

    // Paint chromosomes of one species pair using their shared synteny
    // anchors; emits one PDF per pair.
    // FIX: the scraped diff left pre- and post-commit lines interleaved
    // (two `container` assignments, duplicated input/output/script blocks),
    // which is not valid Nextflow. Reconstructed the post-commit version:
    // the old 'chriswyatt/jcvi' container and `errorStrategy = 'ignore'`
    // were replaced by the pinned 'ecoflowucl/jcvi' image.
    label 'chromo'
    tag "$anchors"
    publishDir "$params.outdir/Chromosome_plots" , mode: "copy"

    container = 'ecoflowucl/jcvi:python-3.10_last-1522'

    input:
    path (hex)
    each (anchors)
    path ('*')

    output:
    path("*.pdf"), emit: pdf

    script:
    """
    # Anchors file is named <A>.<B>.anchors; recover the two species names.
    echo '${anchors}' | rev | cut -d'/' -f 1 | rev > Name
    A="\$(cut -d'.' -f1 Name)"
    B="\$(cut -d'.' -f2 Name)"
    anchor.pl \$A.bed \$B.bed ${anchors}
    python -m jcvi.graphics.chromosome Chromopaint.txt colour.idmap
    mv Chromopaint.pdf "\$A.\$B.chromo.pdf"
    """
}
42 changes: 0 additions & 42 deletions modules/default_config.nf

This file was deleted.

28 changes: 13 additions & 15 deletions modules/download_ncbi.nf
Original file line number Diff line number Diff line change
@@ -1,29 +1,27 @@
process DOWNLOAD_NCBI {

label 'download'
tag "$sample_id via $accension_id"
container = 'chriswyatt/ncbi_download'
errorStrategy = 'ignore'

container "${ params.architecture == 'arm' ? 'ecoflowucl/ncbi_download:v16.1.2-arm64' : 'ecoflowucl/ncbi_download:v16.1.2-amd64' }"

input:
tuple val(sample_id), val(accension_id)
tuple val(sample_id), val(accension_id)

output:
tuple val(sample_id), path("${sample_id}.genome.fna"), path("${sample_id}.genomic.gff"), emit: genome
output:
tuple val(sample_id), path("${sample_id}.genome.fna"), path("${sample_id}.genomic.gff"), emit: genome

script:
"""
#Get a genome and GFF assembly from NCBI using their datasets scripts
datasets download genome accession ${accension_id}
unzip ncbi_dataset.zip
datasets download genome accession ${accension_id} --include genome,gff3
unzip ncbi_dataset.zip
if ls ncbi_dataset/data/${accension_id}/chr*.fna 1> /dev/null 2>&1; then
if [ -f ncbi_dataset/data/${accension_id}/chr*.fna ]; then
cat ncbi_dataset/data/${accension_id}/chr*.fna > ${sample_id}.genome.fna
fi
if ls ncbi_dataset/data/${accension_id}/unplaced.scaf.fna 1> /dev/null 2>&1; then
cat ncbi_dataset/data/${accension_id}/unplaced.scaf.fna >> ${sample_id}.genome.fna
fi
if ls ncbi_dataset/data/${accension_id}/${accension_id}*_genomic.fna 1> /dev/null 2>&1; then
cat ncbi_dataset/data/${accension_id}/${accension_id}*_genomic.fna >> ${sample_id}.genome.fna
elif [ -f ncbi_dataset/data/${accension_id}/unplaced.scaf.fna ]; then
cat ncbi_dataset/data/${accension_id}/unplaced.scaf.fna >> ${sample_id}.genome.fna
elif [ -f ncbi_dataset/data/${accension_id}/${accension_id}*_genomic.fna ]; then
cat ncbi_dataset/data/${accension_id}/${accension_id}*_genomic.fna >> ${sample_id}.genome.fna
fi
cat ncbi_dataset/data/${accension_id}/genomic.gff > ${sample_id}.genomic.gff
Expand Down
18 changes: 7 additions & 11 deletions modules/gffread.nf
Original file line number Diff line number Diff line change
@@ -1,23 +1,19 @@
process GFFREAD {

    // Extract per-sample nucleotide sequences and a JCVI-compatible GFF3
    // from a genome FASTA + GFF pair (work done by gffread_unzip.pl).
    // FIX: the scraped diff left pre- and post-commit lines interleaved
    // (two `container` lines, duplicated input/output/script sections),
    // which is not valid Nextflow. Reconstructed the post-commit version
    // using the pinned 'ecoflowucl/gffread_python' image.
    label 'gffread'
    tag "$sample_id"
    container = 'ecoflowucl/gffread_python:python-3.10_Linux_x86_64'
    publishDir "$params.outdir/Gffread_results" , mode: "copy"

    input:
    tuple val(sample_id), path(fasta), path(gff)

    output:
    tuple val(sample_id), path( "${sample_id}.nucl.fa" ), path( "${sample_id}.gff_for_jvci.gff3" ), emit: proteins
    path( "${sample_id}.gff_for_jvci.gff3" ), emit: gff

    script:
    """
    gffread_unzip.pl ${sample_id} ${fasta} ${gff}
    """
}



Loading

0 comments on commit 934e980

Please sign in to comment.