Skip to content

Commit

Permalink
Merge pull request #13 from Eco-Flow/simon-dev
Browse files Browse the repository at this point in the history
Merge major overhaul to main branch
  • Loading branch information
SimonDMurray authored Jan 10, 2024
2 parents e8e6143 + c50e781 commit 934e980
Show file tree
Hide file tree
Showing 21 changed files with 142 additions and 259 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -9,3 +9,4 @@ nextflow
Go/
*fna
*.gff
reports/
17 changes: 0 additions & 17 deletions .gitpod.yml
Original file line number Diff line number Diff line change
@@ -1,20 +1,3 @@
github:
prebuilds:
# enable for the master/default branch (defaults to true)
master: true
# enable for all branches in this repo (defaults to false)
branches: true
# enable for pull requests coming from this repo (defaults to true)
pullRequests: true
# enable for pull requests coming from forks (defaults to false)
pullRequestsFromForks: true
# add a "Review in Gitpod" button as a comment to pull requests (defaults to true)
addComment: true
# add a "Review in Gitpod" button to pull requests (defaults to false)
addBadge: false
# add a label once the prebuild is ready to pull requests (defaults to false)
addLabel: prebuilt-in-gitpod

# List the start up tasks. Learn more https://www.gitpod.io/docs/config-start-tasks/
tasks:
- name: Download Nextflow Tutorial
Expand Down
1 change: 0 additions & 1 deletion bin/Trans_location_Inversion_score.pl
Original file line number Diff line number Diff line change
Expand Up @@ -265,4 +265,3 @@
}


`plotting-inversions.R > R_output.txt`
1 change: 0 additions & 1 deletion bin/Trans_location_Inversion_score_treeSort.pl
Original file line number Diff line number Diff line change
Expand Up @@ -265,4 +265,3 @@
}


`plotting-inversions-treeSort.R > R_output.txt`
2 changes: 0 additions & 2 deletions bin/plotting-inversions-treeSort.R
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@
#!/opt/conda/bin/Rscript --vanilla

# import data
Bee_inver_trans_prot <- read.delim("Trans_location_version.out.txt", stringsAsFactors=FALSE)

Expand Down
2 changes: 0 additions & 2 deletions bin/plotting-inversions.R
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@
#!/opt/conda/bin/Rscript --vanilla

# import data
Bee_inver_trans_prot <- read.delim("Trans_location_version.out.txt", stringsAsFactors=FALSE)

Expand Down
2 changes: 0 additions & 2 deletions bin/plotting-synteny_go.R
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@
#!/opt/conda/bin/Rscript --vanilla

library(pheatmap)

erefd<-read.table("Go_summary_topSynteny.tsv", h=T, sep="\t")
Expand Down
21 changes: 4 additions & 17 deletions conf/docker.config
Original file line number Diff line number Diff line change
@@ -1,44 +1,31 @@
docker.runOptions='-u $(id -u):$(id -g)'

process {

errorStrategy = 'retry'
maxRetries = 5

withLabel: 'jcvi' {
container = 'chriswyatt/jcvi'
cpus = 1
echo = true
}

withLabel: 'chromo' {
container = 'chriswyatt/jcvi'
cpus = 1
echo = true
}

withLabel: 'gffread' {
container = 'chriswyatt/gffread_python3'
cpus = 1
echo = true
}

withLabel: 'syn' {
container = 'chriswyatt/jcvi'
cpus = 1
echo = true
}

withLabel: 'config' {
container = 'chriswyatt/jcvi'
cpus = 1
echo = true
}

withLabel: 'macro' {
container = 'chriswyatt/jcvi'
cpus = 1
echo = true
}

withLabel: 'download' {
container = 'chriswyatt/ncbi_datasets:version3.0'
cpus = 1
echo = true
}
Expand Down
71 changes: 17 additions & 54 deletions main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -4,22 +4,14 @@

/*
* Authors:
* - Chris Wyatt <[email protected]>
* - Chris Wyatt <[email protected]>
* - Simon Murray <[email protected]>
*/

/*
 * Default pipeline parameters (on test data). They can be overridden on the command line, e.g.
* given `params.name` specify on the run command line `--name My_run_v1`.
*/

params.outdir = "Results"
params.input = "data/Example.csv"
params.seqids = "./data/default1"
params.layout = "./data/default2"
params.hex = "data/unique_hex2"
params.go = null
params.test=0
params.tree= false

log.info """\
===================================
Expand All @@ -40,15 +32,12 @@ include { GFFREAD } from './modules/gffread.nf'
include { JCVI } from './modules/jcvi.nf'
include { SYNTENY } from './modules/synteny.nf'
include { DOWNLOAD_NCBI } from './modules/download_ncbi.nf'
include { DOWNLOAD_NCBI as DOWNLOAD_NCBI2 } from './modules/download_ncbi.nf'
include { CHROMOPAINT } from './modules/chromo.nf'
include { SCORE } from './modules/score.nf'
include { LONGEST } from './modules/longest_orf.nf'
include { GO } from './modules/go.nf'
include { SCORE_TREE } from './modules/score_tree.nf'
include { GO_SUMMARISE } from './modules/go_summarise.nf'


Channel
.fromPath(params.input)
.splitCsv()
Expand All @@ -66,74 +55,48 @@ Channel

Channel
.fromPath(params.layout)
.set { in_layout }
.set { in_layout }

Channel
.fromPath(params.hex)
.set { in_hex }
.set { in_hex }

Channel
.fromPath(params.input)
.splitCsv()
.branch {
ncbi: it.size() == 2
.branch {
ncbi: it.size() == 2
local: it.size() == 3
}
.set { input_type }


// input_type.ncbi.view { "$it is small" }
// input_type.local.view { "$it is large" }



workflow {

    // Download genomes for samples given as NCBI accessions (2-column CSV rows),
    // then merge with genomes supplied locally (3-column CSV rows).
    DOWNLOAD_NCBI ( input_type.ncbi )
    GFFREAD ( DOWNLOAD_NCBI.out.genome.mix(input_type.local) )

    // Reformat annotations for JCVI, then run all-vs-all pairwise synteny,
    // excluding self-vs-self comparisons.
    JCVI ( GFFREAD.out.proteins )
    SYNTENY ( JCVI.out.new_format.combine(JCVI.out.new_format).filter{ it[0] != it[3] } )

    // Chromosome paintings for every species pair.
    CHROMOPAINT ( in_hex , SYNTENY.out.anchors , JCVI.out.beds.collect() )

    // Score synteny either guided by a user-supplied species tree, or unguided.
    if (params.tree) {
        tree_in = Channel.fromPath(params.tree)
        SCORE_TREE ( SYNTENY.out.anchors.collect() , SYNTENY.out.percsim.collect() , GFFREAD.out.gff.collect() , tree_in )
    }
    else {
        SCORE ( SYNTENY.out.anchors.collect() , SYNTENY.out.percsim.collect() , GFFREAD.out.gff.collect() )
    }

    // Optional GO-term enrichment of syntenic gene sets.
    if (params.go) {
        // FIX: go_datasets was referenced here but never defined in this scope
        // (the definition was lost when the branch was refactored).
        go_datasets = Channel.fromPath(params.go)
        // FIX: params.tree defaults to `false`, so the previous test
        // `params.tree != null` was always true and wrongly selected
        // SCORE_TREE output even on tree-less runs; test truthiness instead.
        // FIX: SCORE.speciesSummary -> SCORE.out.speciesSummary (missing .out).
        ch_go = params.tree ? SCORE_TREE.out.speciesSummary : SCORE.out.speciesSummary
        ch_go.view()
        GO ( go_datasets.collect() , ch_go.flatten(), JCVI.out.beds.collect() )
        GO_SUMMARISE ( GO.out.go_table.collect() )
    }
}

workflow.onComplete {
    // Final status report: point the user at the results directory on
    // success, otherwise print a generic failure notice.
    if ( workflow.success ) {
        println ( "\nDone! Check results in $params.outdir/ \n" )
    }
    else {
        println ( "Hmmm .. something went wrong\n" )
    }
}

29 changes: 13 additions & 16 deletions modules/chromo.nf
Original file line number Diff line number Diff line change
@@ -1,28 +1,25 @@
process CHROMOPAINT {

    // Paint chromosomes of one species pair using their shared synteny
    // anchors; emits one PDF per pair.
    // FIX: the scraped diff left pre- and post-commit lines interleaved
    // (two `container` assignments, duplicated input/output/script blocks),
    // which is not valid Nextflow. Reconstructed the post-commit version:
    // the old 'chriswyatt/jcvi' container and `errorStrategy = 'ignore'`
    // were replaced by the pinned 'ecoflowucl/jcvi' image.
    label 'chromo'
    tag "$anchors"
    publishDir "$params.outdir/Chromosome_plots" , mode: "copy"

    container = 'ecoflowucl/jcvi:python-3.10_last-1522'

    input:
    path (hex)
    each (anchors)
    path ('*')

    output:
    path("*.pdf"), emit: pdf

    script:
    """
    # Anchors file is named <A>.<B>.anchors; recover the two species names.
    echo '${anchors}' | rev | cut -d'/' -f 1 | rev > Name
    A="\$(cut -d'.' -f1 Name)"
    B="\$(cut -d'.' -f2 Name)"
    anchor.pl \$A.bed \$B.bed ${anchors}
    python -m jcvi.graphics.chromosome Chromopaint.txt colour.idmap
    mv Chromopaint.pdf "\$A.\$B.chromo.pdf"
    """
}
42 changes: 0 additions & 42 deletions modules/default_config.nf

This file was deleted.

28 changes: 13 additions & 15 deletions modules/download_ncbi.nf
Original file line number Diff line number Diff line change
@@ -1,29 +1,27 @@
process DOWNLOAD_NCBI {

label 'download'
tag "$sample_id via $accension_id"
container = 'chriswyatt/ncbi_download'
errorStrategy = 'ignore'

container "${ params.architecture == 'arm' ? 'ecoflowucl/ncbi_download:v16.1.2-arm64' : 'ecoflowucl/ncbi_download:v16.1.2-amd64' }"

input:
tuple val(sample_id), val(accension_id)
tuple val(sample_id), val(accension_id)

output:
tuple val(sample_id), path("${sample_id}.genome.fna"), path("${sample_id}.genomic.gff"), emit: genome
output:
tuple val(sample_id), path("${sample_id}.genome.fna"), path("${sample_id}.genomic.gff"), emit: genome

script:
"""
#Get a genome and GFF assembly from NCBI using their datasets scripts
datasets download genome accession ${accension_id}
unzip ncbi_dataset.zip
datasets download genome accession ${accension_id} --include genome,gff3
unzip ncbi_dataset.zip
if ls ncbi_dataset/data/${accension_id}/chr*.fna 1> /dev/null 2>&1; then
if [ -f ncbi_dataset/data/${accension_id}/chr*.fna ]; then
cat ncbi_dataset/data/${accension_id}/chr*.fna > ${sample_id}.genome.fna
fi
if ls ncbi_dataset/data/${accension_id}/unplaced.scaf.fna 1> /dev/null 2>&1; then
cat ncbi_dataset/data/${accension_id}/unplaced.scaf.fna >> ${sample_id}.genome.fna
fi
if ls ncbi_dataset/data/${accension_id}/${accension_id}*_genomic.fna 1> /dev/null 2>&1; then
cat ncbi_dataset/data/${accension_id}/${accension_id}*_genomic.fna >> ${sample_id}.genome.fna
elif [ -f ncbi_dataset/data/${accension_id}/unplaced.scaf.fna ]; then
cat ncbi_dataset/data/${accension_id}/unplaced.scaf.fna >> ${sample_id}.genome.fna
elif [ -f ncbi_dataset/data/${accension_id}/${accension_id}*_genomic.fna ]; then
cat ncbi_dataset/data/${accension_id}/${accension_id}*_genomic.fna >> ${sample_id}.genome.fna
fi
cat ncbi_dataset/data/${accension_id}/genomic.gff > ${sample_id}.genomic.gff
Expand Down
18 changes: 7 additions & 11 deletions modules/gffread.nf
Original file line number Diff line number Diff line change
@@ -1,23 +1,19 @@
process GFFREAD {

    // Extract per-sample nucleotide sequences and a JCVI-compatible GFF3
    // from a genome FASTA + GFF pair (work done by gffread_unzip.pl).
    // FIX: the scraped diff left pre- and post-commit lines interleaved
    // (two `container` lines, duplicated input/output/script sections),
    // which is not valid Nextflow. Reconstructed the post-commit version
    // using the pinned 'ecoflowucl/gffread_python' image.
    label 'gffread'
    tag "$sample_id"
    container = 'ecoflowucl/gffread_python:python-3.10_Linux_x86_64'
    publishDir "$params.outdir/Gffread_results" , mode: "copy"

    input:
    tuple val(sample_id), path(fasta), path(gff)

    output:
    tuple val(sample_id), path( "${sample_id}.nucl.fa" ), path( "${sample_id}.gff_for_jvci.gff3" ), emit: proteins
    path( "${sample_id}.gff_for_jvci.gff3" ), emit: gff

    script:
    """
    gffread_unzip.pl ${sample_id} ${fasta} ${gff}
    """
}



Loading

0 comments on commit 934e980

Please sign in to comment.