Merge pull request #142 from nf-core/STAR

Add STAR aligner
nf-core · Jul 17, 2024 · 9561941 · 9561941
2 parents 899ac94 + c47db7e
commit 9561941
Show file tree

Hide file tree

Showing 43 changed files with 3,555 additions and 68 deletions.
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
@@ -1,85 +1,136 @@
-name: nf-core CI
 # This workflow runs the pipeline with the minimal test dataset to check that it completes without any syntax errors
+name: nf-core CI
 on:
-  push:
-    branches:
-      - dev
   pull_request:
   release:
     types: [published]
+  merge_group:
+    types:
+      - checks_requested
+    branches:
+      - master
+      - dev
 
 env:
   NXF_ANSI_LOG: false
+  NFT_VER: "0.8.4"
+  NFT_WORKDIR: "~"
+  NFT_DIFF: "pdiff"
+  NFT_DIFF_ARGS: "--line-numbers --expand-tabs=2"
 
 concurrency:
   group: "${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}"
   cancel-in-progress: true
 
 jobs:
+  changes: # detects which nf-test files this change touches; the `test` job builds its matrix from the output
+    name: Check for changes
+    runs-on: ubuntu-latest
+    outputs:
+      nf_test_files: ${{ steps.list.outputs.components }}
+    steps:
+      - uses: actions/checkout@v4 # same major version as the checkout in the `test` job
+        with:
+          fetch-depth: 0 # full history; presumably needed so the base ref exists for the comparison below
+
+      - name: List nf-test files
+        id: list
+        uses: adamrtalbot/detect-nf-test-changes@v0.0.3 # NOTE(review): ref was garbled in this view; confirm the tag
+        with:
+          head: ${{ github.sha }}
+          base: origin/${{ github.base_ref }}
+          include: .github/include.yaml
+
+      - name: print list of nf-test files # value goes through env so it is never spliced into the script text
+        env: { NF_TEST_FILES: "${{ steps.list.outputs.components }}" }
+        run: echo "$NF_TEST_FILES"
+
   test:
-    name: nf-test ${{ matrix.profile }}-${{ matrix.NXF_VER }}
-    # Only run on push if this is the nf-core dev branch (merged PRs)
-    if: "${{ github.event_name != 'push' || (github.event_name == 'push' && github.repository == 'nf-core/nascent') }}"
+    name: ${{ matrix.nf_test_files }} ${{ matrix.profile }} NF-${{ matrix.NXF_VER }}
+    needs: [changes]
+    if: needs.changes.outputs.nf_test_files != '[]'
     runs-on: ubuntu-latest
     strategy:
+      fail-fast: false
       matrix:
         NXF_VER:
-          - "23.04.0"
           - "latest-everything"
-        profile: ["docker"] # TODO , "singularity", "conda"]
+          - "23.04"
+        nf_test_files: ["${{ fromJson(needs.changes.outputs.nf_test_files) }}"]
+        profile:
+          - "docker"
+
     steps:
       - name: Check out pipeline code
-        uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4
+        uses: actions/checkout@v4
+
+      - name: Install Nextflow
+        uses: nf-core/setup-nextflow@v2
+        with:
+          version: "${{ matrix.NXF_VER }}"
+
+      - uses: actions/setup-python@v4
+        with:
+          python-version: "3.11"
+          architecture: "x64"
 
-      - name: Cache Nextflow installation
+      - name: Install pdiff to see diff between nf-test snapshots
+        run: |
+          python -m pip install --upgrade pip
+          pip install pdiff
+
+      - name: Cache nf-test installation
         id: cache-software
         uses: actions/cache@v3
         with:
           path: |
             /usr/local/bin/nf-test
             /home/runner/.nf-test/nf-test.jar
-          key: nascent-${{ runner.os }}-${{ matrix.NXF_VER }}
-
-      - name: Install Nextflow
-        uses: nf-core/setup-nextflow@v1
-        with:
-          version: "${{ matrix.NXF_VER }}"
-
-      - name: Disk space cleanup
-        uses: jlumbroso/free-disk-space@54081f138730dfa15788a46383842cd2f914a1be # v1.3.1
+          key: ${{ runner.os }}-${{ env.NFT_VER }}-nftest
 
       - name: Install nf-test
         if: steps.cache-software.outputs.cache-hit != 'true'
         run: |
-          wget -qO- https://code.askimed.com/install/nf-test | bash
+          wget -qO- https://code.askimed.com/install/nf-test | bash -s "$NFT_VER"
           sudo mv nf-test /usr/local/bin/
 
-      - name: Set up Singularity
-        if: matrix.profile == 'singularity'
-        uses: eWaterCycle/setup-singularity@v5
-        with:
-          singularity-version: 3.7.1
-
-      - name: Set up miniconda
-        if: matrix.profile == 'conda'
-        uses: conda-incubator/setup-miniconda@v2
-        with:
-          auto-update-conda: true
-          channels: conda-forge,bioconda,defaults
-          python-version: ${{ matrix.python-version }}
-
-      - name: Conda clean
-        if: matrix.profile == 'conda'
-        run: conda clean -a
-
       - name: Run nf-test
         run: |
-          nf-test test \
-            --profile=${{ matrix.profile }} \
-            workflows/tests/*.nf.test \
-            --tap=test.tap
+          nf-test test --verbose ${{ matrix.nf_test_files }} --profile "+${{ matrix.profile }}" --junitxml=test.xml --tap=test.tap
 
       - uses: pcolby/tap-summary@v1
         with:
           path: >-
             test.tap
+
+      - name: Output log on failure # pretty-prints the nf-test Nextflow logs when an earlier step failed
+        if: failure()
+        run: |
+          sudo apt-get install -y bat > /dev/null
+          batcat --decorations=always --color=always ${{ github.workspace }}/.nf-test/tests/*/meta/nextflow.log
+
+      - name: Publish Test Report
+        uses: mikepenz/action-junit-report@v3
+        if: always() # always run even if the previous step fails
+        with:
+          report_paths: test.xml
+
+  confirm-pass: # single gate job that summarises the results of `changes` and `test`
+    runs-on: ubuntu-latest
+    needs:
+      - changes
+      - test
+    if: always() # evaluate the gate even when a needed job failed or was skipped
+    steps:
+      - name: All tests ok
+        if: ${{ !contains(needs.*.result, 'failure') }} # only 'failure' counts; skipped/cancelled results pass the gate
+        run: exit 0
+      - name: One or more tests failed
+        if: ${{ contains(needs.*.result, 'failure') }} # at least one needed job reported 'failure'
+        run: exit 1
+
+      - name: debug-print # dumps the raw `needs` context to the job log
+        if: always()
+        run: |
+          echo "toJSON(needs) = ${{ toJSON(needs) }}"
+          echo "toJSON(needs.*.result) = ${{ toJSON(needs.*.result) }}"
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -9,11 +9,13 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 - [5bcfe4f](https://github.com/nf-core/nascent/commit/5bcfe4ff1729b89e9e5741c473d32168b836a57f) - Update pipeline template to [nf-core/tools 2.13](https://github.com/nf-core/tools/releases/tag/2.13)
 - [a3bc907](https://github.com/nf-core/nascent/commit/a3bc907e9afd9dd2a9572798fa16fbc781c3dcb0) - Update pipeline template to [nf-core/tools 2.13.1](https://github.com/nf-core/tools/releases/tag/2.13.1)
-- [#140](https://github.com/nf-core/nascent/pull/140) - Add HISAT2
+- [#140](https://github.com/nf-core/nascent/pull/140) - Add HISAT2 aligner
+- [#142](https://github.com/nf-core/nascent/pull/142) - Add STAR aligner
 
 ### Changed
 
-- [[#137](https://github.com/nf-core/nascent/pull/137)] - Use singularity containers for PINTS
+- [#137](https://github.com/nf-core/nascent/pull/137) - Use singularity containers for PINTS
+- [#142](https://github.com/nf-core/nascent/pull/142) - Updated CHM13 references
 
 ## v2.2.0 - 2024-03-05
 

diff --git a/conf/igenomes.config b/conf/igenomes.config
@@ -37,11 +37,8 @@ params {
             blacklist   = "${projectDir}/assets/blacklists/hg38-blacklist.bed"
         }
         'CHM13' {
-            fasta       = "${params.igenomes_base}/Homo_sapiens/UCSC/CHM13/Sequence/WholeGenomeFasta/genome.fa"
-            bwa         = "${params.igenomes_base}/Homo_sapiens/UCSC/CHM13/Sequence/BWAIndex/"
-            bwamem2     = "${params.igenomes_base}/Homo_sapiens/UCSC/CHM13/Sequence/BWAmem2Index/"
-            gtf         = "${params.igenomes_base}/Homo_sapiens/NCBI/CHM13/Annotation/Genes/genes.gtf"
-            gff         = "ftp://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/009/914/755/GCF_009914755.1_T2T-CHM13v2.0/GCF_009914755.1_T2T-CHM13v2.0_genomic.gff.gz"
+            fasta       = "${params['human-pangenomics_base']}/T2T/CHM13/assemblies/analysis_set/chm13v2.0.fa.gz"
+            gff         = "${params['human-pangenomics_base']}/T2T/CHM13/assemblies/annotation/chm13v2.0_RefSeq_Liftoff_v5.1.gff3.gz"
             mito_name   = "chrM"
         }
         'GRCm38' {

diff --git a/conf/modules.config b/conf/modules.config
@@ -80,6 +80,59 @@ process {
         ]
     }
 
+    withName: 'STAR_ALIGN' {
+        ext.args   = {
+            // Parse an argument string into a map of option -> value (split before each "--", then on the first whitespace run)
+            def argsToMap = { String args ->
+                args.split("\\s(?=--)").collectEntries {
+                    def parts = it.trim().split(/\s+/, 2)
+                    [(parts.first()): parts.last()]
+                }
+            }
+
+            // Preset options; --outReadsUnmapped is only included when params.save_unaligned is set
+            def preset_args_map = argsToMap("""
+                                            --quantMode TranscriptomeSAM
+                                            --twopassMode Basic
+                                            --outSAMtype BAM Unsorted
+                                            --readFilesCommand zcat
+                                            --runRNGseed 0
+                                            --outFilterMultimapNmax 20
+                                            --alignSJDBoverhangMin 1
+                                            --outSAMattributes NH HI AS NM MD
+                                            --quantTranscriptomeBan Singleend
+                                            --outSAMstrandField intronMotif
+                                            ${params.save_unaligned ? '--outReadsUnmapped Fastx' : ''}
+                                            """.trim())
+
+            // Merge in params.extra_star_align_args; on a duplicate option the user-supplied value replaces the preset
+            def final_args_map = preset_args_map + (params.extra_star_align_args ? argsToMap(params.extra_star_align_args) : [:])
+
+            // Serialise the merged map back into a single space-separated "option value" string
+            final_args_map.collect { key, value -> "${key} ${value}" }.join(' ').trim()
+        }
+
+        publishDir = [
+            [
+                path: { "${params.outdir}/${params.aligner}/log" },
+                mode: params.publish_dir_mode,
+                pattern: '*.{out,tab}'
+            ],
+            [
+                path: { params.save_align_intermeds ? "${params.outdir}/${params.aligner}" : params.outdir },
+                mode: params.publish_dir_mode,
+                pattern: '*.bam',
+                saveAs: { params.save_align_intermeds ? it : null }
+            ],
+            [
+                path: { params.save_unaligned ? "${params.outdir}/${params.aligner}/unmapped" : params.outdir },
+                mode: params.publish_dir_mode,
+                pattern: '*.fastq.gz',
+                saveAs: { params.save_unaligned ? it : null }
+            ]
+        ]
+    }
+
     if(params.with_umi) {
         withName: '.*:BAM_DEDUP_STATS_SAMTOOLS_UMITOOLS:UMITOOLS_DEDUP' {
             ext.args = { [

diff --git a/conf/test.config b/conf/test.config
@@ -35,6 +35,10 @@ params {
 }
 
 process {
+    withName: 'STAR_GENOMEGENERATE_IGENOMES' {
+        ext.args = '--genomeSAindexNbases 9'
+    }
+
     withName: 'PINTS_CALLER' {
         // HACK Tests fail after latest modules update
         ext.args = { "--disable-small" }

diff --git a/docs/usage.md b/docs/usage.md
@@ -50,6 +50,12 @@ TREATMENT_REP3,AEG588A6_S6_L004_R1_001.fastq.gz,
 
 An [example samplesheet](../assets/samplesheet.csv) has been provided with the pipeline.
 
+:::info
+The `sample` column is essentially a concatenation of the group and replicate columns. If every value in the `sample` column contains the same number of underscores, the underscore-separated fields may be used to group samples during the transcript identification performed by the pipeline, which restores the ability to represent different groupings.
+
+For example, `GM_0h` and `GM_1h` would be grouped together, but `GM0h` and `GM1h` would each go through transcript identification individually.
+:::
+
 ## Alignment Options
 
 By default, the pipeline uses [BWA](https://bio-bwa.sourceforge.net/) (i.e. `--aligner bwa`) to map the raw FastQ reads to the reference genome. Research as to which aligner works best with Nascent Transcript and Transcription Start Site assays is pending.

diff --git a/main.nf b/main.nf
@@ -38,6 +38,7 @@ params.bwamem2_index = getGenomeAttribute('bwamem2')
 params.dragmap = getGenomeAttribute('dragmap')
 params.bowtie2_index = getGenomeAttribute('bowtie2')
 params.hisat2_index = getGenomeAttribute('hisat2')
+params.star_index = null
 
 /*
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@@ -69,6 +70,7 @@ workflow NFCORE_NASCENT {
         params.dragmap,
         params.bowtie2_index,
         params.hisat2_index,
+        params.star_index,
     )
 
     emit:

diff --git a/modules.json b/modules.json
@@ -202,6 +202,16 @@
                         "git_sha": "f4596fe0bdc096cf53ec4497e83defdb3a94ff62",
                         "installed_by": ["bam_stats_samtools"]
                     },
+                    "star/align": {
+                        "branch": "master",
+                        "git_sha": "a21faa6a3481af92a343a10926f59c189a2c16c9",
+                        "installed_by": ["fastq_align_star", "modules"]
+                    },
+                    "star/genomegenerate": {
+                        "branch": "master",
+                        "git_sha": "a21faa6a3481af92a343a10926f59c189a2c16c9",
+                        "installed_by": ["modules"]
+                    },
                     "subread/featurecounts": {
                         "branch": "master",
                         "git_sha": "f6bba1a67cdbb605f24d7a4e8dd383b0eec45b52",
@@ -229,7 +239,12 @@
                     "bam_sort_stats_samtools": {
                         "branch": "master",
                         "git_sha": "4352dbdb09ec40db71e9b172b97a01dcf5622c26",
-                        "installed_by": ["fastq_align_bowtie2", "fastq_align_bwa", "fastq_align_hisat2"]
+                        "installed_by": [
+                            "fastq_align_bowtie2",
+                            "fastq_align_bwa",
+                            "fastq_align_hisat2",
+                            "fastq_align_star"
+                        ]
                     },
                     "bam_stats_samtools": {
                         "branch": "master",
@@ -251,6 +266,11 @@
                         "git_sha": "701ae347c4508fba1b7d65262596f278b6a11cb6",
                         "installed_by": ["subworkflows"]
                     },
+                    "fastq_align_star": {
+                        "branch": "master",
+                        "git_sha": "1d1d7df613ff53223259c14185858cd742cd4743",
+                        "installed_by": ["subworkflows"]
+                    },
                     "homer/groseq": {
                         "branch": "master",
                         "git_sha": "575e1bc54b083fb15e7dd8b5fcc40bea60e8ce83",

diff --git a/modules/local/grohmm/parametertuning/main.nf b/modules/local/grohmm/parametertuning/main.nf
@@ -9,7 +9,7 @@ process GROHMM_PARAMETERTUNING {
         'quay.io/biocontainers/mulled-v2-e9a6cb7894dd2753aff7d9446ea95c962cce8c46:0a46dae3241b1c4f02e46468f5d54eadcf64beca-0' }"
 
     input:
-    tuple val(meta), path(bam)
+    tuple val(meta), path(bams), path(bais)
     path gtf
     path tune_parameter_file
 

diff --git a/modules/local/grohmm/transcriptcalling/main.nf b/modules/local/grohmm/transcriptcalling/main.nf
@@ -9,7 +9,7 @@ process GROHMM_TRANSCRIPTCALLING {
         'quay.io/biocontainers/mulled-v2-e9a6cb7894dd2753aff7d9446ea95c962cce8c46:0a46dae3241b1c4f02e46468f5d54eadcf64beca-0' }"
 
     input:
-    tuple val(meta), path(bams)
+    tuple val(meta), path(bams), path(bais)
     path gtf
     path tuning_file
 

diff --git a/modules/nf-core/homer/maketagdirectory/main.nf b/modules/nf-core/homer/maketagdirectory/main.nf
diff --git a/modules/nf-core/pints/caller/main.nf b/modules/nf-core/pints/caller/main.nf