diff --git a/.editorconfig b/.editorconfig new file mode 100644 index 00000000..b78de6e6 --- /dev/null +++ b/.editorconfig @@ -0,0 +1,24 @@ +root = true + +[*] +charset = utf-8 +end_of_line = lf +insert_final_newline = true +trim_trailing_whitespace = true +indent_size = 4 +indent_style = space + +[*.{md,yml,yaml,html,css,scss,js,cff}] +indent_size = 2 + +# These files are edited and tested upstream in nf-core/modules +[/modules/nf-core/**] +charset = unset +end_of_line = unset +insert_final_newline = unset +trim_trailing_whitespace = unset +indent_style = unset +indent_size = unset + +[/assets/email*] +indent_size = unset diff --git a/.gitattributes b/.gitattributes index 7fe55006..050bb120 100644 --- a/.gitattributes +++ b/.gitattributes @@ -1 +1,3 @@ *.config linguist-language=nextflow +modules/nf-core/** linguist-generated +subworkflows/nf-core/** linguist-generated diff --git a/.github/.dockstore.yml b/.github/.dockstore.yml new file mode 100644 index 00000000..191fabd2 --- /dev/null +++ b/.github/.dockstore.yml @@ -0,0 +1,6 @@ +# Dockstore config version, not pipeline version +version: 1.2 +workflows: + - subclass: nfl + primaryDescriptorPath: /nextflow.config + publish: True diff --git a/.github/CONTRIBUTING.md b/.github/CONTRIBUTING.md index deb92b74..74b68e43 100644 --- a/.github/CONTRIBUTING.md +++ b/.github/CONTRIBUTING.md @@ -1,45 +1,103 @@ # nf-core/nascent: Contributing Guidelines -Hi there! Many thanks for taking an interest in improving nf-core/nascent. +Hi there! +Many thanks for taking an interest in improving nf-core/nascent. -We try to manage the required tasks for nf-core/nascent using GitHub issues, you probably came to this page when creating one. Please use the pre-filled template to save time. +We try to manage the required tasks for nf-core/nascent using GitHub issues, you probably came to this page when creating one. +Please use the pre-filled template to save time. -However, don't be put off by this template - other more general issues and suggestions are welcome! Contributions to the code are even more welcome ;) +However, don't be put off by this template - other more general issues and suggestions are welcome! +Contributions to the code are even more welcome ;) -> If you need help using or modifying nf-core/nascent then the best place to go is the Gitter chatroom where you can ask us questions directly: https://gitter.im/nf-core/Lobby +> If you need help using or modifying nf-core/nascent then the best place to ask is on the nf-core Slack [#nascent](https://nfcore.slack.com/channels/nascent) channel ([join our Slack here](https://nf-co.re/join/slack)). ## Contribution workflow -If you'd like to write some code for nf-core/nascent, the standard workflow -is as follows: -1. Check that there isn't already an issue about your idea in the - [nf-core/nascent issues](https://github.com/nf-core/nascent/issues) to avoid - duplicating work. - * If there isn't one already, please create one so that others know you're working on this -2. Fork the [nf-core/nascent repository](https://github.com/nf-core/nascent) to your GitHub account -3. Make the necessary changes / additions within your forked repository -4. Submit a Pull Request against the `dev` branch and wait for the code to be reviewed and merged. 
+If you'd like to write some code for nf-core/nascent, the standard workflow is as follows:
-If you're not used to this workflow with git, you can start with some [basic docs from GitHub](https://help.github.com/articles/fork-a-repo/) or even their [excellent interactive tutorial](https://try.github.io/).
+1. Check that there isn't already an issue about your idea in the [nf-core/nascent issues](https://github.com/nf-core/nascent/issues) to avoid duplicating work. If there isn't one already, please create one so that others know you're working on this
+2. [Fork](https://help.github.com/en/github/getting-started-with-github/fork-a-repo) the [nf-core/nascent repository](https://github.com/nf-core/nascent) to your GitHub account
+3. Make the necessary changes / additions within your forked repository following [Pipeline conventions](#pipeline-contribution-conventions)
+4. Use `nf-core schema build` and add any new parameters to the pipeline JSON schema (requires [nf-core tools](https://github.com/nf-core/tools) >= 1.10).
+5. Submit a Pull Request against the `dev` branch and wait for the code to be reviewed and merged
+If you're not used to this workflow with git, you can start with some [docs from GitHub](https://help.github.com/en/github/collaborating-with-issues-and-pull-requests) or even their [excellent `git` resources](https://try.github.io/).
## Tests
-When you create a pull request with changes, [Travis CI](https://travis-ci.org/) will run automatic tests.
+
+When you create a pull request with changes, [GitHub Actions](https://github.com/features/actions) will run automatic tests.
Typically, pull-requests are only fully reviewed when these tests are passing, though of course we can help out before then.
There are typically two types of tests that run:
-### Lint Tests
-The nf-core has a [set of guidelines](http://nf-co.re/guidelines) which all pipelines must adhere to.
+### Lint tests
+
+`nf-core` has a [set of guidelines](https://nf-co.re/developers/guidelines) which all pipelines must adhere to.
To enforce these and ensure that all pipelines stay in sync, we have developed a helper tool which runs checks on the pipeline code.
This is in the [nf-core/tools repository](https://github.com/nf-core/tools) and once installed can be run locally with the `nf-core lint ` command.
If any failures or warnings are encountered, please follow the listed URL for more documentation.
-### Pipeline Tests
-Each nf-core pipeline should be set up with a minimal set of test-data.
-Travis CI then runs the pipeline on this data to ensure that it exists successfully.
+### Pipeline tests
+
+Each `nf-core` pipeline should be set up with a minimal set of test-data.
+`GitHub Actions` then runs the pipeline on this data to ensure that it exits successfully.
If there are any failures then the automated tests fail.
-These tests are run both with the latest available version of Nextflow and also the minimum required version that is stated in the pipeline code.
+These tests are run both with the latest available version of `Nextflow` and also the minimum required version that is stated in the pipeline code.
+
+## Patch
+
+:warning: Only in the unlikely and regretful event of a release happening with a bug.
+
+- On your own fork, make a new branch `patch` based on `upstream/master`.
+- Fix the bug, and bump version (X.Y.Z+1).
+- A PR should be made on `master` from `patch` to directly address this particular bug.
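The `test` profile exercised by these automated runs (invoked locally with `-profile test,docker`) is just a small Nextflow config. A minimal sketch of what such a profile typically sets is shown below; the parameter names follow the usual nf-core template pattern and the samplesheet URL is a placeholder, not this pipeline's actual test data:

```groovy
// Sketch of a minimal nf-core-style test profile (e.g. conf/test.config).
// Values and the samplesheet URL are illustrative placeholders only.
params {
    config_profile_name        = 'Test profile'
    config_profile_description = 'Minimal test dataset to check pipeline function'

    // Keep resource requests small enough for CI runners
    max_cpus   = 2
    max_memory = '6.GB'
    max_time   = '6.h'

    // Tiny input data, typically hosted in nf-core/test-datasets
    input = 'https://example.com/nascent/samplesheet.csv'
}
```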
## Getting help
-For further information/help, please consult the [nf-core/nascent documentation](https://github.com/nf-core/nascent#documentation) and don't hesitate to get in touch on the pipeline channel on [Slack](https://nf-core-invite.herokuapp.com/).
+
+For further information/help, please consult the [nf-core/nascent documentation](https://nf-co.re/nascent/usage) and don't hesitate to get in touch on the nf-core Slack [#nascent](https://nfcore.slack.com/channels/nascent) channel ([join our Slack here](https://nf-co.re/join/slack)).
+
+## Pipeline contribution conventions
+
+To make the nf-core/nascent code and processing logic more understandable for new contributors and to ensure quality, we semi-standardise the way the code and other contributions are written.
+
+### Adding a new step
+
+If you wish to contribute a new step, please use the following coding standards:
+
+1. Define the corresponding input channel into your new process from the expected previous process channel
+2. Write the process block (see below).
+3. Define the output channel if needed (see below).
+4. Add any new parameters to `nextflow.config` with a default (see below).
+5. Add any new parameters to `nextflow_schema.json` with help text (via the `nf-core schema build` tool).
+6. Add sanity checks and validation for all relevant parameters.
+7. Perform local tests to validate that the new code works as expected.
+8. If applicable, add a new test command in `.github/workflows/ci.yml`.
+9. Update MultiQC config `assets/multiqc_config.yml` so relevant suffixes, file name clean up and module plots are in the appropriate order. If applicable, add a [MultiQC](https://multiqc.info/) module.
+10. Add a description of the output files and if relevant any appropriate images from the MultiQC report to `docs/output.md`.
+
+### Default values
+
+Parameters should be initialised / defined with default values in `nextflow.config` under the `params` scope.
+
+Once there, use `nf-core schema build` to add to `nextflow_schema.json`.
+
+### Default processes resource requirements
+
+Sensible defaults for process resource requirements (CPUs / memory / time) for a process should be defined in `conf/base.config`. These should generally be specified generically with `withLabel:` selectors so they can be shared across multiple processes/steps of the pipeline. An nf-core standard set of labels that should be followed where possible can be seen in the [nf-core pipeline template](https://github.com/nf-core/tools/blob/master/nf_core/pipeline-template/conf/base.config), which has the default process as a single-core process, and then different levels of multi-core configurations for increasingly large memory requirements defined with standardised labels.
+
+The process resources can be passed on to the tool dynamically within the process with the `${task.cpus}` and `${task.memory}` variables in the `script:` block.
+
+### Naming schemes
+
+Please use the following naming schemes to make it easy to understand what is going where.
+
+- initial process channel: `ch_output_from_`
+- intermediate and terminal channels: `ch__for_`
+
+### Nextflow version bumping
+
+If you are using a new feature from core Nextflow, you may bump the minimum required version of Nextflow in the pipeline with: `nf-core bump-version --nextflow . [min-nf-version]`
+
+### Images and figures
+
+For overview images and other documents we follow the nf-core [style guidelines and examples](https://nf-co.re/developers/design_guidelines).
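To make the resource-label and `task` conventions above concrete, here is a rough sketch. The label names and values follow the general template pattern; the module name, tool invocation, and file names are hypothetical and not taken from this pipeline:

```groovy
// Sketch of label-based defaults in conf/base.config (example values only)
process {
    cpus   = 1
    memory = 6.GB
    time   = 4.h

    withLabel: process_low {
        cpus   = 2
        memory = 12.GB
        time   = 4.h
    }
    withLabel: process_medium {
        cpus   = 6
        memory = 36.GB
        time   = 8.h
    }
    withLabel: process_high {
        cpus   = 12
        memory = 72.GB
        time   = 16.h
    }
}
```

A module then opts into one of these labels and forwards the resolved resources to the tool via `task.cpus` and `task.memory`:

```groovy
// Hypothetical DSL2 module: 'some_tool' and its flags are placeholders
process EXAMPLE_STEP {
    label 'process_medium'

    input:
    tuple val(meta), path(bam)

    output:
    tuple val(meta), path("*.out.bam"), emit: bam

    script:
    """
    some_tool --threads $task.cpus --max-mem ${task.memory.toGiga()}G --input $bam --output ${meta.id}.out.bam
    """
}
```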
diff --git a/.github/ISSUE_TEMPLATE/bug_report.md b/.github/ISSUE_TEMPLATE/bug_report.md deleted file mode 100644 index 0bdb2be9..00000000 --- a/.github/ISSUE_TEMPLATE/bug_report.md +++ /dev/null @@ -1,31 +0,0 @@ -Hi there! - -Thanks for telling us about a problem with the pipeline. Please delete this text and anything that's not relevant from the template below: - -#### Describe the bug -A clear and concise description of what the bug is. - -#### Steps to reproduce -Steps to reproduce the behaviour: -1. Command line: `nextflow run ...` -2. See error: _Please provide your error message_ - -#### Expected behaviour -A clear and concise description of what you expected to happen. - -#### System: - - Hardware: [e.g. HPC, Desktop, Cloud...] - - Executor: [e.g. slurm, local, awsbatch...] - - OS: [e.g. CentOS Linux, macOS, Linux Mint...] - - Version [e.g. 7, 10.13.6, 18.3...] - -#### Nextflow Installation: - - Version: [e.g. 0.31.0] - -#### Container engine: - - Engine: [e.g. Conda, Docker or Singularity] - - version: [e.g. 1.0.0] - - Image tag: [e.g. nfcore/nascent:1.0.0] - -#### Additional context -Add any other context about the problem here. diff --git a/.github/ISSUE_TEMPLATE/bug_report.yml b/.github/ISSUE_TEMPLATE/bug_report.yml new file mode 100644 index 00000000..1f8fb6be --- /dev/null +++ b/.github/ISSUE_TEMPLATE/bug_report.yml @@ -0,0 +1,50 @@ +name: Bug report +description: Report something that is broken or incorrect +labels: bug +body: + - type: markdown + attributes: + value: | + Before you post this issue, please check the documentation: + + - [nf-core website: troubleshooting](https://nf-co.re/usage/troubleshooting) + - [nf-core/nascent pipeline documentation](https://nf-co.re/nascent/usage) + + - type: textarea + id: description + attributes: + label: Description of the bug + description: A clear and concise description of what the bug is. + validations: + required: true + + - type: textarea + id: command_used + attributes: + label: Command used and terminal output + description: Steps to reproduce the behaviour. Please paste the command you used to launch the pipeline and the output from your terminal. + render: console + placeholder: | + $ nextflow run ... + + Some output where something broke + + - type: textarea + id: files + attributes: + label: Relevant files + description: | + Please drag and drop the relevant files here. Create a `.zip` archive if the extension is not allowed. + Your verbose log file `.nextflow.log` is often useful _(this is a hidden file in the directory where you launched the pipeline)_ as well as custom Nextflow configuration files. + + - type: textarea + id: system + attributes: + label: System information + description: | + * Nextflow version _(eg. 21.10.3)_ + * Hardware _(eg. HPC, Desktop, Cloud)_ + * Executor _(eg. slurm, local, awsbatch)_ + * Container engine: _(e.g. Docker, Singularity, Conda, Podman, Shifter or Charliecloud)_ + * OS _(eg. CentOS Linux, macOS, Linux Mint)_ + * Version of nf-core/nascent _(eg. 
1.1, 1.5, 1.8.2)_ diff --git a/.github/ISSUE_TEMPLATE/config.yml b/.github/ISSUE_TEMPLATE/config.yml new file mode 100644 index 00000000..4c196c95 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/config.yml @@ -0,0 +1,7 @@ +contact_links: + - name: Join nf-core + url: https://nf-co.re/join + about: Please join the nf-core community here + - name: "Slack #nascent channel" + url: https://nfcore.slack.com/channels/nascent + about: Discussion about the nf-core/nascent pipeline diff --git a/.github/ISSUE_TEMPLATE/feature_request.md b/.github/ISSUE_TEMPLATE/feature_request.md deleted file mode 100644 index 1f025b77..00000000 --- a/.github/ISSUE_TEMPLATE/feature_request.md +++ /dev/null @@ -1,16 +0,0 @@ -Hi there! - -Thanks for suggesting a new feature for the pipeline! Please delete this text and anything that's not relevant from the template below: - -#### Is your feature request related to a problem? Please describe. -A clear and concise description of what the problem is. -Ex. I'm always frustrated when [...] - -#### Describe the solution you'd like -A clear and concise description of what you want to happen. - -#### Describe alternatives you've considered -A clear and concise description of any alternative solutions or features you've considered. - -#### Additional context -Add any other context about the feature request here. diff --git a/.github/ISSUE_TEMPLATE/feature_request.yml b/.github/ISSUE_TEMPLATE/feature_request.yml new file mode 100644 index 00000000..eb002195 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/feature_request.yml @@ -0,0 +1,11 @@ +name: Feature request +description: Suggest an idea for the nf-core/nascent pipeline +labels: enhancement +body: + - type: textarea + id: description + attributes: + label: Description of feature + description: Please describe your suggestion for a new feature. It might help to describe a problem or use case, plus any alternatives that you have considered. + validations: + required: true diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md index 621f47ba..e29a2f49 100644 --- a/.github/PULL_REQUEST_TEMPLATE.md +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -1,15 +1,24 @@ -Many thanks to contributing to nf-core/nascent! + ## PR checklist - - [ ] This comment contains a description of changes (with reason) - - [ ] If you've fixed a bug or added code that should be tested, add tests! - - [ ] If necessary, also make a PR on the [nf-core/nascent branch on the nf-core/test-datasets repo]( https://github.com/nf-core/test-datasets/pull/new/nf-core/nascent) - - [ ] Ensure the test suite passes (`nextflow run . -profile test,docker`). - - [ ] Make sure your code lints (`nf-core lint .`). - - [ ] Documentation in `docs` is updated - - [ ] `CHANGELOG.md` is updated - - [ ] `README.md` is updated - -**Learn more about contributing:** https://github.com/nf-core/nascent/tree/master/.github/CONTRIBUTING.md + +- [ ] This comment contains a description of changes (with reason). +- [ ] If you've fixed a bug or added code that should be tested, add tests! +- [ ] If you've added a new tool - have you followed the pipeline conventions in the [contribution docs](https://github.com/nf-core/nascent/tree/master/.github/CONTRIBUTING.md)- [ ] If necessary, also make a PR on the nf-core/nascent _branch_ on the [nf-core/test-datasets](https://github.com/nf-core/test-datasets) repository. +- [ ] Make sure your code lints (`nf-core lint`). +- [ ] Ensure the test suite passes (`nextflow run . -profile test,docker --outdir `). 
+- [ ] Usage Documentation in `docs/usage.md` is updated. +- [ ] Output Documentation in `docs/output.md` is updated. +- [ ] `CHANGELOG.md` is updated. +- [ ] `README.md` is updated (including new tool citations and authors/contributors). diff --git a/.github/markdownlint.yml b/.github/markdownlint.yml deleted file mode 100644 index e052a635..00000000 --- a/.github/markdownlint.yml +++ /dev/null @@ -1,9 +0,0 @@ -# Markdownlint configuration file -default: true, -line-length: false -no-multiple-blanks: 0 -blanks-around-headers: false -blanks-around-lists: false -header-increment: false -no-duplicate-header: - siblings_only: true diff --git a/.github/workflows/awsfulltest.yml b/.github/workflows/awsfulltest.yml new file mode 100644 index 00000000..50ab4ffc --- /dev/null +++ b/.github/workflows/awsfulltest.yml @@ -0,0 +1,34 @@ +name: nf-core AWS full size tests +# This workflow is triggered on published releases. +# It can be additionally triggered manually with GitHub actions workflow dispatch button. +# It runs the -profile 'test_full' on AWS batch + +on: + release: + types: [published] + workflow_dispatch: +jobs: + run-tower: + name: Run AWS full tests + if: github.repository == 'nf-core/nascent' + runs-on: ubuntu-latest + steps: + - name: Launch workflow via tower + uses: nf-core/tower-action@v3 + # TODO nf-core: You can customise AWS full pipeline tests as required + # Add full size test data (but still relatively small datasets for few samples) + # on the `test_full.config` test runs with only one set of parameters + with: + workspace_id: ${{ secrets.TOWER_WORKSPACE_ID }} + access_token: ${{ secrets.TOWER_ACCESS_TOKEN }} + compute_env: ${{ secrets.TOWER_COMPUTE_ENV }} + workdir: s3://${{ secrets.AWS_S3_BUCKET }}/work/nascent/work-${{ github.sha }} + parameters: | + { + "outdir": "s3://${{ secrets.AWS_S3_BUCKET }}/nascent/results-${{ github.sha }}" + } + profiles: test_full,aws_tower + - uses: actions/upload-artifact@v3 + with: + name: Tower debug log file + path: tower_action_*.log diff --git a/.github/workflows/awstest.yml b/.github/workflows/awstest.yml new file mode 100644 index 00000000..ff5e5678 --- /dev/null +++ b/.github/workflows/awstest.yml @@ -0,0 +1,29 @@ +name: nf-core AWS test +# This workflow can be triggered manually with the GitHub actions workflow dispatch button. 
+# It runs the -profile 'test' on AWS batch + +on: + workflow_dispatch: +jobs: + run-tower: + name: Run AWS tests + if: github.repository == 'nf-core/nascent' + runs-on: ubuntu-latest + steps: + # Launch workflow using Tower CLI tool action + - name: Launch workflow via tower + uses: nf-core/tower-action@v3 + with: + workspace_id: ${{ secrets.TOWER_WORKSPACE_ID }} + access_token: ${{ secrets.TOWER_ACCESS_TOKEN }} + compute_env: ${{ secrets.TOWER_COMPUTE_ENV }} + workdir: s3://${{ secrets.AWS_S3_BUCKET }}/work/nascent/work-${{ github.sha }} + parameters: | + { + "outdir": "s3://${{ secrets.AWS_S3_BUCKET }}/nascent/results-test-${{ github.sha }}" + } + profiles: test,aws_tower + - uses: actions/upload-artifact@v3 + with: + name: Tower debug log file + path: tower_action_*.log diff --git a/.github/workflows/branch.yml b/.github/workflows/branch.yml new file mode 100644 index 00000000..46c65194 --- /dev/null +++ b/.github/workflows/branch.yml @@ -0,0 +1,44 @@ +name: nf-core branch protection +# This workflow is triggered on PRs to master branch on the repository +# It fails when someone tries to make a PR against the nf-core `master` branch instead of `dev` +on: + pull_request_target: + branches: [master] + +jobs: + test: + runs-on: ubuntu-latest + steps: + # PRs to the nf-core repo master branch are only ok if coming from the nf-core repo `dev` or any `patch` branches + - name: Check PRs + if: github.repository == 'nf-core/nascent' + run: | + { [[ ${{github.event.pull_request.head.repo.full_name }} == nf-core/nascent ]] && [[ $GITHUB_HEAD_REF = "dev" ]]; } || [[ $GITHUB_HEAD_REF == "patch" ]] + + # If the above check failed, post a comment on the PR explaining the failure + # NOTE - this doesn't currently work if the PR is coming from a fork, due to limitations in GitHub actions secrets + - name: Post PR comment + if: failure() + uses: mshick/add-pr-comment@v1 + with: + message: | + ## This PR is against the `master` branch :x: + + * Do not close this PR + * Click _Edit_ and change the `base` to `dev` + * This CI test will remain failed until you push a new commit + + --- + + Hi @${{ github.event.pull_request.user.login }}, + + It looks like this pull-request is has been made against the [${{github.event.pull_request.head.repo.full_name }}](https://github.com/${{github.event.pull_request.head.repo.full_name }}) `master` branch. + The `master` branch on nf-core repositories should always contain code from the latest release. + Because of this, PRs to `master` are only allowed if they come from the [${{github.event.pull_request.head.repo.full_name }}](https://github.com/${{github.event.pull_request.head.repo.full_name }}) `dev` branch. + + You do not need to close this PR, you can change the target branch to `dev` by clicking the _"Edit"_ button at the top of this page. + Note that even after this, the test will continue to show as failing until you push a new commit. + + Thanks again for your contribution! 
+ repo-token: ${{ secrets.GITHUB_TOKEN }} + allow-repeats: false diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml new file mode 100644 index 00000000..befd22f5 --- /dev/null +++ b/.github/workflows/ci.yml @@ -0,0 +1,40 @@ +name: nf-core CI +# This workflow runs the pipeline with the minimal test dataset to check that it completes without any syntax errors +on: + push: + branches: + - dev + pull_request: + release: + types: [published] + +env: + NXF_ANSI_LOG: false + +jobs: + test: + name: Run pipeline with test data + # Only run on push if this is the nf-core dev branch (merged PRs) + if: "${{ github.event_name != 'push' || (github.event_name == 'push' && github.repository == 'nf-core/nascent') }}" + runs-on: ubuntu-latest + strategy: + matrix: + NXF_VER: + - "21.10.6" + - "latest-everything" + parameters: + - "" + - "--aligner bwamem2" + - "--aligner dragmap" + steps: + - name: Check out pipeline code + uses: actions/checkout@v2 + + - name: Install Nextflow + uses: nf-core/setup-nextflow@v1 + with: + version: "${{ matrix.NXF_VER }}" + + - name: Run pipeline with test data + run: | + nextflow run ${GITHUB_WORKSPACE} -profile test,docker ${{ matrix.parameters }} --outdir ./results diff --git a/.github/workflows/fix-linting.yml b/.github/workflows/fix-linting.yml new file mode 100644 index 00000000..d9980b02 --- /dev/null +++ b/.github/workflows/fix-linting.yml @@ -0,0 +1,55 @@ +name: Fix linting from a comment +on: + issue_comment: + types: [created] + +jobs: + deploy: + # Only run if comment is on a PR with the main repo, and if it contains the magic keywords + if: > + contains(github.event.comment.html_url, '/pull/') && + contains(github.event.comment.body, '@nf-core-bot fix linting') && + github.repository == 'nf-core/nascent' + runs-on: ubuntu-latest + steps: + # Use the @nf-core-bot token to check out so we can push later + - uses: actions/checkout@v3 + with: + token: ${{ secrets.nf_core_bot_auth_token }} + + # Action runs on the issue comment, so we don't get the PR by default + # Use the gh cli to check out the PR + - name: Checkout Pull Request + run: gh pr checkout ${{ github.event.issue.number }} + env: + GITHUB_TOKEN: ${{ secrets.nf_core_bot_auth_token }} + + - uses: actions/setup-node@v2 + + - name: Install Prettier + run: npm install -g prettier @prettier/plugin-php + + # Check that we actually need to fix something + - name: Run 'prettier --check' + id: prettier_status + run: | + if prettier --check ${GITHUB_WORKSPACE}; then + echo "::set-output name=result::pass" + else + echo "::set-output name=result::fail" + fi + + - name: Run 'prettier --write' + if: steps.prettier_status.outputs.result == 'fail' + run: prettier --write ${GITHUB_WORKSPACE} + + - name: Commit & push changes + if: steps.prettier_status.outputs.result == 'fail' + run: | + git config user.email "core@nf-co.re" + git config user.name "nf-core-bot" + git config push.default upstream + git add . + git status + git commit -m "[automated] Fix linting with Prettier" + git push diff --git a/.github/workflows/linting.yml b/.github/workflows/linting.yml new file mode 100644 index 00000000..eae4685a --- /dev/null +++ b/.github/workflows/linting.yml @@ -0,0 +1,129 @@ +name: nf-core linting +# This workflow is triggered on pushes and PRs to the repository. +# It runs the `nf-core lint` and markdown lint tests to ensure +# that the code meets the nf-core guidelines. 
+on: + push: + pull_request: + release: + types: [published] + +jobs: + EditorConfig: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + + - uses: actions/setup-node@v2 + + - name: Install editorconfig-checker + run: npm install -g editorconfig-checker + + - name: Run ECLint check + run: editorconfig-checker -exclude README.md $(find .* -type f | grep -v '.git\|.py\|.md\|json\|yml\|yaml\|html\|css\|work\|.nextflow\|build\|nf_core.egg-info\|log.txt\|Makefile') + + Prettier: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + + - uses: actions/setup-node@v2 + + - name: Install Prettier + run: npm install -g prettier + + - name: Run Prettier --check + run: prettier --check ${GITHUB_WORKSPACE} + + PythonBlack: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + + - name: Check code lints with Black + uses: psf/black@stable + + # If the above check failed, post a comment on the PR explaining the failure + - name: Post PR comment + if: failure() + uses: mshick/add-pr-comment@v1 + with: + message: | + ## Python linting (`black`) is failing + + To keep the code consistent with lots of contributors, we run automated code consistency checks. + To fix this CI test, please run: + + * Install [`black`](https://black.readthedocs.io/en/stable/): `pip install black` + * Fix formatting errors in your pipeline: `black .` + + Once you push these changes the test should pass, and you can hide this comment :+1: + + We highly recommend setting up Black in your code editor so that this formatting is done automatically on save. Ask about it on Slack for help! + + Thanks again for your contribution! + repo-token: ${{ secrets.GITHUB_TOKEN }} + allow-repeats: false + + nf-core: + runs-on: ubuntu-latest + steps: + - name: Check out pipeline code + uses: actions/checkout@v2 + + - name: Install Nextflow + uses: nf-core/setup-nextflow@v1 + + - uses: actions/setup-python@v3 + with: + python-version: "3.7" + architecture: "x64" + + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install nf-core + + - name: Run nf-core lint + env: + GITHUB_COMMENTS_URL: ${{ github.event.pull_request.comments_url }} + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + GITHUB_PR_COMMIT: ${{ github.event.pull_request.head.sha }} + run: nf-core -l lint_log.txt lint --dir ${GITHUB_WORKSPACE} --markdown lint_results.md + + - name: Save PR number + if: ${{ always() }} + run: echo ${{ github.event.pull_request.number }} > PR_number.txt + + - name: Upload linting log file artifact + if: ${{ always() }} + uses: actions/upload-artifact@v2 + with: + name: linting-logs + path: | + lint_log.txt + lint_results.md + PR_number.txt + + bin: + name: Lint bin/ directory + runs-on: ubuntu-latest + steps: + - name: Checkout Code + uses: actions/checkout@v2 + with: + # Full git history is needed to get a proper list of changed files within `super-linter` + fetch-depth: 0 + + - name: Lint Code Base + uses: github/super-linter@v4 + env: + VALIDATE_ALL_CODEBASE: false + DEFAULT_BRANCH: master + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + FILTER_REGEX_INCLUDE: ".*bin/.*" + PYTHON_BLACK_CONFIG_FILE: pyproject.toml + PYTHON_ISORT_CONFIG_FILE: pyproject.toml + LINTER_RULES_PATH: / + VALIDATE_PERL: false +# diff --git a/.github/workflows/linting_comment.yml b/.github/workflows/linting_comment.yml new file mode 100644 index 00000000..04758f61 --- /dev/null +++ b/.github/workflows/linting_comment.yml @@ -0,0 +1,28 @@ +name: nf-core linting comment +# This workflow is triggered after the linting action 
is complete +# It posts an automated comment to the PR, even if the PR is coming from a fork + +on: + workflow_run: + workflows: ["nf-core linting"] + +jobs: + test: + runs-on: ubuntu-latest + steps: + - name: Download lint results + uses: dawidd6/action-download-artifact@v2 + with: + workflow: linting.yml + workflow_conclusion: completed + + - name: Get PR number + id: pr_number + run: echo "::set-output name=pr_number::$(cat linting-logs/PR_number.txt)" + + - name: Post PR comment + uses: marocchino/sticky-pull-request-comment@v2 + with: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + number: ${{ steps.pr_number.outputs.pr_number }} + path: linting-logs/lint_results.md diff --git a/.gitignore b/.gitignore index 5b54e3e6..5124c9ac 100644 --- a/.gitignore +++ b/.gitignore @@ -3,5 +3,6 @@ work/ data/ results/ .DS_Store -tests/test_data +testing/ +testing* *.pyc diff --git a/.gitpod.yml b/.gitpod.yml new file mode 100644 index 00000000..85d95ecc --- /dev/null +++ b/.gitpod.yml @@ -0,0 +1,14 @@ +image: nfcore/gitpod:latest + +vscode: + extensions: # based on nf-core.nf-core-extensionpack + - codezombiech.gitignore # Language support for .gitignore files + # - cssho.vscode-svgviewer # SVG viewer + - esbenp.prettier-vscode # Markdown/CommonMark linting and style checking for Visual Studio Code + - eamodio.gitlens # Quickly glimpse into whom, why, and when a line or code block was changed + - EditorConfig.EditorConfig # override user/workspace settings with settings found in .editorconfig files + - Gruntfuggly.todo-tree # Display TODO and FIXME in a tree view in the activity bar + - mechatroner.rainbow-csv # Highlight columns in csv files in different colors + # - nextflow.nextflow # Nextflow syntax highlighting + - oderwat.indent-rainbow # Highlight indentation level + - streetsidesoftware.code-spell-checker # Spelling checker for source code diff --git a/.nf-core.yml b/.nf-core.yml new file mode 100644 index 00000000..89348e55 --- /dev/null +++ b/.nf-core.yml @@ -0,0 +1,6 @@ +repository_type: pipeline +lint: + files_unchanged: + - .github/workflows/linting.yml + - LICENSE + - assets/email_template.html diff --git a/.prettierignore b/.prettierignore new file mode 100644 index 00000000..eb74a574 --- /dev/null +++ b/.prettierignore @@ -0,0 +1,10 @@ +email_template.html +adaptivecard.json +.nextflow* +work/ +data/ +results/ +.DS_Store +testing/ +testing* +*.pyc diff --git a/.prettierrc.yml b/.prettierrc.yml new file mode 100644 index 00000000..c81f9a76 --- /dev/null +++ b/.prettierrc.yml @@ -0,0 +1 @@ +printWidth: 120 diff --git a/.travis.yml b/.travis.yml deleted file mode 100644 index b88805da..00000000 --- a/.travis.yml +++ /dev/null @@ -1,42 +0,0 @@ -sudo: required -language: python -jdk: openjdk8 -services: docker -python: '3.6' -cache: pip -matrix: - fast_finish: true - -before_install: - # PRs to master are only ok if coming from dev branch - - '[ $TRAVIS_PULL_REQUEST = "false" ] || [ $TRAVIS_BRANCH != "master" ] || ([ $TRAVIS_PULL_REQUEST_SLUG = $TRAVIS_REPO_SLUG ] && [ $TRAVIS_PULL_REQUEST_BRANCH = "dev" ])' - # Pull the docker image first so the test doesn't wait for this - - docker pull nfcore/nascent:dev - # Fake the tag locally so that the pipeline runs properly - # Looks weird when this is :dev to :dev, but makes sense when testing code for a release (:dev to :1.0.1) - - docker tag nfcore/nascent:dev nfcore/nascent:1.0 - -install: - # Install Nextflow - - mkdir /tmp/nextflow && cd /tmp/nextflow - - wget -qO- get.nextflow.io | bash - - sudo ln -s /tmp/nextflow/nextflow 
/usr/local/bin/nextflow - # Install nf-core/tools - - pip install --upgrade pip - - pip install nf-core - # Reset - - mkdir ${TRAVIS_BUILD_DIR}/tests && cd ${TRAVIS_BUILD_DIR}/tests - # Install markdownlint-cli - - sudo apt-get install npm && npm install -g markdownlint-cli - -env: - - NXF_VER='0.32.0' # Specify a minimum NF version that should be tested and work - - NXF_VER='' # Plus: get the latest NF version and check that it works - -script: - # Lint the pipeline code - - nf-core lint ${TRAVIS_BUILD_DIR} - # Lint the documentation - - markdownlint ${TRAVIS_BUILD_DIR} -c ${TRAVIS_BUILD_DIR}/.github/markdownlint.yml - # Run the pipeline with the test profile - - nextflow run ${TRAVIS_BUILD_DIR} -profile test,docker diff --git a/CHANGELOG.md b/CHANGELOG.md index c025c0d4..e85c9d6a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,4 +1,31 @@ # nf-core/nascent: Changelog +The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) +and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). + +## v2.0.0 - 2022-10-20 + +### `Added` + +- DSL2 conversion +- [[#28](https://github.com/nf-core/nascent/issues/28)] - Added DRAGMAP alignment +- [[#64](https://github.com/nf-core/nascent/pull/64)] - Added CHM13 igenomes config +- [[#39](https://github.com/nf-core/nascent/issues/39)] - Add PINTS for TSS identification +- [[#71](https://github.com/nf-core/nascent/issues/71)] - Add FASTP for adapter trimming +- [[#77](https://github.com/nf-core/nascent/issues/77)] - Add dedup subworkflow + +### `Fixed` + +- [[#33](https://github.com/nf-core/nascent/issues/33)] - groHMM works on full runs. Added the keep standard chromosomes function to standardize bam files. + +### `Dependencies` + +- Updated Nextflow version to `v21.10.6` + +## v1.0.1 - 2020-03-03 + +Update to the container to meet the latest template requirements, and dependencies for new features in an upcoming PR (R and Picard tools). + ## v1.0 - 2019-04-16 + Initial release of nf-core/nascent, created with the [nf-core](http://nf-co.re/) template. diff --git a/CITATION.cff b/CITATION.cff new file mode 100644 index 00000000..017666c0 --- /dev/null +++ b/CITATION.cff @@ -0,0 +1,56 @@ +cff-version: 1.2.0 +message: "If you use `nf-core tools` in your work, please cite the `nf-core` publication" +authors: + - family-names: Ewels + given-names: Philip + - family-names: Peltzer + given-names: Alexander + - family-names: Fillinger + given-names: Sven + - family-names: Patel + given-names: Harshil + - family-names: Alneberg + given-names: Johannes + - family-names: Wilm + given-names: Andreas + - family-names: Garcia + given-names: Maxime Ulysse + - family-names: Di Tommaso + given-names: Paolo + - family-names: Nahnsen + given-names: Sven +title: "The nf-core framework for community-curated bioinformatics pipelines." 
+version: 2.4.1 +doi: 10.1038/s41587-020-0439-x +date-released: 2022-05-16 +url: https://github.com/nf-core/tools +prefered-citation: + type: article + authors: + - family-names: Ewels + given-names: Philip + - family-names: Peltzer + given-names: Alexander + - family-names: Fillinger + given-names: Sven + - family-names: Patel + given-names: Harshil + - family-names: Alneberg + given-names: Johannes + - family-names: Wilm + given-names: Andreas + - family-names: Garcia + given-names: Maxime Ulysse + - family-names: Di Tommaso + given-names: Paolo + - family-names: Nahnsen + given-names: Sven + doi: 10.1038/s41587-020-0439-x + journal: nature biotechnology + start: 276 + end: 278 + title: "The nf-core framework for community-curated bioinformatics pipelines." + issue: 3 + volume: 38 + year: 2020 + url: https://dx.doi.org/10.1038/s41587-020-0439-x diff --git a/CITATIONS.md b/CITATIONS.md new file mode 100644 index 00000000..d4897cef --- /dev/null +++ b/CITATIONS.md @@ -0,0 +1,112 @@ +# nf-core/nascent: Citations + +## [nf-core](https://pubmed.ncbi.nlm.nih.gov/32055031/) + +> Ewels PA, Peltzer A, Fillinger S, Patel H, Alneberg J, Wilm A, Garcia MU, Di Tommaso P, Nahnsen S. The nf-core framework for community-curated bioinformatics pipelines. Nat Biotechnol. 2020 Mar;38(3):276-278. doi: 10.1038/s41587-020-0439-x. PubMed PMID: 32055031. + +## [Nextflow](https://pubmed.ncbi.nlm.nih.gov/28398311/) + +> Di Tommaso P, Chatzou M, Floden EW, Barja PP, Palumbo E, Notredame C. Nextflow enables reproducible computational workflows. Nat Biotechnol. 2017 Apr 11;35(4):316-319. doi: 10.1038/nbt.3820. PubMed PMID: 28398311. + +## Pipeline tools + +- [BBMap](https://sourceforge.net/projects/bbmap/) + +- [BEDTools](https://pubmed.ncbi.nlm.nih.gov/20110278/) + + > Quinlan AR, Hall IM. BEDTools: a flexible suite of utilities for comparing genomic features. Bioinformatics. 2010 Mar 15;26(6):841-2. doi: 10.1093/bioinformatics/btq033. Epub 2010 Jan 28. PubMed PMID: 20110278; PubMed Central PMCID: PMC2832824. + +- [BWA-MEM](https://arxiv.org/abs/1303.3997v2) + + > Li H: Aligning sequence reads, clone sequences and assembly contigs with BWA-MEM. arXiv 2013. doi: 10.48550/arXiv.1303.3997 + +- [BWA-MEM2](https://ieeexplore.ieee.org/document/8820962) + + > M. Vasimuddin, S. Misra, H. Li and S. Aluru, "Efficient Architecture-Aware Acceleration of BWA-MEM for Multicore Systems," 2019 IEEE International Parallel and Distributed Processing Symposium (IPDPS), 2019, pp. 314-324. doi: 10.1109/IPDPS.2019.00041. + +- [deepTools](https://github.com/deeptools/deepTools) + + > Ramírez, Fidel, Devon P. Ryan, Björn Grüning, Vivek Bhardwaj, Fabian Kilpert, Andreas S. Richter, Steffen Heyne, Friederike Dündar, and Thomas Manke. deepTools2: A next Generation Web Server for Deep-Sequencing Data Analysis. Nucleic Acids Research (2016). doi:10.1093/nar/gkw257. + +- [DragMap](https://github.com/Illumina/DRAGMAP) + +- [FastP](https://academic.oup.com/bioinformatics/article/34/17/i884/5093234) + + > Shifu Chen, Yanqing Zhou, Yaru Chen, Jia Gu, fastp: an ultra-fast all-in-one FASTQ preprocessor, Bioinformatics, Volume 34, Issue 17, 01 September 2018, Pages i884–i890, doi: 10.1093/bioinformatics/bty560. PubMed PMID: 30423086. PubMed Central PMCID: PMC6129281 + +- [FastQC](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/) + +- [featureCounts](https://pubmed.ncbi.nlm.nih.gov/24227677/) + + > Liao Y, Smyth GK, Shi W. featureCounts: an efficient general purpose program for assigning sequence reads to genomic features. Bioinformatics. 
2014 Apr 1;30(7):923-30. doi: 10.1093/bioinformatics/btt656. Epub 2013 Nov 13. PubMed PMID: 24227677. + +- [GffRead](https://pubmed.ncbi.nlm.nih.gov/32489650/) + + > Pertea G, Pertea M. GFF Utilities: GffRead and GffCompare. F1000Res. 2020 Apr 28;9:ISCB Comm J-304. doi: 10.12688/f1000research.23297.2. eCollection 2020. PubMed PMID: 32489650; PubMed Central PMCID: PMC7222033. + +- [HOMER](http://homer.ucsd.edu/homer/index.html) + +> Heinz S, Benner C, Spann N, Bertolino E et al. Simple Combinations of Lineage-Determining Transcription Factors Prime cis-Regulatory Elements Required for Macrophage and B Cell Identities. Mol Cell 2010 May 28;38(4):576-589. PMID: 20513432 + +- [MultiQC](https://pubmed.ncbi.nlm.nih.gov/27312411/) + + > Ewels P, Magnusson M, Lundin S, Käller M. MultiQC: summarize analysis results for multiple tools and samples in a single report. Bioinformatics. 2016 Oct 1;32(19):3047-8. doi: 10.1093/bioinformatics/btw354. Epub 2016 Jun 16. PubMed PMID: 27312411; PubMed Central PMCID: PMC5039924. + +- [PINTS](https://pints.yulab.org/) + + > Yao, L., Liang, J., Ozer, A. et al. A comparison of experimental assays and analytical methods for genome-wide identification of active enhancers. Nat Biotechnol 40, 1056–1065 (2022). https://doi.org/10.1038/s41587-022-01211-7 + +- [preseq](https://pubmed.ncbi.nlm.nih.gov/23435259/) + + > Daley T, Smith AD. Predicting the molecular complexity of sequencing libraries. Nat Methods. 2013 Apr;10(4):325-7. doi: 10.1038/nmeth.2375. Epub 2013 Feb 24. PubMed PMID: 23435259; PubMed Central PMCID: PMC3612374. + +- [RSeQC](https://pubmed.ncbi.nlm.nih.gov/22743226/) + + > Wang L, Wang S, Li W. RSeQC: quality control of RNA-seq experiments Bioinformatics. 2012 Aug 15;28(16):2184-5. doi: 10.1093/bioinformatics/bts356. Epub 2012 Jun 27. PubMed PMID: 22743226. + +- [SAMtools](https://pubmed.ncbi.nlm.nih.gov/19505943/) + + > Li H, Handsaker B, Wysoker A, Fennell T, Ruan J, Homer N, Marth G, Abecasis G, Durbin R; 1000 Genome Project Data Processing Subgroup. The Sequence Alignment/Map format and SAMtools. Bioinformatics. 2009 Aug 15;25(16):2078-9. doi: 10.1093/bioinformatics/btp352. Epub 2009 Jun 8. PubMed PMID: 19505943; PubMed Central PMCID: PMC2723002. + +- [UMI-tools](https://pubmed.ncbi.nlm.nih.gov/28100584/) + + > Smith T, Heger A, Sudbery I. UMI-tools: modeling sequencing errors in Unique Molecular Identifiers to improve quantification accuracy Genome Res. 2017 Mar;27(3):491-499. doi: 10.1101/gr.209601.116. Epub 2017 Jan 18. PubMed PMID: 28100584; PubMed Central PMCID: PMC5340976. + +## R packages + +- [R](https://www.R-project.org/) + + > R Core Team (2017). R: A language and environment for statistical computing. R Foundation for Statistical Computing, Vienna, Austria. + +- [argparse](https://cran.r-project.org/package=argparse) + +- [GenomicAlignments](https://bioconductor.org/packages/GenomicAlignments/) + + > Lawrence M, Huber W, Pagès H, Aboyoun P, Carlson M, Gentleman R, Morgan M, Carey V (2013). “Software for Computing and Annotating Genomic Ranges.” PLoS Computational Biology, 9. doi: 10.1371/journal.pcbi.1003118, http://www.ploscompbiol.org/article/info%3Adoi%2F10.1371%2Fjournal.pcbi.1003118. + +- [GenomicFeatures](https://www.bioconductor.org/packages/GenomicFeatures/) + + > Lawrence M, Huber W, Pagès H, Aboyoun P, Carlson M, Gentleman R, Morgan M, Carey V (2013). “Software for Computing and Annotating Genomic Ranges.” PLoS Computational Biology, 9. 
doi: 10.1371/journal.pcbi.1003118, http://www.ploscompbiol.org/article/info%3Adoi%2F10.1371%2Fjournal.pcbi.1003118. + +- [groHMM](https://www.bioconductor.org/packages/groHMM/) + + > Chae M, Danko CG, Kraus WL (2015). “groHMM: a computational tool for identifying unannotated and cell type-specific transcription units from global run-on sequencing data.” BMC Bioinformatics, 16(222). + +## Software packaging/containerisation tools + +- [Anaconda](https://anaconda.com) + + > Anaconda Software Distribution. Computer software. Vers. 2-2.4.0. Anaconda, Nov. 2016. Web. + +- [Bioconda](https://pubmed.ncbi.nlm.nih.gov/29967506/) + + > Grüning B, Dale R, Sjödin A, Chapman BA, Rowe J, Tomkins-Tinch CH, Valieris R, Köster J; Bioconda Team. Bioconda: sustainable and comprehensive software distribution for the life sciences. Nat Methods. 2018 Jul;15(7):475-476. doi: 10.1038/s41592-018-0046-7. PubMed PMID: 29967506. + +- [BioContainers](https://pubmed.ncbi.nlm.nih.gov/28379341/) + + > da Veiga Leprevost F, Grüning B, Aflitos SA, Röst HL, Uszkoreit J, Barsnes H, Vaudel M, Moreno P, Gatto L, Weber J, Bai M, Jimenez RC, Sachsenberg T, Pfeuffer J, Alvarez RV, Griss J, Nesvizhskii AI, Perez-Riverol Y. BioContainers: an open-source and community-driven framework for software standardization. Bioinformatics. 2017 Aug 15;33(16):2580-2582. doi: 10.1093/bioinformatics/btx192. PubMed PMID: 28379341; PubMed Central PMCID: PMC5870671. + +- [Docker](https://dl.acm.org/doi/10.5555/2600239.2600241) + +- [Singularity](https://pubmed.ncbi.nlm.nih.gov/28494014/) + > Kurtzer GM, Sochat V, Bauer MW. Singularity: Scientific containers for mobility of compute. PLoS One. 2017 May 11;12(5):e0177459. doi: 10.1371/journal.pone.0177459. eCollection 2017. PubMed PMID: 28494014; PubMed Central PMCID: PMC5426675. diff --git a/CODE_OF_CONDUCT.md b/CODE_OF_CONDUCT.md index 09226d0d..f4fd052f 100644 --- a/CODE_OF_CONDUCT.md +++ b/CODE_OF_CONDUCT.md @@ -1,46 +1,111 @@ -# Contributor Covenant Code of Conduct +# Code of Conduct at nf-core (v1.0) ## Our Pledge -In the interest of fostering an open and welcoming environment, we as contributors and maintainers pledge to making participation in our project and our community a harassment-free experience for everyone, regardless of age, body size, disability, ethnicity, gender identity and expression, level of experience, nationality, personal appearance, race, religion, or sexual identity and orientation. +In the interest of fostering an open, collaborative, and welcoming environment, we as contributors and maintainers of nf-core, pledge to making participation in our projects and community a harassment-free experience for everyone, regardless of: -## Our Standards +- Age +- Body size +- Familial status +- Gender identity and expression +- Geographical location +- Level of experience +- Nationality and national origins +- Native language +- Physical and neurological ability +- Race or ethnicity +- Religion +- Sexual identity and orientation +- Socioeconomic status -Examples of behavior that contributes to creating a positive environment include: +Please note that the list above is alphabetised and is therefore not ranked in any order of preference or importance. 
-* Using welcoming and inclusive language -* Being respectful of differing viewpoints and experiences -* Gracefully accepting constructive criticism -* Focusing on what is best for the community -* Showing empathy towards other community members +## Preamble -Examples of unacceptable behavior by participants include: +> Note: This Code of Conduct (CoC) has been drafted by the nf-core Safety Officer and been edited after input from members of the nf-core team and others. "We", in this document, refers to the Safety Officer and members of the nf-core core team, both of whom are deemed to be members of the nf-core community and are therefore required to abide by this Code of Conduct. This document will amended periodically to keep it up-to-date, and in case of any dispute, the most current version will apply. -* The use of sexualized language or imagery and unwelcome sexual attention or advances -* Trolling, insulting/derogatory comments, and personal or political attacks -* Public or private harassment -* Publishing others' private information, such as a physical or electronic address, without explicit permission -* Other conduct which could reasonably be considered inappropriate in a professional setting +An up-to-date list of members of the nf-core core team can be found [here](https://nf-co.re/about). Our current safety officer is Renuka Kudva. + +nf-core is a young and growing community that welcomes contributions from anyone with a shared vision for [Open Science Policies](https://www.fosteropenscience.eu/taxonomy/term/8). Open science policies encompass inclusive behaviours and we strive to build and maintain a safe and inclusive environment for all individuals. + +We have therefore adopted this code of conduct (CoC), which we require all members of our community and attendees in nf-core events to adhere to in all our workspaces at all times. Workspaces include but are not limited to Slack, meetings on Zoom, Jitsi, YouTube live etc. + +Our CoC will be strictly enforced and the nf-core team reserve the right to exclude participants who do not comply with our guidelines from our workspaces and future nf-core activities. + +We ask all members of our community to help maintain a supportive and productive workspace and to avoid behaviours that can make individuals feel unsafe or unwelcome. Please help us maintain and uphold this CoC. + +Questions, concerns or ideas on what we can include? Contact safety [at] nf-co [dot] re ## Our Responsibilities -Project maintainers are responsible for clarifying the standards of acceptable behavior and are expected to take appropriate and fair corrective action in response to any instances of unacceptable behavior. +The safety officer is responsible for clarifying the standards of acceptable behavior and are expected to take appropriate and fair corrective action in response to any instances of unacceptable behaviour. + +The safety officer in consultation with the nf-core core team have the right and responsibility to remove, edit, or reject comments, commits, code, wiki edits, issues, and other contributions that are not aligned to this Code of Conduct, or to ban temporarily or permanently any contributor for other behaviors that they deem inappropriate, threatening, offensive, or harmful. + +Members of the core team or the safety officer who violate the CoC will be required to recuse themselves pending investigation. They will not have access to any reports of the violations and be subject to the same actions as others in violation of the CoC. 
+ +## When are where does this Code of Conduct apply? + +Participation in the nf-core community is contingent on following these guidelines in all our workspaces and events. This includes but is not limited to the following listed alphabetically and therefore in no order of preference: + +- Communicating with an official project email address. +- Communicating with community members within the nf-core Slack channel. +- Participating in hackathons organised by nf-core (both online and in-person events). +- Participating in collaborative work on GitHub, Google Suite, community calls, mentorship meetings, email correspondence. +- Participating in workshops, training, and seminar series organised by nf-core (both online and in-person events). This applies to events hosted on web-based platforms such as Zoom, Jitsi, YouTube live etc. +- Representing nf-core on social media. This includes both official and personal accounts. + +## nf-core cares 😊 + +nf-core's CoC and expectations of respectful behaviours for all participants (including organisers and the nf-core team) include but are not limited to the following (listed in alphabetical order): + +- Ask for consent before sharing another community member’s personal information (including photographs) on social media. +- Be respectful of differing viewpoints and experiences. We are all here to learn from one another and a difference in opinion can present a good learning opportunity. +- Celebrate your accomplishments at events! (Get creative with your use of emojis 🎉 🥳 💯 🙌 !) +- Demonstrate empathy towards other community members. (We don’t all have the same amount of time to dedicate to nf-core. If tasks are pending, don’t hesitate to gently remind members of your team. If you are leading a task, ask for help if you feel overwhelmed.) +- Engage with and enquire after others. (This is especially important given the geographically remote nature of the nf-core community, so let’s do this the best we can) +- Focus on what is best for the team and the community. (When in doubt, ask) +- Graciously accept constructive criticism, yet be unafraid to question, deliberate, and learn. +- Introduce yourself to members of the community. (We’ve all been outsiders and we know that talking to strangers can be hard for some, but remember we’re interested in getting to know you and your visions for open science!) +- Show appreciation and **provide clear feedback**. (This is especially important because we don’t see each other in person and it can be harder to interpret subtleties. Also remember that not everyone understands a certain language to the same extent as you do, so **be clear in your communications to be kind.**) +- Take breaks when you feel like you need them. +- Using welcoming and inclusive language. (Participants are encouraged to display their chosen pronouns on Zoom or in communication on Slack.) + +## nf-core frowns on 😕 + +The following behaviours from any participants within the nf-core community (including the organisers) will be considered unacceptable under this code of conduct. Engaging or advocating for any of the following could result in expulsion from nf-core workspaces. + +- Deliberate intimidation, stalking or following and sustained disruption of communication among participants of the community. This includes hijacking shared screens through actions such as using the annotate tool in conferencing software such as Zoom. +- “Doxing” i.e. posting (or threatening to post) another person’s personal identifying information online. 
+- Spamming or trolling of individuals on social media. +- Use of sexual or discriminatory imagery, comments, or jokes and unwelcome sexual attention. +- Verbal and text comments that reinforce social structures of domination related to gender, gender identity and expression, sexual orientation, ability, physical appearance, body size, race, age, religion or work experience. + +### Online Trolling + +The majority of nf-core interactions and events are held online. Unfortunately, holding events online comes with the added issue of online trolling. This is unacceptable, reports of such behaviour will be taken very seriously, and perpetrators will be excluded from activities immediately. + +All community members are required to ask members of the group they are working within for explicit consent prior to taking screenshots of individuals during video calls. + +## Procedures for Reporting CoC violations -Project maintainers have the right and responsibility to remove, edit, or reject comments, commits, code, wiki edits, issues, and other contributions that are not aligned to this Code of Conduct, or to ban temporarily or permanently any contributor for other behaviors that they deem inappropriate, threatening, offensive, or harmful. +If someone makes you feel uncomfortable through their behaviours or actions, report it as soon as possible. -## Scope +You can reach out to members of the [nf-core core team](https://nf-co.re/about) and they will forward your concerns to the safety officer(s). -This Code of Conduct applies both within project spaces and in public spaces when an individual is representing the project or its community. Examples of representing a project or community include using an official project e-mail address, posting via an official social media account, or acting as an appointed representative at an online or offline event. Representation of a project may be further defined and clarified by project maintainers. +Issues directly concerning members of the core team will be dealt with by other members of the core team and the safety manager, and possible conflicts of interest will be taken into account. nf-core is also in discussions about having an ombudsperson, and details will be shared in due course. -## Enforcement +All reports will be handled with utmost discretion and confidentially. -Instances of abusive, harassing, or otherwise unacceptable behavior may be reported by contacting the project team on [Slack](https://nf-core-invite.herokuapp.com/). The project team will review and investigate all complaints, and will respond in a way that it deems appropriate to the circumstances. The project team is obligated to maintain confidentiality with regard to the reporter of an incident. Further details of specific enforcement policies may be posted separately. +## Attribution and Acknowledgements -Project maintainers who do not follow or enforce the Code of Conduct in good faith may face temporary or permanent repercussions as determined by other members of the project's leadership. 
+- The [Contributor Covenant, version 1.4](http://contributor-covenant.org/version/1/4) +- The [OpenCon 2017 Code of Conduct](http://www.opencon2017.org/code_of_conduct) (CC BY 4.0 OpenCon organisers, SPARC and Right to Research Coalition) +- The [eLife innovation sprint 2020 Code of Conduct](https://sprint.elifesciences.org/code-of-conduct/) +- The [Mozilla Community Participation Guidelines v3.1](https://www.mozilla.org/en-US/about/governance/policies/participation/) (version 3.1, CC BY-SA 3.0 Mozilla) -## Attribution +## Changelog -This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4, available at [http://contributor-covenant.org/version/1/4][version] +### v1.0 - March 12th, 2021 -[homepage]: http://contributor-covenant.org -[version]: http://contributor-covenant.org/version/1/4/ +- Complete rewrite from original [Contributor Covenant](http://contributor-covenant.org/) CoC. diff --git a/Dockerfile b/Dockerfile deleted file mode 100644 index 0553b09a..00000000 --- a/Dockerfile +++ /dev/null @@ -1,7 +0,0 @@ -FROM nfcore/base -LABEL authors="Ignacio Tripodi (ignacio.tripodi@colorado.edu), Margaret Gruca (margaret.gruca@colorado.edu)" \ - description="Docker image containing all requirements for nf-core/nascent pipeline" - -COPY environment.yml / -RUN conda env create -f /environment.yml && conda clean -a -ENV PATH /opt/conda/envs/nf-core-nascent-1.0/bin:$PATH diff --git a/LICENSE b/LICENSE index 0ecb5be4..afec2257 100644 --- a/LICENSE +++ b/LICENSE @@ -1,6 +1,6 @@ MIT License -Copyright (c) Ignacio Tripodi, Margaret Gruca +Copyright (c) Edmund Miller, Ignacio Tripodi, Margaret Gruca Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/README.md b/README.md index 8225cc64..b9b31f3d 100644 --- a/README.md +++ b/README.md @@ -1,84 +1,112 @@ -# nf-core/nascent +# ![nf-core/nascent](docs/images/nf-core-nascent_logo_light.png#gh-light-mode-only) ![nf-core/nascent](docs/images/nf-core-nascent_logo_dark.png#gh-dark-mode-only) -**Nascent Transcription Processing Pipeline**. 
+[![AWS CI](https://img.shields.io/badge/CI%20tests-full%20size-FF9900?labelColor=000000&logo=Amazon%20AWS)](https://nf-co.re/nascent/results)[![Cite with Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.157735234-1073c8?labelColor=000000)](https://doi.org/10.5281/zenodo.157735234) -[![Build Status](https://travis-ci.com/nf-core/nascent.svg?branch=master)](https://travis-ci.com/nf-core/nascent) -[![Nextflow](https://img.shields.io/badge/nextflow-%E2%89%A50.32.0-brightgreen.svg)](https://www.nextflow.io/) -[![DOI](https://zenodo.org/badge/157735234.svg)](https://zenodo.org/badge/latestdoi/157735234) +[![Nextflow](https://img.shields.io/badge/nextflow%20DSL2-%E2%89%A521.10.6-23aa62.svg)](https://www.nextflow.io/) +[![run with conda](http://img.shields.io/badge/run%20with-conda-3EB049?labelColor=000000&logo=anaconda)](https://docs.conda.io/en/latest/) +[![run with docker](https://img.shields.io/badge/run%20with-docker-0db7ed?labelColor=000000&logo=docker)](https://www.docker.com/) +[![run with singularity](https://img.shields.io/badge/run%20with-singularity-1d355c.svg?labelColor=000000)](https://sylabs.io/docs/) +[![Launch on Nextflow Tower](https://img.shields.io/badge/Launch%20%F0%9F%9A%80-Nextflow%20Tower-%234256e7)](https://tower.nf/launch?pipeline=https://github.com/nf-core/nascent) -[![install with bioconda](https://img.shields.io/badge/install%20with-bioconda-brightgreen.svg)](http://bioconda.github.io/) -[![Docker](https://img.shields.io/docker/automated/nfcore/nascent.svg)](https://hub.docker.com/r/nfcore/nascent) -![Singularity Container available]( -https://img.shields.io/badge/singularity-available-7E4C74.svg) +[![Get help on Slack](http://img.shields.io/badge/slack-nf--core%20%23nascent-4A154B?labelColor=000000&logo=slack)](https://nfcore.slack.com/channels/nascent)[![Follow on Twitter](http://img.shields.io/badge/twitter-%40nf__core-1DA1F2?labelColor=000000&logo=twitter)](https://twitter.com/nf_core)[![Watch on YouTube](http://img.shields.io/badge/youtube-nf--core-FF0000?labelColor=000000&logo=youtube)](https://www.youtube.com/c/nf-core) -### Introduction -The pipeline is built using [Nextflow](https://www.nextflow.io), a workflow tool to run tasks across multiple compute infrastructures in a very portable manner. It comes with docker / singularity containers making installation trivial and results highly reproducible. +## Introduction +**nf-core/nascent** is a bioinformatics best-practice analysis pipeline for nascent transcript (NT) and Transcriptional Start Site (TSS) assays. -#### Reference +The pipeline is built using [Nextflow](https://www.nextflow.io), a workflow tool to run tasks across multiple compute infrastructures in a very portable manner. It uses Docker/Singularity containers making installation trivial and results highly reproducible. The [Nextflow DSL2](https://www.nextflow.io/docs/latest/dsl2.html) implementation of this pipeline uses one container per process which makes it much easier to maintain and update software dependencies. Where possible, these processes have been submitted to and installed from [nf-core/modules](https://github.com/nf-core/modules) in order to make them available to all nf-core pipelines, and to everyone within the Nextflow community! -If you've used this pipeline in your research, you can cite this pipeline using DOI 10.17605/OSF.IO/SV4UB ([OSF project](https://osf.io/sv4ub/)). + + -### Documentation -The nf-core/nascent pipeline comes with documentation about the pipeline, found in the `docs/` directory: -1. 
[Installation](https://nf-co.re/usage/installation)
-2. Pipeline configuration
-    * [Local installation](https://nf-co.re/usage/local_installation)
-    * [Adding your own system config](https://nf-co.re/usage/adding_own_config)
-    * [Reference genomes](https://nf-co.re/usage/reference_genomes)
-3. [Running the pipeline](docs/usage.md)
-4. [Output and how to interpret the results](docs/output.md)
-5. [Troubleshooting](https://nf-co.re/usage/troubleshooting)
+On release, automated continuous integration tests run the pipeline on a full-sized dataset on the AWS cloud infrastructure. This ensures that the pipeline runs on AWS, has sensible resource allocation defaults set to run on real-world datasets, and permits the persistent storage of results to benchmark between pipeline releases and other analysis sources. The results obtained from the full-sized test can be viewed on the [nf-core website](https://nf-co.re/nascent/results).
-This pipeline is designed to process the sequencing output of nascent transcription assays, like GRO-seq or PRO-seq. It produces bedGraph- and bigWig-fomatted outputs after mapping strand-specific reads, as well as other useful outputs like quality control reports or IGV-ready (Integrative Genomics Viewer) tdf files.
+## Pipeline summary
-### Quick start
+1. Read QC ([`FastQC`](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/))
+2. Adapter and quality trimming ([`fastp`](https://github.com/OpenGene/fastp))
+3. Alignment
+   1. [`bwa`](https://bio-bwa.sourceforge.net/)
+   2. [`bwamem2`](https://github.com/bwa-mem2/bwa-mem2)
+   3. [`DRAGMAP`](https://github.com/Illumina/DRAGMAP)
+4. Sort and index alignments ([`SAMtools`](https://sourceforge.net/projects/samtools/files/samtools/))
+5. UMI-based deduplication ([`UMI-tools`](https://github.com/CGATOxford/UMI-tools))
+6. Duplicate read marking ([`picard MarkDuplicates`](https://broadinstitute.github.io/picard/))
+7. Quality Control
+   1. [`RSeQC`](https://rseqc.sourceforge.net/index.html) - Various RNA-seq QC metrics
+   2. [`Preseq`](http://smithlabresearch.org/software/preseq/) - Estimation of library complexity
+   3. [`BBMap`](https://sourceforge.net/projects/bbmap/) - Analyzes the sequencing coverage
+8. Coverage Graphs
+   1. Create bedGraph coverage files ([`BEDTools`](https://github.com/arq5x/bedtools2/))
+   2. Create bigWig coverage files ([`deeptools`](https://deeptools.readthedocs.io/en/develop/))
+9. Transcript identification
+   1. [`HOMER`](http://homer.ucsd.edu/)
+   2. [`GroHMM`](https://bioconductor.org/packages/release/bioc/html/groHMM.html)
+   3. [`PINTS`](https://pints.yulab.org/)
+10. Quantification of Genes and Nascent Transcripts ([`featureCounts`](https://subread.sourceforge.net/featureCounts.html))
+11. Aggregate report describing results and QC from the whole pipeline ([`MultiQC`](http://multiqc.info/))
-Edit the appropriate config file, e.g. `conf/slurm_grch38.config`, to ensure the proper paths are set for genome reference files and other executables (look for all mentions of `COMPLETE_*`). Variable names should hopefully be self-explanatory. You can specify the Nextflow working directory and output directory with flags. Note you must also now specify the email to which the report will be sent for the run.
+## Quick Start
-    nextflow run nf-core/nascent --reads '*_R{1,2}.fastq.gz' -profile standard,docker
+1. Install [`Nextflow`](https://www.nextflow.io/docs/latest/getstarted.html#installation) (`>=21.10.6`)
-## Arguments
+2. 
Install any of [`Docker`](https://docs.docker.com/engine/installation/), [`Singularity`](https://www.sylabs.io/guides/3.0/user-guide/) (you can follow [this tutorial](https://singularity-tutorial.github.io/01-installation/)), [`Podman`](https://podman.io/), [`Shifter`](https://nersc.gitlab.io/development/shifter/how-to-use/) or [`Charliecloud`](https://hpc.github.io/charliecloud/) for full pipeline reproducibility _(you can use [`Conda`](https://conda.io/miniconda.html) both to install Nextflow itself and also to manage software within pipelines. Please only use it within pipelines as a last resort; see [docs](https://nf-co.re/usage/configuration#basic-configuration-profiles))_. -### Required Arguments -| Argument | Usage | Description | -|-----------|----------------------------------|----------------------------------------------------------------------| -| -profile | \ | Configuration profile to use. | -| --fastqs | \ | Directory pattern for fastq files. | -| --sras | \ | Directory pattern for sra files. | -| --genome_id | \<'hg38'> | Genome ID to which the samples will be mapped (e.g. hg38, mm10, rn6).| -| --workdir | \ | Nextflow working directory where all intermediate files are saved. | -| --email | \ | Where to send workflow report email. | +3. Download the pipeline and test it on a minimal dataset with a single command: -### Save Options -| Arguments | Usage | Description | -|------------|---------------|-----------------------------------------------------------| -| --outdir | \ | Specifies where to save the output from the nextflow run. | -| --savefq | | Compresses and saves raw fastq reads. | -| --saveTrim | | Compresses and saves trimmed fastq reads. | -| --saveAll | | Compresses and saves all fastq reads. | -| --skipBAM | | Skips saving BAM files (only save CRAM). Default=False | + ```bash + nextflow run nf-core/nascent -profile test,YOURPROFILE --outdir + ``` -### Input File Options -| Arguments | Usage | Description | -|--------------|-------------|------------------------------------------------------------------------------| -| --singleEnd | | Specifies that the input files are not paired reads (default is paired-end). | -| --flip | | Reverse complements each strand. Necessary for some library preps. | + Note that some form of configuration will be needed so that Nextflow knows how to fetch the required software. This is usually done in the form of a config profile (`YOURPROFILE` in the example command above). You can chain multiple config profiles in a comma-separated string. -### Performance Options + > - The pipeline comes with config profiles called `docker`, `singularity`, `podman`, `shifter`, `charliecloud` and `conda` which instruct the pipeline to use the named tool for software management. For example, `-profile test,docker`. + > - Please check [nf-core/configs](https://github.com/nf-core/configs#documentation) to see if a custom config file to run nf-core pipelines already exists for your Institute. If so, you can simply use `-profile ` in your command. This will enable either `docker` or `singularity` and set the appropriate execution settings for your local compute environment. + > - If you are using `singularity`, please use the [`nf-core download`](https://nf-co.re/tools/#downloading-pipelines-for-offline-use) command to download images first, before running the pipeline. 
Setting the [`NXF_SINGULARITY_CACHEDIR` or `singularity.cacheDir`](https://www.nextflow.io/docs/latest/singularity.html?#singularity-docker-hub) Nextflow options enables you to store and re-use the images from a central location for future pipeline runs.
+   > - If you are using `conda`, it is highly recommended to use the [`NXF_CONDA_CACHEDIR` or `conda.cacheDir`](https://www.nextflow.io/docs/latest/conda.html) settings to store the environments in a central location for future pipeline runs.
-| Arguments | Usage | Description |
-|-----------------|-------------|---------------------------------------------------------|
-| --threadfqdump | | Runs multi-threading for fastq-dump for sra processing. |
+4. Start running your own analysis!
-### QC Options
+   ```bash
+   nextflow run nf-core/nascent --input samplesheet.csv --outdir <OUTDIR> --genome GRCh37 -profile <docker/singularity/podman/shifter/charliecloud/conda/institute>
+   ```
-| Arguments | Usage | Description |
-|-----------------|-------------|---------------------------------------------------------|
-| --skipMultiQC | | Skip running MultiQC. |
-| --skipRSeQC | | Skip running RSeQC. |
+## Documentation
+
+The nf-core/nascent pipeline comes with documentation about the pipeline [usage](https://nf-co.re/nascent/usage), [parameters](https://nf-co.re/nascent/parameters) and [output](https://nf-co.re/nascent/output).
 ## Credits
+
 nf-core/nascent was originally written by Ignacio Tripodi ([@ignaciot](https://github.com/ignaciot)) and Margaret Gruca ([@magruca](https://github.com/magruca)).
-Many thanks to the nf-core team and all who provided invaluable feedback and assistance along the way, particularly to [@apeltzer](https://github.com/apeltzer), [@ewels](https://github.com/ewels), [@drpatelh](https://github.com/drpatelh), and [@pditommaso](https://github.com/pditommaso).
+The pipeline was re-written in Nextflow DSL2 by Edmund Miller ([@Emiller88](https://github.com/emiller88)) and Sruthi Suresh ([@sruthipsuresh](https://github.com/sruthipsuresh)) from [The Functional Genomics Laboratory](https://taehoonkim.org/) at [The University of Texas at Dallas](https://www.utdallas.edu/).
+
+We thank the following people for their extensive assistance in the development of this pipeline:
+
+[@apeltzer](https://github.com/apeltzer)
+[@ewels](https://github.com/ewels)
+[@drpatelh](https://github.com/drpatelh)
+[@pditommaso](https://github.com/pditommaso)
+[@FriederikeHanssen](https://github.com/FriederikeHanssen)
+[Tae Hoon Kim](https://github.com/taehoonkim-phd)
+[@easterwoods](https://github.com/easterwoods)
+
+## Contributions and Support
+
+If you would like to contribute to this pipeline, please see the [contributing guidelines](.github/CONTRIBUTING.md).
+
+For further information or help, don't hesitate to get in touch on the [Slack `#nascent` channel](https://nfcore.slack.com/channels/nascent) (you can join with [this invite](https://nf-co.re/join/slack)).
+
+## Citations
+
+If you use nf-core/nascent for your analysis, please cite it using the following doi: [10.5281/zenodo.157735234](https://doi.org/10.5281/zenodo.157735234)
+
+An extensive list of references for the tools used by the pipeline can be found in the [`CITATIONS.md`](CITATIONS.md) file.
+
+You can cite the `nf-core` publication as follows:
+
+> **The nf-core framework for community-curated bioinformatics pipelines.**
+>
+> Philip Ewels, Alexander Peltzer, Sven Fillinger, Harshil Patel, Johannes Alneberg, Andreas Wilm, Maxime Ulysse Garcia, Paolo Di Tommaso & Sven Nahnsen.
+>
+> _Nat Biotechnol._ 2020 Feb 13. 
doi: [10.1038/s41587-020-0439-x](https://dx.doi.org/10.1038/s41587-020-0439-x). diff --git a/assets/adaptivecard.json b/assets/adaptivecard.json new file mode 100644 index 00000000..7202d7ff --- /dev/null +++ b/assets/adaptivecard.json @@ -0,0 +1,67 @@ +{ + "type": "message", + "attachments": [ + { + "contentType": "application/vnd.microsoft.card.adaptive", + "contentUrl": null, + "content": { + "\$schema": "http://adaptivecards.io/schemas/adaptive-card.json", + "msteams": { + "width": "Full" + }, + "type": "AdaptiveCard", + "version": "1.2", + "body": [ + { + "type": "TextBlock", + "size": "Large", + "weight": "Bolder", + "color": "<% if (success) { %>Good<% } else { %>Attention<%} %>", + "text": "nf-core/nascent v${version} - ${runName}", + "wrap": true + }, + { + "type": "TextBlock", + "spacing": "None", + "text": "Completed at ${dateComplete} (duration: ${duration})", + "isSubtle": true, + "wrap": true + }, + { + "type": "TextBlock", + "text": "<% if (success) { %>Pipeline completed successfully!<% } else { %>Pipeline completed with errors. The full error message was: ${errorReport}.<% } %>", + "wrap": true + }, + { + "type": "TextBlock", + "text": "The command used to launch the workflow was as follows:", + "wrap": true + }, + { + "type": "TextBlock", + "text": "${commandLine}", + "isSubtle": true, + "wrap": true + } + ], + "actions": [ + { + "type": "Action.ShowCard", + "title": "Pipeline Configuration", + "card": { + "type": "AdaptiveCard", + "\$schema": "http://adaptivecards.io/schemas/adaptive-card.json", + "body": [ + { + "type": "FactSet", + "facts": [<% out << summary.collect{ k,v -> "{\"title\": \"$k\", \"value\" : \"$v\"}"}.join(",\n") %> + ] + } + ] + } + } + ] + } + } + ] +} diff --git a/assets/email_template.html b/assets/email_template.html index f8fe7553..a0e74c5f 100644 --- a/assets/email_template.html +++ b/assets/email_template.html @@ -1,6 +1,5 @@ - @@ -11,6 +10,8 @@
+<img src="cid:nfcorepipelinelogo">
+
 <h1>nf-core/nascent v${version}</h1>
 <h2>Run Name: $runName</h2>
diff --git a/assets/email_template.txt b/assets/email_template.txt index de3d6153..368017ad 100644 --- a/assets/email_template.txt +++ b/assets/email_template.txt @@ -1,6 +1,11 @@ -======================================== - nf-core/nascent v${version} -======================================== +---------------------------------------------------- + ,--./,-. + ___ __ __ __ ___ /,-._.--~\\ + |\\ | |__ __ / ` / \\ |__) |__ } { + | \\| | \\__, \\__/ | \\ |___ \\`-._,-`-, + `._,._,' + nf-core/nascent v${version} +---------------------------------------------------- Run Name: $runName <% if (success){ diff --git a/assets/methods_description_template.yml b/assets/methods_description_template.yml new file mode 100644 index 00000000..e9c81e2f --- /dev/null +++ b/assets/methods_description_template.yml @@ -0,0 +1,25 @@ +id: "nf-core-nascent-methods-description" +description: "Suggested text and references to use when describing pipeline usage within the methods section of a publication." +section_name: "nf-core/nascent Methods Description" +section_href: "https://github.com/nf-core/nascent" +plot_type: "html" +## TODO nf-core: Update the HTML below to your prefered methods description, e.g. add publication citation for this pipeline +## You inject any metadata in the Nextflow '${workflow}' object +data: | +

<h4>Methods</h4>
+  <p>Data was processed using nf-core/nascent v${workflow.manifest.version} ${doi_text} of the nf-core collection of workflows (Ewels et al., 2020).</p>
+  <p>The pipeline was executed with Nextflow v${workflow.nextflow.version} (Di Tommaso et al., 2017) with the following command:</p>
+  <pre><code>${workflow.commandLine}</code></pre>
+  <h4>References</h4>
+  <ul>
+    <li>Di Tommaso, P., Chatzou, M., Floden, E. W., Barja, P. P., Palumbo, E., & Notredame, C. (2017). Nextflow enables reproducible computational workflows. Nature Biotechnology, 35(4), 316-319. https://doi.org/10.1038/nbt.3820</li>
+    <li>Ewels, P. A., Peltzer, A., Fillinger, S., Patel, H., Alneberg, J., Wilm, A., Garcia, M. U., Di Tommaso, P., & Nahnsen, S. (2020). The nf-core framework for community-curated bioinformatics pipelines. Nature Biotechnology, 38(3), 276-278. https://doi.org/10.1038/s41587-020-0439-x</li>
+  </ul>
+  <div class="alert alert-info">
+    <h5>Notes:</h5>
+    <ul>
+      ${nodoi_text}
+      <li>The command above does not include parameters contained in any configs or profiles that may have been used. Ensure the config file is also uploaded with your publication!</li>
+      <li>You should also cite all software used within this run. Check the "Software Versions" of this report to get version information.</li>
+    </ul>
+  </div>
diff --git a/assets/multiqc_config.yaml b/assets/multiqc_config.yaml deleted file mode 100644 index 5ec55b92..00000000 --- a/assets/multiqc_config.yaml +++ /dev/null @@ -1,9 +0,0 @@ -report_comment: > - This report has been generated by the nf-core/nascent - analysis pipeline. For information about how to interpret these results, please see the - documentation. -report_section_order: - nf-core/nascent-software-versions: - order: -1000 - -export_plots: true diff --git a/assets/multiqc_config.yml b/assets/multiqc_config.yml new file mode 100644 index 00000000..9692109c --- /dev/null +++ b/assets/multiqc_config.yml @@ -0,0 +1,38 @@ +report_comment: > + This report has been generated by the nf-core/nascent + analysis pipeline. For information about how to interpret these results, please see the + documentation. +report_section_order: + "nf-core-nascent-methods-description": + order: -1000 + software_versions: + order: -1001 + "nf-core-nascent-summary": + order: -1002 + +run_modules: + - custom_content + - fastqc + - fastp + - samtools + - preseq + - rseqc + - featureCounts + - homer + +module_order: + - fastqc: + name: "FastQC (raw)" + info: "This section of the report shows FastQC results before adapter trimming." + path_filters: + - "./fastqc/*.zip" + +custom_data: + grohmm_plot: + section_name: "groHMM TD Plot" +sp: + my_custom_content_image: + fn: "./grohmm/*.tdplot.jpg" +ignore_images: false + +export_plots: true diff --git a/assets/nf-core-nascent_logo_light.png b/assets/nf-core-nascent_logo_light.png new file mode 100644 index 00000000..b90cb011 Binary files /dev/null and b/assets/nf-core-nascent_logo_light.png differ diff --git a/assets/samplesheet.csv b/assets/samplesheet.csv new file mode 100644 index 00000000..684096ed --- /dev/null +++ b/assets/samplesheet.csv @@ -0,0 +1,7 @@ +sample,fastq_1,fastq_2 +cd4_REP1,https://raw.githubusercontent.com/nf-core/test-datasets/nascent/testdata/SRX882903_T1.fastq.gz, +cd4_REP2,https://raw.githubusercontent.com/nf-core/test-datasets/nascent/testdata/SRX882903_T2.fastq.gz, +cd4_REP3,https://raw.githubusercontent.com/nf-core/test-datasets/nascent/testdata/SRX882903_T3.fastq.gz, +cd4_REP4,https://raw.githubusercontent.com/nf-core/test-datasets/nascent/testdata/SRX882903_T4.fastq.gz, +jurkat_REP1,https://raw.githubusercontent.com/nf-core/test-datasets/nascent/testdata/SRX882904_T1.fastq.gz, +jurkat_REP2,https://raw.githubusercontent.com/nf-core/test-datasets/nascent/testdata/SRX882904_T2.fastq.gz, diff --git a/assets/schema_input.json b/assets/schema_input.json new file mode 100644 index 00000000..762ac215 --- /dev/null +++ b/assets/schema_input.json @@ -0,0 +1,36 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema", + "$id": "https://raw.githubusercontent.com/nf-core/nascent/master/assets/schema_input.json", + "title": "nf-core/nascent pipeline - params.input schema", + "description": "Schema for the file provided with params.input", + "type": "array", + "items": { + "type": "object", + "properties": { + "sample": { + "type": "string", + "pattern": "^\\S+$", + "errorMessage": "Sample name must be provided and cannot contain spaces" + }, + "fastq_1": { + "type": "string", + "pattern": "^\\S+\\.f(ast)?q\\.gz$", + "errorMessage": "FastQ file for reads 1 must be provided, cannot contain spaces and must have extension '.fq.gz' or '.fastq.gz'" + }, + "fastq_2": { + "errorMessage": "FastQ file for reads 2 cannot contain spaces and must have extension '.fq.gz' or '.fastq.gz'", + "anyOf": [ + { + "type": "string", + "pattern": "^\\S+\\.f(ast)?q\\.gz$" 
+ }, + { + "type": "string", + "maxLength": 0 + } + ] + } + }, + "required": ["sample", "fastq_1"] + } +} diff --git a/assets/sendmail_template.txt b/assets/sendmail_template.txt index 2d671220..164867c9 100644 --- a/assets/sendmail_template.txt +++ b/assets/sendmail_template.txt @@ -8,6 +8,23 @@ Content-Type: text/html; charset=utf-8 $email_html +--nfcoremimeboundary +Content-Type: image/png;name="nf-core-nascent_logo.png" +Content-Transfer-Encoding: base64 +Content-ID: +Content-Disposition: inline; filename="nf-core-nascent_logo_light.png" + +<% out << new File("$projectDir/assets/nf-core-nascent_logo_light.png"). + bytes. + encodeBase64(). + toString(). + tokenize( '\n' )*. + toList()*. + collate( 76 )*. + collect { it.join() }. + flatten(). + join( '\n' ) %> + <% if (mqcFile){ def mqcFileObj = new File("$mqcFile") @@ -20,15 +37,15 @@ Content-ID: Content-Disposition: attachment; filename=\"${mqcFileObj.getName()}\" ${mqcFileObj. - bytes. - encodeBase64(). - toString(). - tokenize( '\n' )*. - toList()*. - collate( 76 )*. - collect { it.join() }. - flatten(). - join( '\n' )} + bytes. + encodeBase64(). + toString(). + tokenize( '\n' )*. + toList()*. + collate( 76 )*. + collect { it.join() }. + flatten(). + join( '\n' )} """ }} %> diff --git a/assets/tuningparamstotest.csv b/assets/tuningparamstotest.csv new file mode 100644 index 00000000..1de8b4ac --- /dev/null +++ b/assets/tuningparamstotest.csv @@ -0,0 +1,64 @@ +"LtProbB","UTS" +-100,5 +-100,10 +-100,15 +-100,20 +-100,25 +-100,30 +-100,35 +-100,40 +-100,45 +-150,5 +-150,10 +-150,15 +-150,20 +-150,25 +-150,30 +-150,35 +-150,40 +-150,45 +-200,5 +-200,10 +-200,15 +-200,20 +-200,25 +-200,30 +-200,35 +-200,40 +-200,45 +-250,5 +-250,10 +-250,15 +-250,20 +-250,25 +-250,30 +-250,35 +-250,40 +-250,45 +-300,5 +-300,10 +-300,15 +-300,20 +-300,25 +-300,30 +-300,35 +-300,40 +-300,45 +-350,5 +-350,10 +-350,15 +-350,20 +-350,25 +-350,30 +-350,35 +-350,40 +-350,45 +-400,5 +-400,10 +-400,15 +-400,20 +-400,25 +-400,30 +-400,35 +-400,40 +-400,45 diff --git a/bin/__pycache__/scrape_software_versions.cpython-37.pyc b/bin/__pycache__/scrape_software_versions.cpython-37.pyc deleted file mode 100644 index b03b0d76..00000000 Binary files a/bin/__pycache__/scrape_software_versions.cpython-37.pyc and /dev/null differ diff --git a/bin/bedGraphToBigWig b/bin/bedGraphToBigWig deleted file mode 100755 index 47e3fbd0..00000000 Binary files a/bin/bedGraphToBigWig and /dev/null differ diff --git a/bin/check_samplesheet.py b/bin/check_samplesheet.py new file mode 100755 index 00000000..399a8a84 --- /dev/null +++ b/bin/check_samplesheet.py @@ -0,0 +1,263 @@ +#!/usr/bin/env python + + +"""Provide a command line tool to validate and transform tabular samplesheets.""" + + +import argparse +import csv +import logging +import sys +from collections import Counter +from pathlib import Path + +logger = logging.getLogger() + + +class RowChecker: + """ + Define a service that can validate and transform each given row. + + Attributes: + modified (list): A list of dicts, where each dict corresponds to a previously + validated and transformed row. The order of rows is maintained. + + """ + + VALID_FORMATS = ( + ".fq.gz", + ".fastq.gz", + ) + + def __init__( + self, + sample_col="sample", + first_col="fastq_1", + second_col="fastq_2", + single_col="single_end", + **kwargs, + ): + """ + Initialize the row checker with the expected column names. + + Args: + sample_col (str): The name of the column that contains the sample name + (default "sample"). 
+ first_col (str): The name of the column that contains the first (or only) + FASTQ file path (default "fastq_1"). + second_col (str): The name of the column that contains the second (if any) + FASTQ file path (default "fastq_2"). + single_col (str): The name of the new column that will be inserted and + records whether the sample contains single- or paired-end sequencing + reads (default "single_end"). + + """ + super().__init__(**kwargs) + self._sample_col = sample_col + self._first_col = first_col + self._second_col = second_col + self._single_col = single_col + self._seen = set() + self.modified = [] + + def validate_and_transform(self, row): + """ + Perform all validations on the given row and insert the read pairing status. + + Args: + row (dict): A mapping from column headers (keys) to elements of that row + (values). + + """ + self._validate_sample(row) + self._validate_first(row) + self._validate_second(row) + self._validate_pair(row) + self._seen.add((row[self._sample_col], row[self._first_col])) + self.modified.append(row) + + def _validate_sample(self, row): + """Assert that the sample name exists and convert spaces to underscores.""" + if len(row[self._sample_col]) <= 0: + raise AssertionError("Sample input is required.") + # Sanitize samples slightly. + row[self._sample_col] = row[self._sample_col].replace(" ", "_") + + def _validate_first(self, row): + """Assert that the first FASTQ entry is non-empty and has the right format.""" + if len(row[self._first_col]) <= 0: + raise AssertionError("At least the first FASTQ file is required.") + self._validate_fastq_format(row[self._first_col]) + + def _validate_second(self, row): + """Assert that the second FASTQ entry has the right format if it exists.""" + if len(row[self._second_col]) > 0: + self._validate_fastq_format(row[self._second_col]) + + def _validate_pair(self, row): + """Assert that read pairs have the same file extension. Report pair status.""" + if row[self._first_col] and row[self._second_col]: + row[self._single_col] = False + first_col_suffix = Path(row[self._first_col]).suffixes[-2:] + second_col_suffix = Path(row[self._second_col]).suffixes[-2:] + if first_col_suffix != second_col_suffix: + raise AssertionError("FASTQ pairs must have the same file extensions.") + else: + row[self._single_col] = True + + def _validate_fastq_format(self, filename): + """Assert that a given filename has one of the expected FASTQ extensions.""" + if not any(filename.endswith(extension) for extension in self.VALID_FORMATS): + raise AssertionError( + f"The FASTQ file has an unrecognized extension: {filename}\n" + f"It should be one of: {', '.join(self.VALID_FORMATS)}" + ) + + def validate_unique_samples(self): + """ + Assert that the combination of sample name and FASTQ filename is unique. + + In addition to the validation, also rename all samples to have a suffix of _T{n}, where n is the + number of times the same sample exist, but with different FASTQ files, e.g., multiple runs per experiment. 
+ + """ + if len(self._seen) != len(self.modified): + raise AssertionError("The pair of sample name and FASTQ must be unique.") + seen = Counter() + for row in self.modified: + sample = row[self._sample_col] + seen[sample] += 1 + row[self._sample_col] = f"{sample}_T{seen[sample]}" + + +def read_head(handle, num_lines=10): + """Read the specified number of lines from the current position in the file.""" + lines = [] + for idx, line in enumerate(handle): + if idx == num_lines: + break + lines.append(line) + return "".join(lines) + + +def sniff_format(handle): + """ + Detect the tabular format. + + Args: + handle (text file): A handle to a `text file`_ object. The read position is + expected to be at the beginning (index 0). + + Returns: + csv.Dialect: The detected tabular format. + + .. _text file: + https://docs.python.org/3/glossary.html#term-text-file + + """ + peek = read_head(handle) + handle.seek(0) + sniffer = csv.Sniffer() + # FIXME https://github.com/nf-core/tools/issues/1539 + # if not sniffer.has_header(peek): + # logger.critical(f"The given sample sheet does not appear to contain a header.") + # sys.exit(1) + dialect = sniffer.sniff(peek) + return dialect + + +def check_samplesheet(file_in, file_out): + """ + Check that the tabular samplesheet has the structure expected by nf-core pipelines. + + Validate the general shape of the table, expected columns, and each row. Also add + an additional column which records whether one or two FASTQ reads were found. + + Args: + file_in (pathlib.Path): The given tabular samplesheet. The format can be either + CSV, TSV, or any other format automatically recognized by ``csv.Sniffer``. + file_out (pathlib.Path): Where the validated and transformed samplesheet should + be created; always in CSV format. + + Example: + This function checks that the samplesheet follows the following structure, + see also the `viral recon samplesheet`_:: + + sample,fastq_1,fastq_2 + SAMPLE_PE,SAMPLE_PE_RUN1_1.fastq.gz,SAMPLE_PE_RUN1_2.fastq.gz + SAMPLE_PE,SAMPLE_PE_RUN2_1.fastq.gz,SAMPLE_PE_RUN2_2.fastq.gz + SAMPLE_SE,SAMPLE_SE_RUN1_1.fastq.gz, + + .. _viral recon samplesheet: + https://raw.githubusercontent.com/nf-core/test-datasets/viralrecon/samplesheet/samplesheet_test_illumina_amplicon.csv + + """ + required_columns = {"sample", "fastq_1", "fastq_2"} + # See https://docs.python.org/3.9/library/csv.html#id3 to read up on `newline=""`. + with file_in.open(newline="") as in_handle: + reader = csv.DictReader(in_handle, dialect=sniff_format(in_handle)) + # Validate the existence of the expected header columns. + if not required_columns.issubset(reader.fieldnames): + req_cols = ", ".join(required_columns) + logger.critical(f"The sample sheet **must** contain these column headers: {req_cols}.") + sys.exit(1) + # Validate each row. + checker = RowChecker() + for i, row in enumerate(reader): + try: + checker.validate_and_transform(row) + except AssertionError as error: + logger.critical(f"{str(error)} On line {i + 2}.") + sys.exit(1) + checker.validate_unique_samples() + header = list(reader.fieldnames) + header.insert(1, "single_end") + # See https://docs.python.org/3.9/library/csv.html#id3 to read up on `newline=""`. 
+ with file_out.open(mode="w", newline="") as out_handle: + writer = csv.DictWriter(out_handle, header, delimiter=",") + writer.writeheader() + for row in checker.modified: + writer.writerow(row) + + +def parse_args(argv=None): + """Define and immediately parse command line arguments.""" + parser = argparse.ArgumentParser( + description="Validate and transform a tabular samplesheet.", + epilog="Example: python check_samplesheet.py samplesheet.csv samplesheet.valid.csv", + ) + parser.add_argument( + "file_in", + metavar="FILE_IN", + type=Path, + help="Tabular input samplesheet in CSV or TSV format.", + ) + parser.add_argument( + "file_out", + metavar="FILE_OUT", + type=Path, + help="Transformed output samplesheet in CSV format.", + ) + parser.add_argument( + "-l", + "--log-level", + help="The desired log level (default WARNING).", + choices=("CRITICAL", "ERROR", "WARNING", "INFO", "DEBUG"), + default="WARNING", + ) + return parser.parse_args(argv) + + +def main(argv=None): + """Coordinate argument parsing and program execution.""" + args = parse_args(argv) + logging.basicConfig(level=args.log_level, format="[%(levelname)s] %(message)s") + if not args.file_in.is_file(): + logger.error(f"The given input file {args.file_in} was not found!") + sys.exit(2) + args.file_out.parent.mkdir(parents=True, exist_ok=True) + check_samplesheet(args.file_in, args.file_out) + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/bin/extract_fastqc_stats.sh b/bin/extract_fastqc_stats.sh deleted file mode 100755 index 48551fc2..00000000 --- a/bin/extract_fastqc_stats.sh +++ /dev/null @@ -1,55 +0,0 @@ -#!/bin/sh - -# Extract read statistics from fastqc output, providing a stopping point if we -# don't have enough. - -# Set options -set -e -set -o errexit -#set -o pipefail -# Assume only utf-8 -export LC_ALL=C - -usage() -{ - echo "extract_nascent_stats.sh - extract fastqc stats from output zip files" - echo "Example:" - echo " ./extract_nascent_stats.sh --srr=SRR2084556" - echo "Usage:" - echo " -h/--help -- Display this help message." - echo " --srr -- SRR / FASTA to parse" - exit 0 -} - -while [ "$1" != "" ]; do - PARAM=$(echo "$1" | awk -F= '{print $1}') - VALUE=$(echo "$1" | awk -F= '{print $2}') - case $PARAM in - -h | --help) - usage - exit - ;; - --srr) - SRR=$VALUE - ;; - *) - echo "ERROR: unknown parameter \"$PARAM\"" - usage - exit 1 - ;; - esac - shift -done -echo Extracting fastqc statistics for "$SRR" - -GC=$(unzip -c "$(find . -name *_fastqc.zip)" "$SRR"_fastqc/fastqc_data.txt \ - | grep "%GC" | grep -o "[0-9]*") -SEQ=$(unzip -c "$(find . -name *_fastqc.zip)" "$SRR"_fastqc/fastqc_data.txt | \ - grep "Total Sequences" | \ - grep -o "[0-9]*") -DEDUP=$(unzip -c "$(find . 
-name *_fastqc.zip)" "$SRR"_fastqc/fastqc_data.txt | \ - grep "#Total Deduplicated Percentage" | \ - grep -o "[0-9,.]*") - -echo -e "SRR\t%GC\tTotal_Sequences\t%Total_Deduplicated" -echo -e "$SRR""$(printf "\\t")""$GC""$(printf "\\t")""$SEQ""$(printf "\\t")""$DEDUP" diff --git a/bin/fasta2gtf.py b/bin/fasta2gtf.py new file mode 100755 index 00000000..7a872e7a --- /dev/null +++ b/bin/fasta2gtf.py @@ -0,0 +1,74 @@ +#!/usr/bin/env python +""" +Read a custom fasta file and create a custom GTF containing each entry +""" +import argparse +import logging +from itertools import groupby + +# Create a logger +logging.basicConfig(format="%(name)s - %(asctime)s %(levelname)s: %(message)s") +logger = logging.getLogger(__file__) +logger.setLevel(logging.INFO) + + +def fasta_iter(fasta_name): + """ + modified from Brent Pedersen + Correct Way To Parse A Fasta File In Python + given a fasta file. yield tuples of header, sequence + + Fasta iterator from https://www.biostars.org/p/710/#120760 + """ + with open(fasta_name) as fh: + # ditch the boolean (x[0]) and just keep the header or sequence since + # we know they alternate. + faiter = (x[1] for x in groupby(fh, lambda line: line[0] == ">")) + for header in faiter: + # drop the ">" + headerStr = header.__next__()[1:].strip() + + # join all sequence lines to one. + seq = "".join(s.strip() for s in faiter.__next__()) + + yield (headerStr, seq) + + +def fasta2gtf(fasta, output): + fiter = fasta_iter(fasta) + # GTF output lines + lines = [] + attributes = 'gene_id "{name_sanitized}"; gene_name "{name_sanitized}";transcript_id "{name_sanitized}"; gene_biotype "{name_sanitized}"; gene_type "{name_sanitized}"\n' # noqa: E501 + line_template = "{name_sanitized}\ttransgene\texon\t1\t{length}\t.\t+\t.\t" + attributes + + for ff in fiter: + name, seq = ff + # Use first ID as separated by spaces as the "sequence name" + # (equivalent to "chromosome" in other cases) + seqname = name.split()[0] + # Remove all spaces + name_sanitized = seqname.replace(" ", "_") + length = len(seq) + line = line_template.format(name_sanitized=name_sanitized, length=length) + lines.append(line) + + with open(output, "w") as f: + f.write("".join(lines)) + + +if __name__ == "__main__": + parser = argparse.ArgumentParser( + description="""Convert a custom fasta (e.g. transgene) + to a GTF annotation.""" + ) + parser.add_argument("fasta", type=str, help="Custom transgene sequence") + parser.add_argument( + "-o", + "--output", + dest="output", + default="transgenes.gtf", + type=str, + help="gene annotation GTF output", + ) + args = parser.parse_args() + fasta2gtf(args.fasta, args.output) diff --git a/bin/filter_gtf_for_genes_in_genome.py b/bin/filter_gtf_for_genes_in_genome.py new file mode 100755 index 00000000..c7eddee7 --- /dev/null +++ b/bin/filter_gtf_for_genes_in_genome.py @@ -0,0 +1,80 @@ +#!/usr/bin/env python +from __future__ import print_function + +import argparse +import logging +from itertools import groupby + +# Create a logger +logging.basicConfig(format="%(name)s - %(asctime)s %(levelname)s: %(message)s") +logger = logging.getLogger(__file__) +logger.setLevel(logging.INFO) + + +def is_header(line): + return line[0] == ">" + + +def extract_fasta_seq_names(fasta_name): + """ + modified from Brent Pedersen + Correct Way To Parse A Fasta File In Python + given a fasta file. 
yield tuples of header, sequence + from https://www.biostars.org/p/710/ + """ + # first open the file outside + fh = open(fasta_name) + + # ditch the boolean (x[0]) and just keep the header or sequence since + # we know they alternate. + faiter = (x[1] for x in groupby(fh, is_header)) + + for i, header in enumerate(faiter): + line = next(header) + if is_header(line): + # drop the ">" + headerStr = line[1:].strip().split()[0] + yield headerStr + + +def extract_genes_in_genome(fasta, gtf_in, gtf_out): + seq_names_in_genome = set(extract_fasta_seq_names(fasta)) + logger.info("Extracted chromosome sequence names from : %s" % fasta) + logger.info("All chromosome names: " + ", ".join(sorted(x for x in seq_names_in_genome))) + seq_names_in_gtf = set([]) + + n_total_lines = 0 + n_lines_in_genome = 0 + with open(gtf_out, "w") as f: + with open(gtf_in) as g: + + for line in g.readlines(): + n_total_lines += 1 + seq_name_gtf = line.split("\t")[0] + seq_names_in_gtf.add(seq_name_gtf) + if seq_name_gtf in seq_names_in_genome: + n_lines_in_genome += 1 + f.write(line) + logger.info( + "Extracted %d / %d lines from %s matching sequences in %s" % (n_lines_in_genome, n_total_lines, gtf_in, fasta) + ) + logger.info("All sequence IDs from GTF: " + ", ".join(sorted(x for x in seq_name_gtf))) + + logger.info("Wrote matching lines to %s" % gtf_out) + + +if __name__ == "__main__": + parser = argparse.ArgumentParser(description="""Filter GTF only for features in the genome""") + parser.add_argument("--gtf", type=str, help="GTF file") + parser.add_argument("--fasta", type=str, help="Genome fasta file") + parser.add_argument( + "-o", + "--output", + dest="output", + default="genes_in_genome.gtf", + type=str, + help="GTF features on fasta genome sequences", + ) + + args = parser.parse_args() + extract_genes_in_genome(args.fasta, args.gtf, args.output) diff --git a/bin/gtf2bed b/bin/gtf2bed new file mode 100755 index 00000000..66d52306 --- /dev/null +++ b/bin/gtf2bed @@ -0,0 +1,123 @@ +#!/usr/bin/env perl + +# Copyright (c) 2011 Erik Aronesty (erik@q32.com) +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +# THE SOFTWARE. +# +# ALSO, IT WOULD BE NICE IF YOU LET ME KNOW YOU USED IT. + +use Getopt::Long; + +my $extended; +GetOptions("x"=>\$extended); + +$in = shift @ARGV; + +my $in_cmd =($in =~ /\.gz$/ ? "gunzip -c $in|" : $in =~ /\.zip$/ ? 
"unzip -p $in|" : "$in") || die "Can't open $in: $!\n"; +open IN, $in_cmd; + +while () { + $gff = 2 if /^##gff-version 2/; + $gff = 3 if /^##gff-version 3/; + next if /^#/ && $gff; + + s/\s+$//; + # 0-chr 1-src 2-feat 3-beg 4-end 5-scor 6-dir 7-fram 8-attr + my @f = split /\t/; + if ($gff) { + # most ver 2's stick gene names in the id field + ($id) = $f[8]=~ /\bID="([^"]+)"/; + # most ver 3's stick unquoted names in the name field + ($id) = $f[8]=~ /\bName=([^";]+)/ if !$id && $gff == 3; + } else { + ($id) = $f[8]=~ /transcript_id "([^"]+)"/; + } + + next unless $id && $f[0]; + + if ($f[2] eq 'exon') { + die "no position at exon on line $." if ! $f[3]; + # gff3 puts :\d in exons sometimes + $id =~ s/:\d+$// if $gff == 3; + push @{$exons{$id}}, \@f; + # save lowest start + $trans{$id} = \@f if !$trans{$id}; + } elsif ($f[2] eq 'start_codon') { + #optional, output codon start/stop as "thick" region in bed + $sc{$id}->[0] = $f[3]; + } elsif ($f[2] eq 'stop_codon') { + $sc{$id}->[1] = $f[4]; + } elsif ($f[2] eq 'miRNA' ) { + $trans{$id} = \@f if !$trans{$id}; + push @{$exons{$id}}, \@f; + } +} + +for $id ( + # sort by chr then pos + sort { + $trans{$a}->[0] eq $trans{$b}->[0] ? + $trans{$a}->[3] <=> $trans{$b}->[3] : + $trans{$a}->[0] cmp $trans{$b}->[0] + } (keys(%trans)) ) { + my ($chr, undef, undef, undef, undef, undef, $dir, undef, $attr, undef, $cds, $cde) = @{$trans{$id}}; + my ($cds, $cde); + ($cds, $cde) = @{$sc{$id}} if $sc{$id}; + + # sort by pos + my @ex = sort { + $a->[3] <=> $b->[3] + } @{$exons{$id}}; + + my $beg = $ex[0][3]; + my $end = $ex[-1][4]; + + if ($dir eq '-') { + # swap + $tmp=$cds; + $cds=$cde; + $cde=$tmp; + $cds -= 2 if $cds; + $cde += 2 if $cde; + } + + # not specified, just use exons + $cds = $beg if !$cds; + $cde = $end if !$cde; + + # adjust start for bed + --$beg; --$cds; + + my $exn = @ex; # exon count + my $exst = join ",", map {$_->[3]-$beg-1} @ex; # exon start + my $exsz = join ",", map {$_->[4]-$_->[3]+1} @ex; # exon size + + my $gene_id; + my $extend = ""; + if ($extended) { + ($gene_id) = $attr =~ /gene_name "([^"]+)"/; + ($gene_id) = $attr =~ /gene_id "([^"]+)"/ unless $gene_id; + $extend="\t$gene_id"; + } + # added an extra comma to make it look exactly like ucsc's beds + print "$chr\t$beg\t$end\t$id\t0\t$dir\t$cds\t$cde\t0\t$exn\t$exsz,\t$exst,$extend\n"; +} + + +close IN; diff --git a/bin/markdown_to_html.r b/bin/markdown_to_html.r deleted file mode 100755 index abe13350..00000000 --- a/bin/markdown_to_html.r +++ /dev/null @@ -1,51 +0,0 @@ -#!/usr/bin/env Rscript - -# Command line argument processing -args = commandArgs(trailingOnly=TRUE) -if (length(args) < 2) { - stop("Usage: markdown_to_html.r ", call.=FALSE) -} -markdown_fn <- args[1] -output_fn <- args[2] - -# Load / install packages -if (!require("markdown")) { - install.packages("markdown", dependencies=TRUE, repos='http://cloud.r-project.org/') - library("markdown") -} - -base_css_fn <- getOption("markdown.HTML.stylesheet") -base_css <- readChar(base_css_fn, file.info(base_css_fn)$size) -custom_css <- paste(base_css, " -body { - padding: 3em; - margin-right: 350px; - max-width: 100%; -} -#toc { - position: fixed; - right: 20px; - width: 300px; - padding-top: 20px; - overflow: scroll; - height: calc(100% - 3em - 20px); -} -#toc_header { - font-size: 1.8em; - font-weight: bold; -} -#toc > ul { - padding-left: 0; - list-style-type: none; -} -#toc > ul ul { padding-left: 20px; } -#toc > ul > li > a { display: none; } -img { max-width: 800px; } -") - -markdownToHTML( - file = markdown_fn, - output 
= output_fn, - stylesheet = custom_css, - options = c('toc', 'base64_images', 'highlight_code') -) diff --git a/bin/parameter_tuning.R b/bin/parameter_tuning.R new file mode 100755 index 00000000..97bb9000 --- /dev/null +++ b/bin/parameter_tuning.R @@ -0,0 +1,158 @@ +#!/usr/bin/env Rscript + +suppressPackageStartupMessages(library(argparse)) +suppressPackageStartupMessages(library(GenomicFeatures)) +suppressPackageStartupMessages(library(GenomicAlignments)) +suppressPackageStartupMessages(library(groHMM)) + +parser <- ArgumentParser(description = "Run groHMM on some bam files") + +parser$add_argument( + "-i", + "--bam_files", + type = "character", + nargs = "+", + metavar = "path", + help = "GRO SEQ data in bam files.", + required = TRUE +) +parser$add_argument( + "-t", + "--tuning_file", + type = "character", + default = NULL, + metavar = "path", + help = "File with tuning parameters and error rates." +) +parser$add_argument( + "-o", + "--outdir", + type = "character", + default = "./", + metavar = "path", + help = "Output directory." +) +parser$add_argument( + "-l", + "--ltprobb", + type = "integer", + default = -200, + metavar = "integer", + help = cat( + "Log-transformed transition probability of switching from transcribed + state to non-transcribed state" + ) +) +parser$add_argument( + "-u", + "--uts", + type = "integer", + default = 5, + metavar = "integer", + help = cat( + "Variance of the emission probability for reads in the + non-transcribed state, respectively." + ) +) +parser$add_argument( + "-p", + "--outprefix", + type = "character", + default = "grohmm", + metavar = "string", + help = "Output prefix." +) +parser$add_argument( + "-g", + "--gtf", + type = "character", + default = NULL, + metavar = "string", + help = "GTF File to create TxDb", + required = TRUE +) +parser$add_argument( + "-c", + "--cores", + type = "integer", + default = 1, + metavar = "integer", + help = "Number of cores." +) + +args <- parser$parse_args() + +setwd(args$outdir) + +if (is.null(args$bam_files)) { + print_help(args) + stop("Please provide a bam file", call. = FALSE) +} +if (is.null(args$tuning_file)) { + print_help(args) + stop("Please provide a tuning file", call. = FALSE) +} + + +# Read in bam file. 
+if (file.exists(args$outdir) == FALSE) { + dir.create(args$outdir, recursive = TRUE) +} +setwd(args$outdir) + +# CHANGE BASED ON PAIRED OR SINGLE END +alignments <- c() +for (bam in args$bam_files) { + alignments <- append( + alignments, + as(readGAlignments(bam), "GRanges") + ) + alignments <- keepStandardChromosomes(alignments, pruning.mode = "coarse") +} + +print("Input transcript annotations") +kg_db <- makeTxDbFromGFF(args$gtf) +kg_tx <- transcripts(kg_db, columns = c("gene_id", "tx_id", "tx_name")) +print("Collapse annotations in preparation for overlap") +kg_consensus <- makeConsensusAnnotations( + kg_tx, + keytype = "gene_id", + mc.cores = args$cores +) +print("Finished consensus annotations") + +# TUNING +tune <- read.csv(args$tuning_file) + +evals <- mclapply(seq_len(nrow(tune)), function(x) { + hmm <- detectTranscripts( + reads = alignments, + LtProbB = tune$LtProbB[x], UTS = tune$UTS[x] + ) + e <- evaluateHMMInAnnotations(hmm$transcripts, kg_consensus) + e$eval +}, mc.cores = args$cores, mc.silent = TRUE) + +tune <- cbind(tune, do.call(rbind, evals)) +write.csv(tune, file = paste0(args$outprefix, ".tuning.csv")) + + +# CITE PACKAGES USED +citation("groHMM") +citation("GenomicFeatures") +citation("GenomicAlignments") +citation("AnnotationDbi") + +## R SESSION INFO ## +################################################ +################################################ + +r_log_file <- "R_sessionInfo.log" +if (file.exists(r_log_file) == FALSE) { + sink(r_log_file) + a <- sessionInfo() + print(a) + sink() +} + +################################################################################ diff --git a/bin/rcc.py b/bin/rcc.py deleted file mode 100644 index c8e861d9..00000000 --- a/bin/rcc.py +++ /dev/null @@ -1,36 +0,0 @@ -from __future__ import division -import sys -import os - -def calmp(num_of_reads, total_reads): - mp = float(num_of_reads)/(int(total_reads)/1000000) - return mp - - -def gettotalreadsfromflagstat(flagstatfile): - f = open(flagstatfile) - lines = f.readlines() - mapped_reads = lines[0] -# mapped_reads = int(x[0]) -# print "mapped_reads", mapped_reads - return mapped_reads - - -def main(Bedgraphfile, flagstatfile, readcountcorrBedgraphfile): - total_reads = gettotalreadsfromflagstat(flagstatfile) - wf = open(readcountcorrBedgraphfile, "w") - f = open(Bedgraphfile) - for line in f: - line = line.strip("\n") - line = line.split("\t") - num_of_reads = line[-1] - mp = calmp(num_of_reads, total_reads) - line[-1] = mp - wf.write("\t".join(map(str,line))+"\n") - wf.close() - f.close() - - -if __name__=="__main__": - Bedgraphfile, flagstatfile, readcountcorrBedgraphfile = sys.argv[1],sys.argv[2], sys.argv[3] - main(Bedgraphfile, flagstatfile, readcountcorrBedgraphfile) diff --git a/bin/scrape_software_versions.py b/bin/scrape_software_versions.py deleted file mode 100755 index 54e6c2e8..00000000 --- a/bin/scrape_software_versions.py +++ /dev/null @@ -1,48 +0,0 @@ -#!/usr/bin/env python -from __future__ import print_function -from collections import OrderedDict -import re - -regexes = { - 'nf-core/nascent': ['v_pipeline.txt', r"(\S+)"], - 'Nextflow': ['v_nextflow.txt', r"(\S+)"], - 'FastQC': ['v_fastqc.txt', r"FastQC v(\S+)"], - 'MultiQC': ['v_multiqc.txt', r"multiqc, version (\S+)"], -} -results = OrderedDict() -results['nf-core/nascent'] = 'N/A' -results['Nextflow'] = 'N/A' -results['FastQC'] = 'N/A' -results['MultiQC'] = 'N/A' - -# Search each file using its regex -for k, v in regexes.items(): - with open(v[0]) as x: - versions = x.read() - match = re.search(v[1], 
versions) - if match: - results[k] = "v{}".format(match.group(1)) - -# Remove software set to false in results -for k in results: - if not results[k]: - del(results[k]) - -# Dump to YAML -print (''' -id: 'software_versions' -section_name: 'nf-core/nascent Software Versions' -section_href: 'https://github.com/nf-core/nascent' -plot_type: 'html' -description: 'are collected at run time from the software output.' -data: | -
-''')
-for k,v in results.items():
-    print("        <dt>{}</dt><dd><samp>{}</samp></dd>".format(k,v))
-print ("    </dl>
") - -# Write out regexes as csv file: -with open('software_versions.csv', 'w') as f: - for k,v in results.items(): - f.write("{}\t{}\n".format(k,v)) diff --git a/bin/transcriptcalling_grohmm.R b/bin/transcriptcalling_grohmm.R new file mode 100755 index 00000000..bdd4c7e7 --- /dev/null +++ b/bin/transcriptcalling_grohmm.R @@ -0,0 +1,194 @@ +#!/usr/bin/env Rscript + +suppressPackageStartupMessages(library(argparse)) +suppressPackageStartupMessages(library(GenomicFeatures)) +suppressPackageStartupMessages(library(GenomicAlignments)) +suppressPackageStartupMessages(library(groHMM)) + +parser <- ArgumentParser(description = "Run groHMM on some bam files") + +parser$add_argument( + "-i", + "--bam_files", + type = "character", + nargs = "+", + metavar = "path", + help = "GRO SEQ data in bam files.", + required = TRUE +) +parser$add_argument( + "-t", + "--tuning_file", + type = "character", + default = NULL, + metavar = "path", + help = "File with tuning parameters and error rates." +) +parser$add_argument( + "-o", + "--outdir", + type = "character", + default = "./", + metavar = "path", + help = "Output directory." +) +parser$add_argument( + "-l", + "--ltprobb", + type = "integer", + default = -200, + metavar = "integer", + help = cat( + "Log-transformed transition probability of switching from transcribed + state to non-transcribed state" + ) +) +parser$add_argument( + "-u", + "--uts", + type = "integer", + default = 5, + metavar = "integer", + help = cat( + "Variance of the emission probability for reads in the + non-transcribed state, respectively." + ) +) +parser$add_argument( + "-p", + "--outprefix", + type = "character", + default = "grohmm", + metavar = "string", + help = "Output prefix." +) +parser$add_argument( + "-g", + "--gtf", + type = "character", + default = NULL, + metavar = "string", + help = "GTF File to create TxDb", + required = TRUE +) +parser$add_argument( + "-c", + "--cores", + type = "integer", + default = 1, + metavar = "integer", + help = "Number of cores." 
+) + +args <- parser$parse_args() + +setwd(args$outdir) + +# Load alignment files +alignments <- c() +for (bam in args$bam_files) { + alignments <- append( + alignments, + as(readGAlignments(bam), "GRanges") + ) + alignments <- keepStandardChromosomes(alignments, pruning.mode = "coarse") +} + +# Call annotations > DEFAULT VALUES ASSIGNED +if (is.null(args$tuning_file)) { + hmm_result <- detectTranscripts( + alignments, + LtProbB = args$ltprobb, + UTS = args$uts, + threshold = 1 + ) # Uses either inputted or default values +} else { + tune <- read.csv(args$tuning_file) + # Minimum error + uts <- tune[which.min(tune$errorRate), "UTS"] + lt_probb <- tune[which.min(tune$errorRate), "LtProbB"] + hmm_result <- detectTranscripts( + alignments, + LtProbB = lt_probb, + UTS = uts, + threshold = 1 + ) +} + +tx_hmm <- hmm_result$transcripts +write.table( + tx_hmm, + file = paste(args$outprefix, + ".transcripts.txt", + sep = "" + ) +) + +print("Input transcript annotations") +kg_db <- makeTxDbFromGFF(args$gtf) +kg_tx <- transcripts(kg_db, columns = c("gene_id", "tx_id", "tx_name")) +print("Collapse annotations in preparation for overlap") +kg_consensus <- makeConsensusAnnotations( + kg_tx, + keytype = "gene_id", + mc.cores = args$cores +) +print("Finished consensus annotations") + +# Evaluate HMM Annotations +e <- evaluateHMMInAnnotations(tx_hmm, kg_consensus) +# Save as txt file +capture.output(e$eval, file = paste0(args$outprefix, ".eval.txt")) + +# TUNING IN A DIFFERENT SCRIPT + +# repairing with annotations +get_expressed_annotations <- function(features, reads) { + f_limit <- limitToXkb(features) + count <- countOverlaps(f_limit, reads) + features <- features[count != 0, ] + return(features[(quantile(width(features), .05) < width(features)) & + (width(features) < quantile(width(features), .95)), ]) +} +con_expressed <- get_expressed_annotations( + features = kg_consensus, + reads = alignments +) +b_plus <- breakTranscriptsOnGenes(tx_hmm, kg_consensus, strand = "+") +b_minus <- breakTranscriptsOnGenes(tx_hmm, kg_consensus, strand = "-") +tx_broken <- c(b_plus, b_minus) +tx_final <- combineTranscripts(tx_broken, kg_consensus) +td_final <- getTxDensity(tx_final, con_expressed, mc.cores = args$cores) +export( + tx_final, + con = paste(args$outprefix, ".final.transcripts.bed", sep = "") +) +capture.output(td_final, file = paste0(args$outprefix, ".tdFinal.txt")) +# Output plot +jpeg(file = paste0(args$outprefix, ".tdplot_mqc.jpg")) +# 2. Create the plot +td_final <- getTxDensity(tx_final, con_expressed, mc.cores = args$cores) + +# 3. 
Close the file +dev.off() + + +# CITE PACKAGES USED +citation("groHMM") +citation("GenomicFeatures") +citation("GenomicAlignments") +citation("AnnotationDbi") + +## R SESSION INFO ## +################################################ +################################################ + +r_log_file <- "R_sessionInfo.log" +if (file.exists(r_log_file) == FALSE) { + sink(r_log_file) + a <- sessionInfo() + print(a) + sink() +} + +################################################################################ diff --git a/conf/awsbatch.config b/conf/awsbatch.config deleted file mode 100644 index 14af5866..00000000 --- a/conf/awsbatch.config +++ /dev/null @@ -1,18 +0,0 @@ -/* - * ------------------------------------------------- - * Nextflow config file for running on AWS batch - * ------------------------------------------------- - * Base config needed for running with -profile awsbatch - */ -params { - config_profile_name = 'AWSBATCH' - config_profile_description = 'AWSBATCH Cloud Profile' - config_profile_contact = 'Alexander Peltzer (@apeltzer)' - config_profile_url = 'https://aws.amazon.com/de/batch/' -} - -aws.region = params.awsregion -process.executor = 'awsbatch' -process.queue = params.awsqueue -executor.awscli = '/home/ec2-user/miniconda/bin/aws' -params.tracedir = './' diff --git a/conf/base.config b/conf/base.config index 9a5ff9da..be0ccec8 100644 --- a/conf/base.config +++ b/conf/base.config @@ -1,92 +1,67 @@ /* - * ------------------------------------------------- - * nf-core/nascent Nextflow base config file - * ------------------------------------------------- - * A 'blank slate' config file, appropriate for general - * use on most high performace compute environments. - * Assumes that all software is installed and available - * on the PATH. Runs in `local` mode - all jobs will be - * run on the logged in environment. - */ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + nf-core/nascent Nextflow base config file +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + A 'blank slate' config file, appropriate for general use on most high performance + compute environments. Assumes that all software is installed and available on + the PATH. Runs in `local` mode - all jobs will be run on the logged in environment. +---------------------------------------------------------------------------------------- +*/ process { - cpus = { check_max( 2 * task.attempt, 'cpus' ) } - memory = { check_max( 8.GB * task.attempt, 'memory' ) } - time = { check_max( 2.h * task.attempt, 'time' ) } + cpus = { check_max( 1 * task.attempt, 'cpus' ) } + memory = { check_max( 6.GB * task.attempt, 'memory' ) } + time = { check_max( 4.h * task.attempt, 'time' ) } - errorStrategy = { task.exitStatus in [143,137,104,134,139] ? 'retry' : 'finish' } - maxRetries = 1 - maxErrors = '-1' + errorStrategy = { task.exitStatus in [143,137,104,134,139] ? 'retry' : 'finish' } + maxRetries = 1 + maxErrors = '-1' - // Process-specific resource requirements - withName:get_software_versions { - errorStrategy = { task.exitStatus in [0,1,127] ? 'ignore' : 'finish' } - } - withName:sra_dump { - cpus = { params.threadfqdump ? check_max( 8, 'cpus' ) : 1 } - } - withName:fastqc { - errorStrategy = { task.exitStatus in [143,137] ? 
'retry' : 'ignore' } - memory = { check_max( 8.GB * task.attempt, 'memory' ) } - } - withName:gzip_fastq { - memory = { check_max( 4.GB * task.attempt, 'memory' ) } - } - withName:bbduk { - cpus = { check_max( 16, 'cpus' ) } - memory = { check_max( 20.GB * task.attempt, 'memory' ) } - } - withName:fastqc_trimmed { - memory = { check_max( 4.GB * task.attempt, 'memory' ) } - } - withName:gzip_trimmed { - memory = { check_max( 4.GB * task.attempt, 'memory' ) } - } - withName:hisat2 { - cpus = { check_max( 32, 'cpus' ) } - memory = { check_max( 100.GB * task.attempt, 'memory' ) } - time = { check_max( 2.h * task.attempt, 'time' ) } - } - withName:samtools { - cpus = { check_max( 16, 'cpus' ) } - memory = { check_max( 100.GB * task.attempt, 'memory' ) } - } - withName:preseq { - memory = { check_max( 20.GB * task.attempt, 'memory' ) } - time = { check_max( 8.h * task.attempt, 'time' ) } - } - withName:rseqc { - memory = { check_max( 40.GB * task.attempt, 'memory' ) } - time = { check_max( 8.h * task.attempt, 'time' ) } - } - withName:pileup { - memory = { check_max( 50.GB * task.attempt, 'memory' ) } - } - withName:bedgraphs { - memory = { check_max( 80.GB * task.attempt, 'memory' ) } - time = { check_max( 4.h * task.attempt, 'time' ) } - } - withName:dreg_prep { - memory = { check_max( 100.GB * task.attempt, 'memory' ) } - } - withName:normalized_bigwigs { - memory = { check_max( 30.GB * task.attempt, 'memory' ) } - } - withName:igvtools { - memory = { check_max( 200.GB * task.attempt, 'memory' ) } - time = { check_max( 1.h * task.attempt, 'time' ) } - } - withName: multiqc { - errorStrategy = { task.exitStatus in [143,137] ? 'retry' : 'ignore' } - } -} - -params { - // Defaults only, expecting to be overwritten - max_memory = 128.GB - max_cpus = 16 - max_time = 240.h - igenomes_base = 's3://ngi-igenomes/igenomes/' - saveReference = true + // Process-specific resource requirements + // NOTE - Please try and re-use the labels below as much as possible. + // These labels are used and recognised by default in DSL2 files hosted on nf-core/modules. + // If possible, it would be nice to keep the same label naming convention when + // adding in your local modules too. 
+ // See https://www.nextflow.io/docs/latest/config.html#config-process-selectors + withLabel:process_single { + cpus = { check_max( 1 , 'cpus' ) } + memory = { check_max( 6.GB * task.attempt, 'memory' ) } + time = { check_max( 4.h * task.attempt, 'time' ) } + } + withLabel:process_low { + cpus = { check_max( 2 * task.attempt, 'cpus' ) } + memory = { check_max( 12.GB * task.attempt, 'memory' ) } + time = { check_max( 4.h * task.attempt, 'time' ) } + } + withLabel:process_medium { + cpus = { check_max( 6 * task.attempt, 'cpus' ) } + memory = { check_max( 36.GB * task.attempt, 'memory' ) } + time = { check_max( 8.h * task.attempt, 'time' ) } + } + withLabel:process_high { + cpus = { check_max( 12 * task.attempt, 'cpus' ) } + memory = { check_max( 72.GB * task.attempt, 'memory' ) } + time = { check_max( 16.h * task.attempt, 'time' ) } + } + withLabel:process_long { + time = { check_max( 20.h * task.attempt, 'time' ) } + } + withLabel:process_high_memory { + memory = { check_max( 200.GB * task.attempt, 'memory' ) } + } + withLabel:error_ignore { + errorStrategy = 'ignore' + } + withLabel:error_retry { + errorStrategy = 'retry' + maxRetries = 2 + } + withName: 'FASTP'{ + cpus = { check_max( 12 * task.attempt, 'cpus' ) } + memory = { check_max( 4.GB * task.attempt, 'memory' ) } + } + withName:CUSTOM_DUMPSOFTWAREVERSIONS { + cache = false + } } diff --git a/conf/igenomes.config b/conf/igenomes.config index 08154994..3f114377 100644 --- a/conf/igenomes.config +++ b/conf/igenomes.config @@ -1,146 +1,440 @@ /* - * ------------------------------------------------- - * Nextflow config file for iGenomes paths - * ------------------------------------------------- - * Defines reference genomes, using iGenome paths - * Can be used by any config that customises the base - * path using $params.igenomes_base / --igenomes_base - */ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Nextflow config file for iGenomes paths +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Defines reference genomes using iGenome paths. 
+ Can be used by any config that customises the base path using: + $params.igenomes_base / --igenomes_base +---------------------------------------------------------------------------------------- +*/ params { - // illumina iGenomes reference file paths - genomes { - 'GRCh37' { - bed12 = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Annotation/Genes/genes.bed" - fasta = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Sequence/WholeGenomeFasta/genome.fa" - gtf = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Annotation/Genes/genes.gtf" - star = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Sequence/STARIndex/" + // illumina iGenomes reference file paths + genomes { + 'GRCh37' { + fasta = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Sequence/BWAIndex/version0.6.0/" + bowtie2 = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Annotation/Genes/genes.bed" + readme = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Annotation/README.txt" + mito_name = "MT" + macs_gsize = "2.7e9" + blacklist = "${projectDir}/assets/blacklists/GRCh37-blacklist.bed" + } + 'GRCh38' { + fasta = "${params.igenomes_base}/Homo_sapiens/NCBI/GRCh38/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Homo_sapiens/NCBI/GRCh38/Sequence/BWAIndex/version0.6.0/" + bowtie2 = "${params.igenomes_base}/Homo_sapiens/NCBI/GRCh38/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Homo_sapiens/NCBI/GRCh38/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Homo_sapiens/NCBI/GRCh38/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Homo_sapiens/NCBI/GRCh38/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Homo_sapiens/NCBI/GRCh38/Annotation/Genes/genes.bed" + mito_name = "chrM" + macs_gsize = "2.7e9" + blacklist = "${projectDir}/assets/blacklists/hg38-blacklist.bed" + } + 'CHM13' { + fasta = "${params.igenomes_base}/Homo_sapiens/UCSC/CHM13/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Homo_sapiens/UCSC/CHM13/Sequence/BWAIndex/" + bwamem2 = "${params.igenomes_base}/Homo_sapiens/UCSC/CHM13/Sequence/BWAmem2Index/" + gtf = "${params.igenomes_base}/Homo_sapiens/NCBI/CHM13/Annotation/Genes/genes.gtf" + gff = "ftp://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/009/914/755/GCF_009914755.1_T2T-CHM13v2.0/GCF_009914755.1_T2T-CHM13v2.0_genomic.gff.gz" + mito_name = "chrM" + } + 'GRCm38' { + fasta = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Sequence/BWAIndex/version0.6.0/" + bowtie2 = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Annotation/Genes/genes.bed" + readme = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Annotation/README.txt" + 
mito_name = "MT" + macs_gsize = "1.87e9" + blacklist = "${projectDir}/assets/blacklists/GRCm38-blacklist.bed" + } + 'TAIR10' { + fasta = "${params.igenomes_base}/Arabidopsis_thaliana/Ensembl/TAIR10/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Arabidopsis_thaliana/Ensembl/TAIR10/Sequence/BWAIndex/version0.6.0/" + bowtie2 = "${params.igenomes_base}/Arabidopsis_thaliana/Ensembl/TAIR10/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Arabidopsis_thaliana/Ensembl/TAIR10/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Arabidopsis_thaliana/Ensembl/TAIR10/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Arabidopsis_thaliana/Ensembl/TAIR10/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Arabidopsis_thaliana/Ensembl/TAIR10/Annotation/Genes/genes.bed" + readme = "${params.igenomes_base}/Arabidopsis_thaliana/Ensembl/TAIR10/Annotation/README.txt" + mito_name = "Mt" + } + 'EB2' { + fasta = "${params.igenomes_base}/Bacillus_subtilis_168/Ensembl/EB2/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Bacillus_subtilis_168/Ensembl/EB2/Sequence/BWAIndex/version0.6.0/" + bowtie2 = "${params.igenomes_base}/Bacillus_subtilis_168/Ensembl/EB2/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Bacillus_subtilis_168/Ensembl/EB2/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Bacillus_subtilis_168/Ensembl/EB2/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Bacillus_subtilis_168/Ensembl/EB2/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Bacillus_subtilis_168/Ensembl/EB2/Annotation/Genes/genes.bed" + readme = "${params.igenomes_base}/Bacillus_subtilis_168/Ensembl/EB2/Annotation/README.txt" + } + 'UMD3.1' { + fasta = "${params.igenomes_base}/Bos_taurus/Ensembl/UMD3.1/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Bos_taurus/Ensembl/UMD3.1/Sequence/BWAIndex/version0.6.0/" + bowtie2 = "${params.igenomes_base}/Bos_taurus/Ensembl/UMD3.1/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Bos_taurus/Ensembl/UMD3.1/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Bos_taurus/Ensembl/UMD3.1/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Bos_taurus/Ensembl/UMD3.1/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Bos_taurus/Ensembl/UMD3.1/Annotation/Genes/genes.bed" + readme = "${params.igenomes_base}/Bos_taurus/Ensembl/UMD3.1/Annotation/README.txt" + mito_name = "MT" + } + 'WBcel235' { + fasta = "${params.igenomes_base}/Caenorhabditis_elegans/Ensembl/WBcel235/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Caenorhabditis_elegans/Ensembl/WBcel235/Sequence/BWAIndex/version0.6.0/" + bowtie2 = "${params.igenomes_base}/Caenorhabditis_elegans/Ensembl/WBcel235/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Caenorhabditis_elegans/Ensembl/WBcel235/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Caenorhabditis_elegans/Ensembl/WBcel235/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Caenorhabditis_elegans/Ensembl/WBcel235/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Caenorhabditis_elegans/Ensembl/WBcel235/Annotation/Genes/genes.bed" + mito_name = "MtDNA" + macs_gsize = "9e7" + } + 'CanFam3.1' { + fasta = "${params.igenomes_base}/Canis_familiaris/Ensembl/CanFam3.1/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Canis_familiaris/Ensembl/CanFam3.1/Sequence/BWAIndex/version0.6.0/" + bowtie2 = 
"${params.igenomes_base}/Canis_familiaris/Ensembl/CanFam3.1/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Canis_familiaris/Ensembl/CanFam3.1/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Canis_familiaris/Ensembl/CanFam3.1/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Canis_familiaris/Ensembl/CanFam3.1/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Canis_familiaris/Ensembl/CanFam3.1/Annotation/Genes/genes.bed" + readme = "${params.igenomes_base}/Canis_familiaris/Ensembl/CanFam3.1/Annotation/README.txt" + mito_name = "MT" + } + 'GRCz10' { + fasta = "${params.igenomes_base}/Danio_rerio/Ensembl/GRCz10/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Danio_rerio/Ensembl/GRCz10/Sequence/BWAIndex/version0.6.0/" + bowtie2 = "${params.igenomes_base}/Danio_rerio/Ensembl/GRCz10/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Danio_rerio/Ensembl/GRCz10/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Danio_rerio/Ensembl/GRCz10/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Danio_rerio/Ensembl/GRCz10/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Danio_rerio/Ensembl/GRCz10/Annotation/Genes/genes.bed" + mito_name = "MT" + } + 'BDGP6' { + fasta = "${params.igenomes_base}/Drosophila_melanogaster/Ensembl/BDGP6/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Drosophila_melanogaster/Ensembl/BDGP6/Sequence/BWAIndex/version0.6.0/" + bowtie2 = "${params.igenomes_base}/Drosophila_melanogaster/Ensembl/BDGP6/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Drosophila_melanogaster/Ensembl/BDGP6/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Drosophila_melanogaster/Ensembl/BDGP6/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Drosophila_melanogaster/Ensembl/BDGP6/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Drosophila_melanogaster/Ensembl/BDGP6/Annotation/Genes/genes.bed" + mito_name = "M" + macs_gsize = "1.2e8" + } + 'EquCab2' { + fasta = "${params.igenomes_base}/Equus_caballus/Ensembl/EquCab2/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Equus_caballus/Ensembl/EquCab2/Sequence/BWAIndex/version0.6.0/" + bowtie2 = "${params.igenomes_base}/Equus_caballus/Ensembl/EquCab2/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Equus_caballus/Ensembl/EquCab2/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Equus_caballus/Ensembl/EquCab2/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Equus_caballus/Ensembl/EquCab2/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Equus_caballus/Ensembl/EquCab2/Annotation/Genes/genes.bed" + readme = "${params.igenomes_base}/Equus_caballus/Ensembl/EquCab2/Annotation/README.txt" + mito_name = "MT" + } + 'EB1' { + fasta = "${params.igenomes_base}/Escherichia_coli_K_12_DH10B/Ensembl/EB1/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Escherichia_coli_K_12_DH10B/Ensembl/EB1/Sequence/BWAIndex/version0.6.0/" + bowtie2 = "${params.igenomes_base}/Escherichia_coli_K_12_DH10B/Ensembl/EB1/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Escherichia_coli_K_12_DH10B/Ensembl/EB1/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Escherichia_coli_K_12_DH10B/Ensembl/EB1/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Escherichia_coli_K_12_DH10B/Ensembl/EB1/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Escherichia_coli_K_12_DH10B/Ensembl/EB1/Annotation/Genes/genes.bed" + readme = 
"${params.igenomes_base}/Escherichia_coli_K_12_DH10B/Ensembl/EB1/Annotation/README.txt" + } + 'Galgal4' { + fasta = "${params.igenomes_base}/Gallus_gallus/Ensembl/Galgal4/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Gallus_gallus/Ensembl/Galgal4/Sequence/BWAIndex/version0.6.0/" + bowtie2 = "${params.igenomes_base}/Gallus_gallus/Ensembl/Galgal4/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Gallus_gallus/Ensembl/Galgal4/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Gallus_gallus/Ensembl/Galgal4/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Gallus_gallus/Ensembl/Galgal4/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Gallus_gallus/Ensembl/Galgal4/Annotation/Genes/genes.bed" + mito_name = "MT" + } + 'Gm01' { + fasta = "${params.igenomes_base}/Glycine_max/Ensembl/Gm01/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Glycine_max/Ensembl/Gm01/Sequence/BWAIndex/version0.6.0/" + bowtie2 = "${params.igenomes_base}/Glycine_max/Ensembl/Gm01/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Glycine_max/Ensembl/Gm01/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Glycine_max/Ensembl/Gm01/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Glycine_max/Ensembl/Gm01/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Glycine_max/Ensembl/Gm01/Annotation/Genes/genes.bed" + readme = "${params.igenomes_base}/Glycine_max/Ensembl/Gm01/Annotation/README.txt" + } + 'Mmul_1' { + fasta = "${params.igenomes_base}/Macaca_mulatta/Ensembl/Mmul_1/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Macaca_mulatta/Ensembl/Mmul_1/Sequence/BWAIndex/version0.6.0/" + bowtie2 = "${params.igenomes_base}/Macaca_mulatta/Ensembl/Mmul_1/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Macaca_mulatta/Ensembl/Mmul_1/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Macaca_mulatta/Ensembl/Mmul_1/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Macaca_mulatta/Ensembl/Mmul_1/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Macaca_mulatta/Ensembl/Mmul_1/Annotation/Genes/genes.bed" + readme = "${params.igenomes_base}/Macaca_mulatta/Ensembl/Mmul_1/Annotation/README.txt" + mito_name = "MT" + } + 'IRGSP-1.0' { + fasta = "${params.igenomes_base}/Oryza_sativa_japonica/Ensembl/IRGSP-1.0/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Oryza_sativa_japonica/Ensembl/IRGSP-1.0/Sequence/BWAIndex/version0.6.0/" + bowtie2 = "${params.igenomes_base}/Oryza_sativa_japonica/Ensembl/IRGSP-1.0/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Oryza_sativa_japonica/Ensembl/IRGSP-1.0/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Oryza_sativa_japonica/Ensembl/IRGSP-1.0/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Oryza_sativa_japonica/Ensembl/IRGSP-1.0/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Oryza_sativa_japonica/Ensembl/IRGSP-1.0/Annotation/Genes/genes.bed" + mito_name = "Mt" + } + 'CHIMP2.1.4' { + fasta = "${params.igenomes_base}/Pan_troglodytes/Ensembl/CHIMP2.1.4/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Pan_troglodytes/Ensembl/CHIMP2.1.4/Sequence/BWAIndex/version0.6.0/" + bowtie2 = "${params.igenomes_base}/Pan_troglodytes/Ensembl/CHIMP2.1.4/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Pan_troglodytes/Ensembl/CHIMP2.1.4/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Pan_troglodytes/Ensembl/CHIMP2.1.4/Sequence/BismarkIndex/" + gtf = 
"${params.igenomes_base}/Pan_troglodytes/Ensembl/CHIMP2.1.4/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Pan_troglodytes/Ensembl/CHIMP2.1.4/Annotation/Genes/genes.bed" + readme = "${params.igenomes_base}/Pan_troglodytes/Ensembl/CHIMP2.1.4/Annotation/README.txt" + mito_name = "MT" + } + 'Rnor_5.0' { + fasta = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_5.0/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_5.0/Sequence/BWAIndex/version0.6.0/" + bowtie2 = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_5.0/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_5.0/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_5.0/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_5.0/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_5.0/Annotation/Genes/genes.bed" + mito_name = "MT" + } + 'Rnor_6.0' { + fasta = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_6.0/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_6.0/Sequence/BWAIndex/version0.6.0/" + bowtie2 = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_6.0/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_6.0/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_6.0/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_6.0/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_6.0/Annotation/Genes/genes.bed" + mito_name = "MT" + } + 'R64-1-1' { + fasta = "${params.igenomes_base}/Saccharomyces_cerevisiae/Ensembl/R64-1-1/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Saccharomyces_cerevisiae/Ensembl/R64-1-1/Sequence/BWAIndex/version0.6.0/" + bowtie2 = "${params.igenomes_base}/Saccharomyces_cerevisiae/Ensembl/R64-1-1/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Saccharomyces_cerevisiae/Ensembl/R64-1-1/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Saccharomyces_cerevisiae/Ensembl/R64-1-1/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Saccharomyces_cerevisiae/Ensembl/R64-1-1/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Saccharomyces_cerevisiae/Ensembl/R64-1-1/Annotation/Genes/genes.bed" + mito_name = "MT" + macs_gsize = "1.2e7" + } + 'EF2' { + fasta = "${params.igenomes_base}/Schizosaccharomyces_pombe/Ensembl/EF2/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Schizosaccharomyces_pombe/Ensembl/EF2/Sequence/BWAIndex/version0.6.0/" + bowtie2 = "${params.igenomes_base}/Schizosaccharomyces_pombe/Ensembl/EF2/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Schizosaccharomyces_pombe/Ensembl/EF2/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Schizosaccharomyces_pombe/Ensembl/EF2/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Schizosaccharomyces_pombe/Ensembl/EF2/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Schizosaccharomyces_pombe/Ensembl/EF2/Annotation/Genes/genes.bed" + readme = "${params.igenomes_base}/Schizosaccharomyces_pombe/Ensembl/EF2/Annotation/README.txt" + mito_name = "MT" + macs_gsize = "1.21e7" + } + 'Sbi1' { + fasta = "${params.igenomes_base}/Sorghum_bicolor/Ensembl/Sbi1/Sequence/WholeGenomeFasta/genome.fa" + bwa = 
"${params.igenomes_base}/Sorghum_bicolor/Ensembl/Sbi1/Sequence/BWAIndex/version0.6.0/" + bowtie2 = "${params.igenomes_base}/Sorghum_bicolor/Ensembl/Sbi1/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Sorghum_bicolor/Ensembl/Sbi1/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Sorghum_bicolor/Ensembl/Sbi1/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Sorghum_bicolor/Ensembl/Sbi1/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Sorghum_bicolor/Ensembl/Sbi1/Annotation/Genes/genes.bed" + readme = "${params.igenomes_base}/Sorghum_bicolor/Ensembl/Sbi1/Annotation/README.txt" + } + 'Sscrofa10.2' { + fasta = "${params.igenomes_base}/Sus_scrofa/Ensembl/Sscrofa10.2/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Sus_scrofa/Ensembl/Sscrofa10.2/Sequence/BWAIndex/version0.6.0/" + bowtie2 = "${params.igenomes_base}/Sus_scrofa/Ensembl/Sscrofa10.2/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Sus_scrofa/Ensembl/Sscrofa10.2/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Sus_scrofa/Ensembl/Sscrofa10.2/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Sus_scrofa/Ensembl/Sscrofa10.2/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Sus_scrofa/Ensembl/Sscrofa10.2/Annotation/Genes/genes.bed" + readme = "${params.igenomes_base}/Sus_scrofa/Ensembl/Sscrofa10.2/Annotation/README.txt" + mito_name = "MT" + } + 'AGPv3' { + fasta = "${params.igenomes_base}/Zea_mays/Ensembl/AGPv3/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Zea_mays/Ensembl/AGPv3/Sequence/BWAIndex/version0.6.0/" + bowtie2 = "${params.igenomes_base}/Zea_mays/Ensembl/AGPv3/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Zea_mays/Ensembl/AGPv3/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Zea_mays/Ensembl/AGPv3/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Zea_mays/Ensembl/AGPv3/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Zea_mays/Ensembl/AGPv3/Annotation/Genes/genes.bed" + mito_name = "Mt" + } + 'hg38' { + fasta = "${params.igenomes_base}/Homo_sapiens/UCSC/hg38/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Homo_sapiens/UCSC/hg38/Sequence/BWAIndex/version0.6.0/" + bowtie2 = "${params.igenomes_base}/Homo_sapiens/UCSC/hg38/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Homo_sapiens/UCSC/hg38/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Homo_sapiens/UCSC/hg38/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Homo_sapiens/UCSC/hg38/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Homo_sapiens/UCSC/hg38/Annotation/Genes/genes.bed" + mito_name = "chrM" + macs_gsize = "2.7e9" + blacklist = "${projectDir}/assets/blacklists/hg38-blacklist.bed" + } + 'hg19' { + fasta = "${params.igenomes_base}/Homo_sapiens/UCSC/hg19/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Homo_sapiens/UCSC/hg19/Sequence/BWAIndex/version0.6.0/" + bowtie2 = "${params.igenomes_base}/Homo_sapiens/UCSC/hg19/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Homo_sapiens/UCSC/hg19/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Homo_sapiens/UCSC/hg19/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Homo_sapiens/UCSC/hg19/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Homo_sapiens/UCSC/hg19/Annotation/Genes/genes.bed" + readme = "${params.igenomes_base}/Homo_sapiens/UCSC/hg19/Annotation/README.txt" + mito_name = "chrM" + macs_gsize = "2.7e9" + blacklist = 
"${projectDir}/assets/blacklists/hg19-blacklist.bed" + } + 'mm10' { + fasta = "${params.igenomes_base}/Mus_musculus/UCSC/mm10/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Mus_musculus/UCSC/mm10/Sequence/BWAIndex/version0.6.0/" + bowtie2 = "${params.igenomes_base}/Mus_musculus/UCSC/mm10/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Mus_musculus/UCSC/mm10/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Mus_musculus/UCSC/mm10/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Mus_musculus/UCSC/mm10/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Mus_musculus/UCSC/mm10/Annotation/Genes/genes.bed" + readme = "${params.igenomes_base}/Mus_musculus/UCSC/mm10/Annotation/README.txt" + mito_name = "chrM" + macs_gsize = "1.87e9" + blacklist = "${projectDir}/assets/blacklists/mm10-blacklist.bed" + } + 'bosTau8' { + fasta = "${params.igenomes_base}/Bos_taurus/UCSC/bosTau8/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Bos_taurus/UCSC/bosTau8/Sequence/BWAIndex/version0.6.0/" + bowtie2 = "${params.igenomes_base}/Bos_taurus/UCSC/bosTau8/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Bos_taurus/UCSC/bosTau8/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Bos_taurus/UCSC/bosTau8/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Bos_taurus/UCSC/bosTau8/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Bos_taurus/UCSC/bosTau8/Annotation/Genes/genes.bed" + mito_name = "chrM" + } + 'ce10' { + fasta = "${params.igenomes_base}/Caenorhabditis_elegans/UCSC/ce10/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Caenorhabditis_elegans/UCSC/ce10/Sequence/BWAIndex/version0.6.0/" + bowtie2 = "${params.igenomes_base}/Caenorhabditis_elegans/UCSC/ce10/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Caenorhabditis_elegans/UCSC/ce10/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Caenorhabditis_elegans/UCSC/ce10/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Caenorhabditis_elegans/UCSC/ce10/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Caenorhabditis_elegans/UCSC/ce10/Annotation/Genes/genes.bed" + readme = "${params.igenomes_base}/Caenorhabditis_elegans/UCSC/ce10/Annotation/README.txt" + mito_name = "chrM" + macs_gsize = "9e7" + } + 'canFam3' { + fasta = "${params.igenomes_base}/Canis_familiaris/UCSC/canFam3/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Canis_familiaris/UCSC/canFam3/Sequence/BWAIndex/version0.6.0/" + bowtie2 = "${params.igenomes_base}/Canis_familiaris/UCSC/canFam3/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Canis_familiaris/UCSC/canFam3/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Canis_familiaris/UCSC/canFam3/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Canis_familiaris/UCSC/canFam3/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Canis_familiaris/UCSC/canFam3/Annotation/Genes/genes.bed" + readme = "${params.igenomes_base}/Canis_familiaris/UCSC/canFam3/Annotation/README.txt" + mito_name = "chrM" + } + 'danRer10' { + fasta = "${params.igenomes_base}/Danio_rerio/UCSC/danRer10/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Danio_rerio/UCSC/danRer10/Sequence/BWAIndex/version0.6.0/" + bowtie2 = "${params.igenomes_base}/Danio_rerio/UCSC/danRer10/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Danio_rerio/UCSC/danRer10/Sequence/STARIndex/" + bismark = 
"${params.igenomes_base}/Danio_rerio/UCSC/danRer10/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Danio_rerio/UCSC/danRer10/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Danio_rerio/UCSC/danRer10/Annotation/Genes/genes.bed" + mito_name = "chrM" + macs_gsize = "1.37e9" + } + 'dm6' { + fasta = "${params.igenomes_base}/Drosophila_melanogaster/UCSC/dm6/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Drosophila_melanogaster/UCSC/dm6/Sequence/BWAIndex/version0.6.0/" + bowtie2 = "${params.igenomes_base}/Drosophila_melanogaster/UCSC/dm6/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Drosophila_melanogaster/UCSC/dm6/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Drosophila_melanogaster/UCSC/dm6/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Drosophila_melanogaster/UCSC/dm6/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Drosophila_melanogaster/UCSC/dm6/Annotation/Genes/genes.bed" + mito_name = "chrM" + macs_gsize = "1.2e8" + } + 'equCab2' { + fasta = "${params.igenomes_base}/Equus_caballus/UCSC/equCab2/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Equus_caballus/UCSC/equCab2/Sequence/BWAIndex/version0.6.0/" + bowtie2 = "${params.igenomes_base}/Equus_caballus/UCSC/equCab2/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Equus_caballus/UCSC/equCab2/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Equus_caballus/UCSC/equCab2/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Equus_caballus/UCSC/equCab2/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Equus_caballus/UCSC/equCab2/Annotation/Genes/genes.bed" + readme = "${params.igenomes_base}/Equus_caballus/UCSC/equCab2/Annotation/README.txt" + mito_name = "chrM" + } + 'galGal4' { + fasta = "${params.igenomes_base}/Gallus_gallus/UCSC/galGal4/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Gallus_gallus/UCSC/galGal4/Sequence/BWAIndex/version0.6.0/" + bowtie2 = "${params.igenomes_base}/Gallus_gallus/UCSC/galGal4/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Gallus_gallus/UCSC/galGal4/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Gallus_gallus/UCSC/galGal4/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Gallus_gallus/UCSC/galGal4/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Gallus_gallus/UCSC/galGal4/Annotation/Genes/genes.bed" + readme = "${params.igenomes_base}/Gallus_gallus/UCSC/galGal4/Annotation/README.txt" + mito_name = "chrM" + } + 'panTro4' { + fasta = "${params.igenomes_base}/Pan_troglodytes/UCSC/panTro4/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Pan_troglodytes/UCSC/panTro4/Sequence/BWAIndex/version0.6.0/" + bowtie2 = "${params.igenomes_base}/Pan_troglodytes/UCSC/panTro4/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Pan_troglodytes/UCSC/panTro4/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Pan_troglodytes/UCSC/panTro4/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Pan_troglodytes/UCSC/panTro4/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Pan_troglodytes/UCSC/panTro4/Annotation/Genes/genes.bed" + readme = "${params.igenomes_base}/Pan_troglodytes/UCSC/panTro4/Annotation/README.txt" + mito_name = "chrM" + } + 'rn6' { + fasta = "${params.igenomes_base}/Rattus_norvegicus/UCSC/rn6/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Rattus_norvegicus/UCSC/rn6/Sequence/BWAIndex/version0.6.0/" + bowtie2 = 
"${params.igenomes_base}/Rattus_norvegicus/UCSC/rn6/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Rattus_norvegicus/UCSC/rn6/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Rattus_norvegicus/UCSC/rn6/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Rattus_norvegicus/UCSC/rn6/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Rattus_norvegicus/UCSC/rn6/Annotation/Genes/genes.bed" + mito_name = "chrM" + } + 'sacCer3' { + fasta = "${params.igenomes_base}/Saccharomyces_cerevisiae/UCSC/sacCer3/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Saccharomyces_cerevisiae/UCSC/sacCer3/Sequence/BWAIndex/version0.6.0/" + bowtie2 = "${params.igenomes_base}/Saccharomyces_cerevisiae/UCSC/sacCer3/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Saccharomyces_cerevisiae/UCSC/sacCer3/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Saccharomyces_cerevisiae/UCSC/sacCer3/Sequence/BismarkIndex/" + readme = "${params.igenomes_base}/Saccharomyces_cerevisiae/UCSC/sacCer3/Annotation/README.txt" + mito_name = "chrM" + macs_gsize = "1.2e7" + } + 'susScr3' { + fasta = "${params.igenomes_base}/Sus_scrofa/UCSC/susScr3/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Sus_scrofa/UCSC/susScr3/Sequence/BWAIndex/version0.6.0/" + bowtie2 = "${params.igenomes_base}/Sus_scrofa/UCSC/susScr3/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Sus_scrofa/UCSC/susScr3/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Sus_scrofa/UCSC/susScr3/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Sus_scrofa/UCSC/susScr3/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Sus_scrofa/UCSC/susScr3/Annotation/Genes/genes.bed" + readme = "${params.igenomes_base}/Sus_scrofa/UCSC/susScr3/Annotation/README.txt" + mito_name = "chrM" + } } - 'GRCm38' { - bed12 = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Annotation/Genes/genes.bed" - fasta = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Sequence/WholeGenomeFasta/genome.fa" - gtf = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Annotation/Genes/genes.gtf" - star = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Sequence/STARIndex/" - } - 'TAIR10' { - bed12 = "${params.igenomes_base}/Arabidopsis_thaliana/Ensembl/TAIR10/Annotation/Genes/genes.bed" - fasta = "${params.igenomes_base}/Arabidopsis_thaliana/Ensembl/TAIR10/Sequence/WholeGenomeFasta/genome.fa" - gtf = "${params.igenomes_base}/Arabidopsis_thaliana/Ensembl/TAIR10/Annotation/Genes/genes.gtf" - star = "${params.igenomes_base}/Arabidopsis_thaliana/Ensembl/TAIR10/Sequence/STARIndex/" - } - 'EB2' { - bed12 = "${params.igenomes_base}/Bacillus_subtilis_168/Ensembl/EB2/Annotation/Genes/genes.bed" - fasta = "${params.igenomes_base}/Bacillus_subtilis_168/Ensembl/EB2/Sequence/WholeGenomeFasta/genome.fa" - gtf = "${params.igenomes_base}/Bacillus_subtilis_168/Ensembl/EB2/Annotation/Genes/genes.gtf" - star = "${params.igenomes_base}/Bacillus_subtilis_168/Ensembl/EB2/Sequence/STARIndex/" - } - 'UMD3.1' { - bed12 = "${params.igenomes_base}/Bos_taurus/Ensembl/UMD3.1/Annotation/Genes/genes.bed" - fasta = "${params.igenomes_base}/Bos_taurus/Ensembl/UMD3.1/Sequence/WholeGenomeFasta/genome.fa" - gtf = "${params.igenomes_base}/Bos_taurus/Ensembl/UMD3.1/Annotation/Genes/genes.gtf" - star = "${params.igenomes_base}/Bos_taurus/Ensembl/UMD3.1/Sequence/STARIndex/" - } - 'WBcel235' { - bed12 = "${params.igenomes_base}/Caenorhabditis_elegans/Ensembl/WBcel235/Annotation/Genes/genes.bed" - fasta = 
"${params.igenomes_base}/Caenorhabditis_elegans/Ensembl/WBcel235/Sequence/WholeGenomeFasta/genome.fa" - gtf = "${params.igenomes_base}/Caenorhabditis_elegans/Ensembl/WBcel235/Annotation/Genes/genes.gtf" - star = "${params.igenomes_base}/Caenorhabditis_elegans/Ensembl/WBcel235/Sequence/STARIndex/" - } - 'CanFam3.1' { - bed12 = "${params.igenomes_base}/Canis_familiaris/Ensembl/CanFam3.1/Annotation/Genes/genes.bed" - fasta = "${params.igenomes_base}/Canis_familiaris/Ensembl/CanFam3.1/Sequence/WholeGenomeFasta/genome.fa" - gtf = "${params.igenomes_base}/Canis_familiaris/Ensembl/CanFam3.1/Annotation/Genes/genes.gtf" - star = "${params.igenomes_base}/Canis_familiaris/Ensembl/CanFam3.1/Sequence/STARIndex/" - } - 'GRCz10' { - bed12 = "${params.igenomes_base}/Danio_rerio/Ensembl/GRCz10/Annotation/Genes/genes.bed" - fasta = "${params.igenomes_base}/Danio_rerio/Ensembl/GRCz10/Sequence/WholeGenomeFasta/genome.fa" - gtf = "${params.igenomes_base}/Danio_rerio/Ensembl/GRCz10/Annotation/Genes/genes.gtf" - star = "${params.igenomes_base}/Danio_rerio/Ensembl/GRCz10/Sequence/STARIndex/" - } - 'BDGP6' { - bed12 = "${params.igenomes_base}/Drosophila_melanogaster/Ensembl/BDGP6/Annotation/Genes/genes.bed" - fasta = "${params.igenomes_base}/Drosophila_melanogaster/Ensembl/BDGP6/Sequence/WholeGenomeFasta/genome.fa" - gtf = "${params.igenomes_base}/Drosophila_melanogaster/Ensembl/BDGP6/Annotation/Genes/genes.gtf" - star = "${params.igenomes_base}/Drosophila_melanogaster/Ensembl/BDGP6/Sequence/STARIndex/" - } - 'EquCab2' { - bed12 = "${params.igenomes_base}/Equus_caballus/Ensembl/EquCab2/Annotation/Genes/genes.bed" - fasta = "${params.igenomes_base}/Equus_caballus/Ensembl/EquCab2/Sequence/WholeGenomeFasta/genome.fa" - gtf = "${params.igenomes_base}/Equus_caballus/Ensembl/EquCab2/Annotation/Genes/genes.gtf" - star = "${params.igenomes_base}/Equus_caballus/Ensembl/EquCab2/Sequence/STARIndex/" - } - 'EB1' { - bed12 = "${params.igenomes_base}/Escherichia_coli_K_12_DH10B/Ensembl/EB1/Annotation/Genes/genes.bed" - fasta = "${params.igenomes_base}/Escherichia_coli_K_12_DH10B/Ensembl/EB1/Sequence/WholeGenomeFasta/genome.fa" - gtf = "${params.igenomes_base}/Escherichia_coli_K_12_DH10B/Ensembl/EB1/Annotation/Genes/genes.gtf" - star = "${params.igenomes_base}/Escherichia_coli_K_12_DH10B/Ensembl/EB1/Sequence/STARIndex/" - } - 'Galgal4' { - bed12 = "${params.igenomes_base}/Gallus_gallus/Ensembl/Galgal4/Annotation/Genes/genes.bed" - fasta = "${params.igenomes_base}/Gallus_gallus/Ensembl/Galgal4/Sequence/WholeGenomeFasta/genome.fa" - gtf = "${params.igenomes_base}/Gallus_gallus/Ensembl/Galgal4/Annotation/Genes/genes.gtf" - star = "${params.igenomes_base}/Gallus_gallus/Ensembl/Galgal4/Sequence/STARIndex/" - } - 'Gm01' { - bed12 = "${params.igenomes_base}/Glycine_max/Ensembl/Gm01/Annotation/Genes/genes.bed" - fasta = "${params.igenomes_base}/Glycine_max/Ensembl/Gm01/Sequence/WholeGenomeFasta/genome.fa" - gtf = "${params.igenomes_base}/Glycine_max/Ensembl/Gm01/Annotation/Genes/genes.gtf" - star = "${params.igenomes_base}/Glycine_max/Ensembl/Gm01/Sequence/STARIndex/" - } - 'Mmul_1' { - bed12 = "${params.igenomes_base}/Macaca_mulatta/Ensembl/Mmul_1/Annotation/Genes/genes.bed" - fasta = "${params.igenomes_base}/Macaca_mulatta/Ensembl/Mmul_1/Sequence/WholeGenomeFasta/genome.fa" - gtf = "${params.igenomes_base}/Macaca_mulatta/Ensembl/Mmul_1/Annotation/Genes/genes.gtf" - star = "${params.igenomes_base}/Macaca_mulatta/Ensembl/Mmul_1/Sequence/STARIndex/" - } - 'IRGSP-1.0' { - bed12 = 
"${params.igenomes_base}/Oryza_sativa_japonica/Ensembl/IRGSP-1.0/Annotation/Genes/genes.bed" - fasta = "${params.igenomes_base}/Oryza_sativa_japonica/Ensembl/IRGSP-1.0/Sequence/WholeGenomeFasta/genome.fa" - gtf = "${params.igenomes_base}/Oryza_sativa_japonica/Ensembl/IRGSP-1.0/Annotation/Genes/genes.gtf" - star = "${params.igenomes_base}/Oryza_sativa_japonica/Ensembl/IRGSP-1.0/Sequence/STARIndex/" - } - 'CHIMP2.1.4' { - bed12 = "${params.igenomes_base}/Pan_troglodytes/Ensembl/CHIMP2.1.4/Annotation/Genes/genes.bed" - fasta = "${params.igenomes_base}/Pan_troglodytes/Ensembl/CHIMP2.1.4/Sequence/WholeGenomeFasta/genome.fa" - gtf = "${params.igenomes_base}/Pan_troglodytes/Ensembl/CHIMP2.1.4/Annotation/Genes/genes.gtf" - star = "${params.igenomes_base}/Pan_troglodytes/Ensembl/CHIMP2.1.4/Sequence/STARIndex/" - } - 'Rnor_6.0' { - bed12 = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_6.0/Annotation/Genes/genes.bed" - fasta = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_6.0/Sequence/WholeGenomeFasta/genome.fa" - gtf = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_6.0/Annotation/Genes/genes.gtf" - star = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_6.0/Sequence/STARIndex/" - } - 'R64-1-1' { - bed12 = "${params.igenomes_base}/Saccharomyces_cerevisiae/Ensembl/R64-1-1/Annotation/Genes/genes.bed" - fasta = "${params.igenomes_base}/Saccharomyces_cerevisiae/Ensembl/R64-1-1/Sequence/WholeGenomeFasta/genome.fa" - gtf = "${params.igenomes_base}/Saccharomyces_cerevisiae/Ensembl/R64-1-1/Annotation/Genes/genes.gtf" - star = "${params.igenomes_base}/Saccharomyces_cerevisiae/Ensembl/R64-1-1/Sequence/STARIndex/" - } - 'EF2' { - bed12 = "${params.igenomes_base}/Schizosaccharomyces_pombe/Ensembl/EF2/Annotation/Genes/genes.bed" - fasta = "${params.igenomes_base}/Schizosaccharomyces_pombe/Ensembl/EF2/Sequence/WholeGenomeFasta/genome.fa" - gtf = "${params.igenomes_base}/Schizosaccharomyces_pombe/Ensembl/EF2/Annotation/Genes/genes.gtf" - star = "${params.igenomes_base}/Schizosaccharomyces_pombe/Ensembl/EF2/Sequence/STARIndex/" - } - 'Sbi1' { - bed12 = "${params.igenomes_base}/Sorghum_bicolor/Ensembl/Sbi1/Annotation/Genes/genes.bed" - fasta = "${params.igenomes_base}/Sorghum_bicolor/Ensembl/Sbi1/Sequence/WholeGenomeFasta/genome.fa" - gtf = "${params.igenomes_base}/Sorghum_bicolor/Ensembl/Sbi1/Annotation/Genes/genes.gtf" - star = "${params.igenomes_base}/Sorghum_bicolor/Ensembl/Sbi1/Sequence/STARIndex/" - } - 'Sscrofa10.2' { - bed12 = "${params.igenomes_base}/Sus_scrofa/Ensembl/Sscrofa10.2/Annotation/Genes/genes.bed" - fasta = "${params.igenomes_base}/Sus_scrofa/Ensembl/Sscrofa10.2/Sequence/WholeGenomeFasta/genome.fa" - gtf = "${params.igenomes_base}/Sus_scrofa/Ensembl/Sscrofa10.2/Annotation/Genes/genes.gtf" - star = "${params.igenomes_base}/Sus_scrofa/Ensembl/Sscrofa10.2/Sequence/STARIndex/" - } - 'AGPv3' { - bed12 = "${params.igenomes_base}/Zea_mays/Ensembl/AGPv3/Annotation/Genes/genes.bed" - fasta = "${params.igenomes_base}/Zea_mays/Ensembl/AGPv3/Sequence/WholeGenomeFasta/genome.fa" - gtf = "${params.igenomes_base}/Zea_mays/Ensembl/AGPv3/Annotation/Genes/genes.gtf" - star = "${params.igenomes_base}/Zea_mays/Ensembl/AGPv3/Sequence/STARIndex/" - } - } } diff --git a/conf/modules.config b/conf/modules.config new file mode 100644 index 00000000..c7460ed7 --- /dev/null +++ b/conf/modules.config @@ -0,0 +1,181 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Config file for defining DSL2 per module options and publishing paths 
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Available keys to override module options: + ext.args = Additional arguments appended to command in module. + ext.args2 = Second set of arguments appended to command in module (multi-tool modules). + ext.args3 = Third set of arguments appended to command in module (multi-tool modules). + ext.prefix = File name prefix for output files. +---------------------------------------------------------------------------------------- +*/ + +process { + + publishDir = [ + path: { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + + withName: SAMPLESHEET_CHECK { + publishDir = [ + path: { "${params.outdir}/pipeline_info" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + + withName: CUSTOM_DUMPSOFTWAREVERSIONS { + publishDir = [ + path: { "${params.outdir}/pipeline_info" }, + mode: params.publish_dir_mode, + pattern: '*_versions.yml' + ] + } + + withName: FASTQC { + ext.args = '--quiet' + } + + withName: FASTP { + ext.prefix = { "${meta.id}.trimmed" } + } + + withName: GFFREAD { + ext.args = "--keep-exon-attrs -F -T" + } + + withName: BWA_MEM { + publishDir = [ + [ + path: { "${params.outdir}/${params.aligner}/log" }, + mode: 'copy', + pattern: "out" + ], + [ + path: { "${params.outdir}/${params.aligner}/log" }, + mode: 'copy', + pattern: "tab" + ] + ] + } + + withName: 'DRAGMAP_HASHTABLE' { + ext.when = { !params.dragmap && params.aligner == "dragmap" } + publishDir = [ + enabled: params.save_reference, + mode: params.publish_dir_mode, + path: { "${params.outdir}/reference" }, + pattern: "dragmap" + ] + } + + withName: ".*DRAGMAP_ALIGN" { + ext.when = { params.aligner == "dragmap" } + ext.args = { "--RGSM ${meta.read_group}" } + } + + if(params.with_umi) { + withName: '.*:BAM_DEDUP_STATS_SAMTOOLS_UMITOOLS:UMITOOLS_DEDUP' { + ext.args = { [ + meta.single_end ? 
'' : '--unpaired-reads=discard --chimeric-pairs=discard', + ].join(' ').trim() } + ext.prefix = { "${meta.id}.umi_dedup.sorted" } + } + + withName: '.*:BAM_DEDUP_STATS_SAMTOOLS_UMITOOLS:SAMTOOLS_INDEX' { + ext.prefix = { "${meta.id}.umi_dedup.sorted" } + } + + if(params.umitools_dedup_stats) { + withName: '.*:BAM_DEDUP_STATS_SAMTOOLS_UMITOOLS:BAM_STATS_SAMTOOLS:.*' { + ext.prefix = { "${meta.id}.umi_dedup.sorted.bam" } + } + } + } + + withName: SAMTOOLS_SORT { + ext.prefix = { "${meta.id}.sorted" } + } + + withName: PRESEQ_CCURVE { + ext.args = "-B" + } + + withName: PRESEQ_LCEXTRAP { + ext.args = "-B" + } + + withName: BEDTOOLS_GENOMECOV_PLUS { + ext.args = "-bg -strand +" + ext.prefix = { "${meta.id}.plus" } + } + + withName: BEDTOOLS_GENOMECOV_MINUS { + ext.args = "-bg -strand -" + ext.prefix = { "${meta.id}.minus" } + } + + withName: DEEPTOOLS_BAMCOVERAGE_PLUS { + ext.args = "--filterRNAstrand forward" + ext.prefix = { "${meta.id}.plus.bigWig" } + } + + withName: DEEPTOOLS_BAMCOVERAGE_MINUS { + ext.args = "--filterRNAstrand reverse" + ext.prefix = { "${meta.id}.minus.bigWig" } + } + + withName: SUBREAD_FEATURECOUNTS_GENE { + ext.args = "-B -C" + publishDir = [ + path: { "${params.outdir}/${params.aligner}/featurecounts/gene" }, + mode: 'copy', + ] + } + + withName: SUBREAD_FEATURECOUNTS_PREDICTED { + ext.prefix = { "${meta.id}-group_${annotation.baseName}-transcripts" } + ext.args = '-F "SAF"' + publishDir = [ + path: { "${params.outdir}/${params.aligner}/featurecounts/predicted" }, + mode: 'copy', + ] + } + + withName: HOMER_FINDPEAKS { + // TODO Handle other assays + ext.args = "-style groseq" + } + + withName: HOMER_MAKETAGDIRECTORY { + ext.args = "-checkGC" + } + + withName: HOMER_MAKEUCSCFILE { + ext.args = "-strand separate" + } + + withName: PINTS_CALLER { + ext.args = { "--exp-type $params.assay_type" } + // FIXME The singularity image doesn't exist on biocontainers yet + // https://github.com/hyulab/PINTS/issues/3 + ext.singularity_pull_docker_container = true + } + + withName: BEDTOOLS_SORT { + ext.prefix = { "${meta.id}_sorted" } + } + + withName: BEDTOOLS_MERGE { + ext.args = "-header" + ext.prefix = { "${meta.id}_merged" } + } + + withName: BEDTOOLS_INTERSECT_FILTER { + ext.prefix = { "${meta.id}_filtered" } + ext.args = { params.no_overlap ? "-v": "" } + } + +} diff --git a/conf/multiqc_config.yaml b/conf/multiqc_config.yaml deleted file mode 100644 index 78a64de1..00000000 --- a/conf/multiqc_config.yaml +++ /dev/null @@ -1,7 +0,0 @@ -report_comment: > - This report has been generated by the nf-core/nascent - analysis pipeline. For information about how to interpret these results, please see the - documentation. -report_section_order: - nf-core/nascent-software-versions: - order: -1000 diff --git a/conf/test.config b/conf/test.config index 60b09634..a2b50210 100644 --- a/conf/test.config +++ b/conf/test.config @@ -1,23 +1,40 @@ /* - * ------------------------------------------------- - * Nextflow config file for running tests - * ------------------------------------------------- - * Defines bundled input files and everything required - * to run a fast and simple test. Use as follows: - * nextflow run nf-core/nascent -profile test - */ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Nextflow config file for running minimal tests +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Defines input files and everything required to run a fast and simple pipeline test. 
+ + Use as follows: + nextflow run nf-core/nascent -profile test, --outdir + +---------------------------------------------------------------------------------------- +*/ params { - // Limit resources so that this can run on Travis - max_cpus = 2 - max_memory = 6.GB - max_time = 48.h - - // Input data - singleEnd = true - threadfqdump = false - readPaths = [ - ['SRR4012402', ['https://raw.githubusercontent.com/nf-core/test-datasets/nascent/testdata/SRR4012402.fastq']], -] - fasta = 'https://raw.githubusercontent.com/nf-core/test-datasets/nascent/reference/chr21.fa' + config_profile_name = 'Test profile' + config_profile_description = 'Minimal test dataset to check pipeline function' + + // Limit resources so that this can run on GitHub Actions + max_cpus = 2 + max_memory = '6.GB' + max_time = '6.h' + + // Input data + input = "${projectDir}/assets/samplesheet.csv" + + // Genome references + fasta = 'https://raw.githubusercontent.com/nf-core/test-datasets/nascent/reference/GRCh38_chr21.fa' + gtf = 's3://ngi-igenomes/igenomes/Homo_sapiens/NCBI/GRCh38/Annotation/Genes/genes.gtf' + + assay_type = "GROseq" + skip_grohmm = true // FIXME Fails due to higher memory requirements + tuning_file = "${projectDir}/tests/subworkflows/local/grohmm/tuningparams_small.csv" + filter_bed = "${projectDir}/tests/samplesheets/region.bed" +} + +process { + // FIXME https://github.com/hyulab/PINTS/issues/12 + withName: PINTS_CALLER { + ext.when = false + } } diff --git a/conf/test_copro.config b/conf/test_copro.config new file mode 100644 index 00000000..fb2db1f6 --- /dev/null +++ b/conf/test_copro.config @@ -0,0 +1,44 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Nextflow config file for running minimal tests +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Defines input files and everything required to run a fast and simple pipeline test. + + Use as follows: + nextflow run nf-core/nascent -profile test, --outdir + +---------------------------------------------------------------------------------------- +*/ + +params { + config_profile_name = 'CoPRO PINTS Test profile' + config_profile_description = 'Test dataset to check PINTS pipeline function(https://pints.yulab.org/tre_calling#part-iv-case-2)' + + // Input data + input = "${projectDir}/tests/samplesheets/copro.csv" + + genome = 'hg38' + assay_type = 'CoPRO' + filter_bed = "https://pints.yulab.org/ref/examples/promoters_1kb_tss_centered.bed.gz" + with_umi = true + umitools_dedup_stats = true +} + +process { + withName: NFCORE_NASCENT:NASCENT:FASTP { + ext.args = [ + "--adapter_sequence TGGAATTCTCGGGTGCCAAGGAACTCCAGTCAC", + "--adapter_sequence_r2 GATCGTCGGACTGTAGAACTCTGAAC", + "--umi", + "--umi_len=6", + "--umi_loc=per_read", + "-g", + "--low_complexity_filter", + "-w 8", + "-c", + "--overlap_len_require 18", + "--low_complexity_filter", + "-l 18" + ].join(' ').trim() + } +} diff --git a/conf/test_full.config b/conf/test_full.config new file mode 100644 index 00000000..e0bdaebb --- /dev/null +++ b/conf/test_full.config @@ -0,0 +1,24 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Nextflow config file for running full-size tests +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Defines input files and everything required to run a full size pipeline test. 
+ + Use as follows: + nextflow run nf-core/nascent -profile test_full, --outdir + +---------------------------------------------------------------------------------------- +*/ + +params { + config_profile_name = 'Full test profile' + config_profile_description = 'Full test dataset to check pipeline function' + + // Input data for full size test + // TODO nf-core: Specify the paths to your full test data ( on nf-core/test-datasets or directly in repositories, e.g. SRA) + // TODO nf-core: Give any required params for the test so that command line flags are not needed + input = 'https://raw.githubusercontent.com/nf-core/test-datasets/viralrecon/samplesheet/samplesheet_full_illumina_amplicon.csv' + + // Genome references + genome = 'R64-1-1' +} diff --git a/conf/test_grocap.config b/conf/test_grocap.config new file mode 100644 index 00000000..6b312c0f --- /dev/null +++ b/conf/test_grocap.config @@ -0,0 +1,38 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Nextflow config file for running minimal tests +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Defines input files and everything required to run a fast and simple pipeline test. + + Use as follows: + nextflow run nf-core/nascent -profile test, --outdir + +---------------------------------------------------------------------------------------- +*/ + +params { + config_profile_name = 'GROcap PINTS Test profile' + config_profile_description = 'Test dataset to check PINTS pipeline function(https://pints.yulab.org/tre_calling#part-iii-case-1)' + + // Input data + input = "${projectDir}/tests/samplesheets/grocap.csv" + + genome = 'hg38' + assay_type = 'GROcap' + filter_bed = "https://pints.yulab.org/ref/examples/promoters_1kb_tss_centered.bed.gz" +} + +process { + withName: NFCORE_NASCENT:NASCENT:FASTP { + ext.args = [ + "--adapter_sequence TGGAATTCTCGGGTGCCAAGG", + "-l 14", // only keep reads longer than 14nts after trimming + // This library was polyadenylated, + // so we are trimming the last 20nts per reads (with --trim_tail1). + // For more recent single-end PRO/GRO-cap libraries, this may not be necessary. + "--trim_tail1 20", + "--low_complexity_filter", + "-w 8" + ].join(' ').trim() + } +} diff --git a/docs/README.md b/docs/README.md index 1bb7e42d..b739723c 100644 --- a/docs/README.md +++ b/docs/README.md @@ -1,12 +1,10 @@ # nf-core/nascent: Documentation -The nf-core/nascent documentation is split into the following files: +The nf-core/nascent documentation is split into the following pages: -1. [Installation](https://nf-co.re/usage/installation) -2. Pipeline configuration - * [Local installation](https://nf-co.re/usage/local_installation) - * [Adding your own system config](https://nf-co.re/usage/adding_own_config) - * [Reference genomes](https://nf-co.re/usage/reference_genomes) -3. [Running the pipeline](usage.md) -4. [Output and how to interpret the results](output.md) -5. [Troubleshooting](https://nf-co.re/usage/troubleshooting) +- [Usage](usage.md) + - An overview of how the pipeline works, how to run it and a description of all of the different command-line flags. +- [Output](output.md) + - An overview of the different results produced by the pipeline and how to interpret them. 
+ +You can find a lot more documentation about installing, configuring and running nf-core pipelines on the website: [https://nf-co.re](https://nf-co.re) diff --git a/docs/images/mqc_fastp_plot.png b/docs/images/mqc_fastp_plot.png new file mode 100755 index 00000000..798539ca Binary files /dev/null and b/docs/images/mqc_fastp_plot.png differ diff --git a/docs/images/mqc_fastqc_adapter.png b/docs/images/mqc_fastqc_adapter.png new file mode 100755 index 00000000..361d0e47 Binary files /dev/null and b/docs/images/mqc_fastqc_adapter.png differ diff --git a/docs/images/mqc_fastqc_counts.png b/docs/images/mqc_fastqc_counts.png new file mode 100755 index 00000000..cb39ebb8 Binary files /dev/null and b/docs/images/mqc_fastqc_counts.png differ diff --git a/docs/images/mqc_fastqc_quality.png b/docs/images/mqc_fastqc_quality.png new file mode 100755 index 00000000..a4b89bf5 Binary files /dev/null and b/docs/images/mqc_fastqc_quality.png differ diff --git a/docs/images/mqc_picard_markduplicates.png b/docs/images/mqc_picard_markduplicates.png new file mode 100755 index 00000000..33b4e3d7 Binary files /dev/null and b/docs/images/mqc_picard_markduplicates.png differ diff --git a/docs/images/mqc_preseq_plot.png b/docs/images/mqc_preseq_plot.png new file mode 100755 index 00000000..c4c98f17 Binary files /dev/null and b/docs/images/mqc_preseq_plot.png differ diff --git a/docs/images/mqc_samtools_idxstats.png b/docs/images/mqc_samtools_idxstats.png new file mode 100755 index 00000000..a3eff4c3 Binary files /dev/null and b/docs/images/mqc_samtools_idxstats.png differ diff --git a/docs/images/mqc_samtools_mapped.png b/docs/images/mqc_samtools_mapped.png new file mode 100755 index 00000000..33376009 Binary files /dev/null and b/docs/images/mqc_samtools_mapped.png differ diff --git a/docs/images/nf-core-nascent_logo_dark.png b/docs/images/nf-core-nascent_logo_dark.png new file mode 100644 index 00000000..7d225a0e Binary files /dev/null and b/docs/images/nf-core-nascent_logo_dark.png differ diff --git a/docs/images/nf-core-nascent_logo_light.png b/docs/images/nf-core-nascent_logo_light.png new file mode 100644 index 00000000..ac2a61b0 Binary files /dev/null and b/docs/images/nf-core-nascent_logo_light.png differ diff --git a/docs/output.md b/docs/output.md index e017e7f2..269f9c98 100644 --- a/docs/output.md +++ b/docs/output.md @@ -1,169 +1,388 @@ # nf-core/nascent: Output +## Introduction + This document describes the output produced by the pipeline. Most of the plots are taken from the MultiQC report, which summarises results at the end of the pipeline. +The directories listed below will be created in the results directory after the pipeline has finished. All paths are relative to the top-level results directory. 
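By default, the per-module `publishDir` rule in `conf/modules.config` names each top-level results folder after the Nextflow process that produced it; modules that set an explicit `publishDir` (for example the featureCounts steps) publish to their own paths instead. Below is a minimal Groovy sketch of that naming rule; the process name shown is a hypothetical example of `task.process`, not a value taken from a real run.

```groovy
// Sketch of the default publishDir folder naming used in conf/modules.config:
// take the last component of the fully qualified process name, keep the part
// before the first underscore, and lowercase it.
def processName = 'NFCORE_NASCENT:NASCENT:FASTQC' // hypothetical example value of task.process
def folder = processName.tokenize(':')[-1].tokenize('_')[0].toLowerCase()
assert folder == 'fastqc' // published under <outdir>/fastqc by default
```

So, for instance, FastQC results land in `fastqc/`, while the samplesheet check and software-version dumps are published to `pipeline_info/`, which `conf/modules.config` sets explicitly.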
+
## Pipeline overview
-The pipeline is built using [Nextflow](https://www.nextflow.io/)
-and processes data using the following steps:
-* [fastq-dump](#fastqdump) - if needed, extract the fastq file[s] from a sample
-* [SeqKit/bbduk](#seqkitbbduk) - flip reads (experiment specific) & trim reads for adapters/quality/length
-* [FastQC](#fastqc) - read quality control
-* [MultiQC](#multiqc) - aggregate report, describing results of the whole pipeline
-* [HISAT2](#hisat2) - map reads to the reference genome
-* [Samtools](#samtools) - convert the mapped reads as SAM files to BAM format
-* [Preseq](#preseq) - estimate complexity of the sample
-* [RSeQC](#rseqc) - analyze read distributions, infer experiment (SE/PE, whether reads need to be flipped), & read duplication
-* [BBMap](#pileup) - analyze coverage
-* [bedtools](#bedtools) - create both normalized and non-normalized coverage files in bedGraph format
-* [igvtools](#igvtools) - create compressed files to visualize the sample in the Integrative Genomics Viewer ([IGV](http://software.broadinstitute.org/software/igv/home))
+The pipeline is built using [Nextflow](https://www.nextflow.io/) and processes data using the following steps:
+
+- [Preprocessing](#preprocessing)
+  - [FastQC](#fastqc) - Raw read QC
+  - [fastp](#fastp) - Adapter and quality trimming
+- [Alignment](#alignment)
+  - [bwa](#bwa) - Mapping low-divergent sequences against a large reference genome
+  - [bwa-mem2](#bwa-mem2) - The next version of bwa-mem
+  - [DRAGMAP](#dragmap) - Open-source software implementation of the DRAGEN mapper
+- [Alignment post-processing](#alignment-post-processing)
+  - [SAMtools](#samtools) - Sort and index alignments
+  - [UMI-tools dedup](#umi-tools-dedup) - UMI-based deduplication
+  - [picard MarkDuplicates](#picard-markduplicates) - Duplicate read marking
+- [Quality control](#quality-control)
+  - [RSeQC](#rseqc) - Various RNA-seq QC metrics
+  - [Preseq](#preseq) - Estimation of library complexity
+  - [BBMap](#bbmap) - Analyzes the sequencing coverage
+- [Coverage Graphs](#coverage-graphs)
+  - [BEDTools Genomecov](#bedtools-genomecov) - Create bedGraph coverage files
+  - [deepTools bamcoverage](#deeptools-bamcoverage) - Create bigWig coverage files
+- [Transcript Identification](#transcript-identification)
+  - [GroHMM](#grohmm) - Predicts transcripts from aligned GROSeq data in the form of bed files.
+  - [PINTS](#pints) - Identifies transcriptional regulatory elements (TREs) from nascent-transcript sequencing.
+  - [HOMER](#homer) - Transcript identification from GROSeq data
+  - [BEDTools Intersect](#bedtools-intersect) - Filtering of predicted TREs
+- [Quantification](#quantification)
+  - [featureCounts](#featurecounts) - Read counting relative to gene biotype
+- [MultiQC](#multiqc) - Aggregate report describing results and QC from the whole pipeline
+- [Pipeline information](#pipeline-information) - Report metrics generated during the workflow execution
+
+## Preprocessing
+
+### FastQC
+
+Output files + +- `fastqc/` + - `*_fastqc.html`: FastQC report containing quality metrics. + - `*_fastqc.zip`: Zip archive containing the FastQC report, tab-delimited data file and plot images. + +
+
+[FastQC](http://www.bioinformatics.babraham.ac.uk/projects/fastqc/) gives general quality metrics about your sequenced reads. It provides information about the quality score distribution across your reads, per base sequence content (%A/T/G/C), adapter contamination and overrepresented sequences. For further reading and documentation see the [FastQC help pages](http://www.bioinformatics.babraham.ac.uk/projects/fastqc/Help/).
+
+![MultiQC - FastQC sequence counts plot](images/mqc_fastqc_counts.png)
+
+![MultiQC - FastQC mean quality scores plot](images/mqc_fastqc_quality.png)
+
+![MultiQC - FastQC adapter content plot](images/mqc_fastqc_adapter.png)
+
+> **NB:** The FastQC plots displayed in the MultiQC report show _untrimmed_ reads. They may contain adapter sequence and potentially regions with low quality.
+
+### fastp
+
+Output files + +- `/` + - `*.fastp.html`: Trimming report in html format. + - `*.fastp.json`: Trimming report in json format. + - `*.fastp.log`: Trimming log file. + +
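+The trimming that produces the report files listed above can be approximated with a standalone fastp call along the lines of the sketch below; the file names are placeholders, and the exact options used by the pipeline are defined in its fastp module plus any `ext.args` overrides (such as the GRO-cap test profile shown earlier).
+
+```bash
+# Illustrative sketch only: single-end adapter/quality trimming with fastp,
+# writing the JSON/HTML reports that MultiQC later aggregates.
+fastp \
+    --in1 sample_R1.fastq.gz \
+    --out1 sample_R1.trimmed.fastq.gz \
+    --json sample.fastp.json \
+    --html sample.fastp.html \
+    2> sample.fastp.log
+```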
+ +[fastp](https://github.com/OpenGene/fastp) is a tool designed to provide fast, all-in-one preprocessing for FastQ files. It has been developed in C++ with multithreading support to achieve higher performance. fastp is used in this pipeline for standard adapter trimming and quality filtering. + +![MultiQC - fastp filtered reads plot](images/mqc_fastp_plot.png) + +## Alignment +### bwa -## fastqdump -[fastq-dump](https://trace.ncbi.nlm.nih.gov/Traces/sra/sra.cgi?view=toolkit_doc&f=fastq-dump) decompresses an SRR file obtained from the Gene Expression Omnibus ([GEO](https://www.ncbi.nlm.nih.gov/geo/)) database. This will produce one or two fastq files (in the case of paired-end reads). +
+Output files -**Output directory: `results/fastq-dump`** +- `bwa/` + - `*.bam`: The original BAM file containing read alignments to the reference genome. -* `sample.fastq` - * FastQ file to process, from the corresponding sample. +
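+The mapping and sorting step described below can be reproduced roughly with a standalone command such as this sketch; the index/FASTA name, read file and thread counts are placeholders, and the pipeline's actual invocation lives in its bwa and samtools modules.
+
+```bash
+# Illustrative sketch only: map trimmed reads with bwa mem and coordinate-sort the output.
+bwa mem -t 8 genome.fa sample_R1.trimmed.fastq.gz \
+    | samtools sort -@ 4 -o sample.bam -
+```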
+
+[BWA](https://github.com/lh3/bwa) is a software package for mapping low-divergent sequences against a large reference genome. The aligned reads are then coordinate-sorted with [samtools](https://www.htslib.org/doc/samtools.html).
-## seqkit & bbduk
-[SeqKit](https://bioinf.shenwei.me/seqkit/) is a toolkit for fasta and fastq file manipulation, used in the pipeline if the positive/negative strands need to be flipped (dependent on library prep protocol). [BBDuk](https://www.geneious.com/plugins/bbduk/) is trimming tool used to filter reads for adapters, read quality, and overall length after adapter removal.
+
+### bwa-mem2
+
+Output files -* `sample.trim.fastq` - * Trimmed FastQ file for each sample. -* `{refstats,trimstats,ehist}.txt` - * Trimming details including adapters removed, percentages of reads removed that did not meet minimum quality/length +- `bwamem2/` + - `*.bam`: The original BAM file containing read alignments to the reference genome. +
-## FastQC -[FastQC](http://www.bioinformatics.babraham.ac.uk/projects/fastqc/) gives general quality metrics about your reads. It provides information about the quality score distribution across your reads, the per base sequence content (%T/A/G/C). You get information about adapter contamination and other overrepresented sequences. +[BWA-mem2](https://github.com/bwa-mem2/bwa-mem2) is a software package for mapping low-divergent sequences against a large reference genome.The aligned reads are then coordinate-sorted with [samtools](https://www.htslib.org/doc/samtools.html). -For further reading and documentation see the [FastQC help](http://www.bioinformatics.babraham.ac.uk/projects/fastqc/Help/). +### DRAGMAP -> **NB:** The FastQC plots displayed in the MultiQC report shows both untrimmed and trimmed reads. +
+Output files -**Output directory: `results/qc`** +- `dragmap/` + - `*.bam`: The original BAM file containing read alignments to the reference genome. + - `*.dragmap.log`: Log of the stderr from the aligner -* `sample_fastqc.html` - * FastQC report, containing quality metrics for your untrimmed raw fastq files & trimmed fastq files -* `zips/sample_fastqc.zip` - * zip file containing the FastQC report, tab-delimited data file and plot images +
+[DragMap](https://github.com/Illumina/dragmap) is an open-source software implementation of the DRAGEN mapper, which the Illumina team created so that we would have an open-source way to produce the same results as their proprietary DRAGEN hardware. The aligned reads are then coordinate-sorted with [samtools](https://www.htslib.org/doc/samtools.html). -## MultiQC -[MultiQC](https://multiqc.info) is a visualisation tool that generates a single HTML report summarising all samples in your project. Most of the pipeline QC results are visualised in the report and further statistics are available in within the report data directory. +## Alignment post-processing -The pipeline has special steps which allow the software versions used to be reported in the MultiQC output for future traceability. +### SAMtools -**Output directory: `results/multiqc`** +
+Output files -* `Project_multiqc_report.html` - * MultiQC report - a standalone HTML file that can be viewed in your web browser -* `Project_multiqc_data/` - * Directory containing parsed statistics from the different tools used in the pipeline +- `/` + - `.sorted.bam`: If `--save_align_intermeds` is specified the original coordinate sorted BAM file containing read alignments will be placed in this directory. + - `.sorted.bam.bai`: If `--save_align_intermeds` is specified the BAI index file for the original coordinate sorted BAM file will be placed in this directory. + - `.sorted.bam.csi`: If `--save_align_intermeds --bam_csi_index` is specified the CSI index file for the original coordinate sorted BAM file will be placed in this directory. +- `/samtools_stats/` + - SAMtools `.sorted.bam.flagstat`, `.sorted.bam.idxstats` and `.sorted.bam.stats` files generated from the alignment files. -For more information about how to use MultiQC reports, see [https://multiqc.info](https://multiqc.info) +
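+The sorted BAMs, indices and statistics files listed above correspond to standard SAMtools commands, roughly as sketched below (file names are placeholders; the pipeline's module invocations may add further options).
+
+```bash
+# Illustrative sketch only: sort, index and collect mapping statistics for a BAM file.
+samtools sort -@ 4 -o sample.sorted.bam sample.bam
+samtools index sample.sorted.bam
+samtools flagstat sample.sorted.bam > sample.sorted.bam.flagstat
+samtools idxstats sample.sorted.bam > sample.sorted.bam.idxstats
+samtools stats sample.sorted.bam > sample.sorted.bam.stats
+```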
+The original BAM files generated by the selected alignment algorithm are further processed with [SAMtools](http://samtools.sourceforge.net/) to sort them by coordinate, for indexing, as well as to generate read mapping statistics. -## hisat2 -[HISAT2](https://ccb.jhu.edu/software/hisat2/index.shtml) is a sequence alignment tool to map the trimmed sequenced reads to the corresponding reference genome. Due to their size, the resulting sam files are not conserved after the pipeline has completed execution. +![MultiQC - SAMtools alignment scores plot](images/mqc_samtools_mapped.png) -If the necessary indices for mapping are not provided/present, a separate process will build them first. This step can take a few minutes, however it should only be executed once. +![MultiQC - SAMtools mapped reads per contig plot](images/mqc_samtools_idxstats.png) -## samtools -[Samtools](http://www.htslib.org/) is a suite of tools to handle format conversions, among other things, for high-throughput sequencing data. We also use Samtools to generate the list of chromosome sizes, if not provided for the desired reference genome. +### UMI-tools dedup -**Output directory: `results/mapped/bams`** +
+Output files -* `sample.trim.sorted.bam` - * Mapped sample in BAM format -* `sample.trim.sorted.bam.bai` - * Index for the `sample.trim.sorted.bam` mapped sample in BAM format +- `/` + - `.umi_dedup.sorted.bam`: If `--save_umi_intermeds` is specified the UMI deduplicated, coordinate sorted BAM file containing read alignments will be placed in this directory. + - `.umi_dedup.sorted.bam.bai`: If `--save_umi_intermeds` is specified the BAI index file for the UMI deduplicated, coordinate sorted BAM file will be placed in this directory. + - `.umi_dedup.sorted.bam.csi`: If `--save_umi_intermeds --bam_csi_index` is specified the CSI index file for the UMI deduplicated, coordinate sorted BAM file will be placed in this directory. +- `/umitools/` + - `*_edit_distance.tsv`: Reports the (binned) average edit distance between the UMIs at each position. + - `*_per_umi.tsv`: UMI-level summary statistics. + - `*_per_umi_per_position.tsv`: Tabulates the counts for unique combinations of UMI and position. -**Output directory: `results/qc/mapstats`** +The content of the files above is explained in more detail in the [UMI-tools documentation](https://umi-tools.readthedocs.io/en/latest/reference/dedup.html#dedup-specific-options). -* `sample.trim.sorted.bam.flagstat` - * Overall mapping statistics -* `sample.trim.sorted.bam.millionsmapped` - * File that contains number of uniquely mapped reads (not total multi-mapped). Used in normalization +
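+The deduplication step described below corresponds, roughly, to a standalone `umi_tools dedup` call such as the sketch here; file names are placeholders, and `--output-stats` is what produces the per-UMI TSV files listed above.
+
+```bash
+# Illustrative sketch only: UMI-aware deduplication of a coordinate-sorted, indexed BAM.
+umi_tools dedup \
+    -I sample.sorted.bam \
+    -S sample.umi_dedup.sorted.bam \
+    --output-stats=sample
+```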
+After extracting the UMI information from the read sequence (see [UMI-tools extract](#umi-tools-extract)), the second step in the removal of UMI barcodes involves deduplicating the reads based on both mapping and UMI barcode information using the UMI-tools `dedup` command. This will generate a filtered BAM file after the removal of PCR duplicates. -## preseq -[Preseq](http://smithlabresearch.org/software/preseq/) plots the estimated complexity of a sample, and estimates future yields for complexity if the sample is sequenced at higher read depths. +### picard MarkDuplicates -**Output directory: `results/qc/preseq`** +
+Output files -* `sample.trim.c_curve.txt` - * Curve generated based on number of unique reads vs. total reads sequenced -* `sample.trim.lc_extrap.txt` - * Extrapolation of the c_curve that attempts to model the predicted number of unique reads if the sample was seqeunced to a greater depth +- `/` + - `.markdup.sorted.bam`: Coordinate sorted BAM file after duplicate marking. This is the final post-processed BAM file and so will be saved by default in the results directory. + - `.markdup.sorted.bam.bai`: BAI index file for coordinate sorted BAM file after duplicate marking. This is the final post-processed BAM index file and so will be saved by default in the results directory. + - `.markdup.sorted.bam.csi`: CSI index file for coordinate sorted BAM file after duplicate marking. This is the final post-processed BAM index file and so will be saved by default in the results directory. Only generated if `--bam_csi_index` is specified as a parameter. +- `/samtools_stats/` + - SAMtools `.markdup.sorted.bam.flagstat`, `.markdup.sorted.bam.idxstats` and `.markdup.sorted.bam.stats` files generated from the duplicate marked alignment files. +- `/picard_metrics/` + - `.markdup.sorted.MarkDuplicates.metrics.txt`: Metrics file from MarkDuplicates. +
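+Duplicate marking as described below boils down to a picard MarkDuplicates call along these lines; the file names are placeholders and the pipeline's module may pass additional options (for example a reference sequence).
+
+```bash
+# Illustrative sketch only: mark (not remove) duplicates and write the metrics file parsed by MultiQC.
+picard MarkDuplicates \
+    --INPUT sample.sorted.bam \
+    --OUTPUT sample.markdup.sorted.bam \
+    --METRICS_FILE sample.markdup.sorted.MarkDuplicates.metrics.txt
+```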
-## rseqc -[RSeQC](http://dldcc-web.brc.bcm.edu/lilab/liguow/CGI/rseqc/_build/html/) provides a number of useful modules that can comprehensively evaluate high throughput sequence data. We use it on this pipeline to analyze read distributions. +Unless you are using [UMIs](https://emea.illumina.com/science/sequencing-method-explorer/kits-and-arrays/umi.html) it is not possible to establish whether the fragments you have sequenced from your sample were derived via true biological duplication (i.e. sequencing independent template fragments) or as a result of PCR biases introduced during the library preparation. By default, the pipeline uses [picard MarkDuplicates](https://broadinstitute.github.io/picard/command-line-overview.html#MarkDuplicates) to _mark_ the duplicate reads identified amongst the alignments to allow you to guage the overall level of duplication in your samples. However, for RNA-seq data it is not recommended to physically remove duplicate reads from the alignments (unless you are using UMIs) because you expect a significant level of true biological duplication that arises from the same fragments being sequenced from for example highly expressed genes. You can skip this step via the `--skip_markduplicates` parameter. -**Output directory: `results/qc/rseqc`** +![MultiQC - Picard MarkDuplicates metrics plot](images/mqc_picard_markduplicates.png) -* `sample.trim.read_dist.txt` - * Relative distribution of reads relative to a gene reference file +## Quality control +### RSeQC + +[RSeQC](<(http://rseqc.sourceforge.net/)>) is a package of scripts designed to evaluate the quality of RNA-seq data. This pipeline runs several, but not all RSeQC scripts. You can tweak the supported scripts you would like to run by adjusting the `--rseqc_modules` parameter which by default will run all of the following: `bam_stat.py`, `inner_distance.py`, `infer_experiment.py`, `junction_annotation.py`, `junction_saturation.py`,`read_distribution.py` and `read_duplication.py`. + +The majority of RSeQC scripts generate output files which can be plotted and summarised in the MultiQC report. + +### Preseq + +
+Output files + +- `/preseq/` + - `*.lc_extrap.txt`: Preseq expected future yield file. +- `/preseq/log/` + - `*.command.log`: Standard error output from command. + +
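+The expected-yield file listed above is the output of `preseq lc_extrap`; a minimal standalone equivalent might look like the sketch below (the BAM name is a placeholder).
+
+```bash
+# Illustrative sketch only: extrapolate library complexity from a coordinate-sorted BAM (-B = BAM input).
+preseq lc_extrap -B -o sample.lc_extrap.txt sample.sorted.bam
+```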
+ +The [Preseq](http://smithlabresearch.org/software/preseq/) package is aimed at predicting and estimating the complexity of a genomic sequencing library, equivalent to predicting and estimating the number of redundant reads from a given sequencing depth and how many will be expected from additional sequencing using an initial sequencing experiment. The estimates can then be used to examine the utility of further sequencing, optimize the sequencing depth, or to screen multiple libraries to avoid low complexity samples. A shallow curve indicates that the library has reached complexity saturation and further sequencing would likely not add further unique reads. The dashed line shows a perfectly complex library where total reads = unique reads. Note that these are predictive numbers only, not absolute. The MultiQC plot can sometimes give extreme sequencing depth on the X axis - click and drag from the left side of the plot to zoom in on more realistic numbers. + +![MultiQC - Preseq library complexity plot](images/mqc_preseq_plot.png) + +### BBMap + +
+Output files + +- `bbmap/` + - `*.coverage.hist.txt`: Histogram of read coverage over each chromosome + - `*.coverage.stats.txt`: Coverage stats broken down by chromosome including %GC, pos/neg read coverage, total coverage, etc. + +
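+As a rough guide, the coverage histogram and per-chromosome statistics listed above come from BBMap's `pileup.sh`, which can be run standalone roughly as follows (file names are placeholders).
+
+```bash
+# Illustrative sketch only: per-chromosome coverage stats and a coverage histogram with pileup.sh.
+pileup.sh \
+    in=sample.markdup.sorted.bam \
+    out=sample.coverage.stats.txt \
+    hist=sample.coverage.hist.txt
+```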
-## pileup [BBMap](https://github.com/BioInfoTools/BBMap/blob/master/sh/pileup.sh) includes a tool called `pileup`, which analyzes the sequencing coverage for each sample. -**Output directory: `results/qc/pileup`** +## Coverage Graphs + +### BEDTools Genomecov + +
+Output files + +- `bedtools/` + - `*.minus.bedGraph`: Sample coverage file (negative strand only) in bedGraph format + - `*.plus.bedGraph`: Sample coverage file (positive strand only) in bedGraph format + +
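+Strand-specific bedGraph tracks of this kind can be generated manually with BEDTools, roughly as below; the BAM name is a placeholder and the pipeline may additionally apply normalisation or sorting.
+
+```bash
+# Illustrative sketch only: one bedGraph coverage track per strand.
+bedtools genomecov -ibam sample.markdup.sorted.bam -bg -strand + > sample.plus.bedGraph
+bedtools genomecov -ibam sample.markdup.sorted.bam -bg -strand - > sample.minus.bedGraph
+```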
+ +### deepTools bamcoverage + +
+Output files + +- `deeptools/` + - `*.minus.bigWig`: Sample coverage file (negative strand only) in bigWig format + - `*.plus.bigWig`: Sample coverage file (positive strand only) in bigWig format + +
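+The bigWig tracks listed above can be approximated with standalone `bamCoverage` calls; which `--filterRNAstrand` value ends up in the plus- or minus-strand track depends on the library protocol, so treat the assignment below as an assumption.
+
+```bash
+# Illustrative sketch only: strand-separated bigWig coverage tracks with deepTools bamCoverage.
+bamCoverage --bam sample.markdup.sorted.bam --outFileName sample.plus.bigWig --filterRNAstrand forward
+bamCoverage --bam sample.markdup.sorted.bam --outFileName sample.minus.bigWig --filterRNAstrand reverse
+```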
+ +## Transcript Identification + +### HOMER + +
+Output files + +- `homer/` + - `*.bed`: HOMER Nascent RNA (GroSeq) transcripts after pos2bed + - `*.peaks.txt`: HOMER Nascent RNA (GroSeq) transcripts + - `*.bedGraph.gz`: UCSC bedGraph + - `*_tagdir`: homer tagdir + +
+
+[HOMER](http://homer.ucsd.edu) (Hypergeometric Optimization of Motif EnRichment) is a suite of tools for Motif Discovery and next-gen sequencing analysis. It is a collection of command line programs for UNIX-style operating systems written in Perl and C++. HOMER was primarily written as a de novo motif discovery algorithm and is well suited for finding 8-20 bp motifs in large scale genomics data. HOMER contains many useful tools for analyzing ChIP-Seq, GRO-Seq, RNA-Seq, DNase-Seq, Hi-C and numerous other types of functional genomics sequencing data sets.
+
+For now the pipeline only supports the HOMER groseq workflow; feel free to open an issue or PR if you'd like to see others. For more information about how to use HOMER, see the [GRO-Seq Analysis Tutorial](http://homer.ucsd.edu/homer/ngs/groseq/groseq.html).
+
+### PINTS
+
+Output files + +- `pints/` + - `*_bidirectional_peaks.bed`: Bidirectional TREs (divergent + convergent) + - `*_divergent_peaks.bed`: Divergent TREs + - `*_unidirectional_peaks.bed`: Unidirectional TREs, maybe lncRNAs transcribed from enhancers (e-lncRNAs) + +
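+For orientation, a standalone PINTS run producing this style of output looks roughly like the sketch below; the option names follow the PINTS documentation and may differ between versions, the file names are placeholders, and `--exp-type` must match your assay.
+
+```bash
+# Illustrative sketch only: call TREs from a single BAM with the PINTS caller.
+pints_caller \
+    --bam-file sample.markdup.sorted.bam \
+    --save-to pints/ \
+    --file-prefix sample \
+    --exp-type GROcap
+```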
+
+[PINTS](https://pints.yulab.org/) (Peak Identifier for Nascent Transcript Starts) is a tool, often referred to as a peak caller, used to identify narrow regions containing potential regulatory elements (mainly promoters and enhancers). PINTS was inspired by [MACS2](https://github.com/macs3-project/MACS), with modifications specifically implemented for identifying eRNA TSSs from genome-wide TSS assays.
+
+For more information about how PINTS works, see the paper [A comparison of experimental assays and analytical methods for genome-wide identification of active enhancers](https://www.ncbi.nlm.nih.gov/pmc/articles/PMC9288987/).
+
+### GroHMM
+
+Output files + +- `grohmm/` + - `*.eval.txt`: Evaluation of HMM Annotations + - `*.final.transcripts.bed`: Predicted transcripts + - `*.tdFinal.txt`: Final quality metrics + - `*.tdplot_mqc.jpg`: TD plot included in multiqc + - `*.transcripts.txt`: Predicted transcripts in txt form + - `*.tuning.csv`: The tuning csv that was used + +
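+As explained in the description below, the tuning step can be skipped and hold-out parameters supplied directly on the command line. A hedged sketch, assuming GroHMM has been selected as the transcript caller for the run:
+
+```bash
+# Illustrative sketch only: skip the GroHMM tuning step; --uts and --ltprobb
+# (described below) can additionally be supplied to override the default hold-out parameters.
+nextflow run nf-core/nascent \
+    --input samplesheet.csv \
+    --outdir results \
+    --genome GRCh37 \
+    --skip_tuning \
+    -profile docker
+```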
+
+[GroHMM](https://www.bioconductor.org/packages/release/bioc/html/groHMM.html) is a computational tool for identifying unannotated and cell type-specific transcription units from GRO-seq data. The pipeline first predicts transcripts and then repairs them based on known error modes, generating a final set of transcripts (in the form of a BED file) for further analysis.
+By default, tuning is performed using a preset comma-separated values file with two columns identifying the tuning parameters, LtProbB and UTS. These refer to the log-transformed transition probability of switching from the transcribed to the non-transcribed state, and the variance of the emission probability for reads in the non-transcribed state, respectively. The tuning output, also a comma-separated values file, lists the sum of errors and the error rate per called transcript, which allows Nextflow to select optimal UTS and LtProbB values for the subsequent transcript identification step. The user may also choose to provide their own list of hold-out parameters to test (as a .csv file), or skip the tuning process altogether due to time constraints. If tuning is skipped (`--skip_tuning`), the user may indicate the specific hold-out parameters to use (`--uts` and `--ltprobb`) or choose to use the default parameters.
+The transcript calling step uses the two-state hidden Markov model (HMM) employed by GroHMM to identify boundaries of transcription across the genome de novo. The output is a BED file of transcripts used in downstream analysis.
+
+For more information about how to use GroHMM, see the [tutorial](https://www.bioconductor.org/packages/release/bioc/vignettes/groHMM/inst/doc/groHMM.pdf) or [documentation](https://www.bioconductor.org/packages/release/bioc/manuals/groHMM/man/groHMM.pdf).
+
+### BEDTools intersect
+
+Output files
+
+- `bedtools/`
+  - `*_filtered.bed`: Predicted transcripts/TREs after filtering against the supplied `filter_bed` regions.
+
+ +The pipeline optionally takes a `filter_bed`, which can then be used to filter the predicted transcripts before counting is performed. This could be promoter regions to drop or histone modifications, such as **H3K4me1** and **H3K27ac**, which are often associated with enhancers. + +From the [PINTS documentation](https://pints.yulab.org/tre_calling): + +> We assume distal bidirectional transcripts are mainly from enhancer RNA transcription. To extract candidate enhancers from the pool of all TREs, we need a bed file that defines proximal TREs (promoters), then we can use bedtools to extract distal TREs as follows: + +> `bedtools intersect -a SampleA_1_bidirectional_peaks.bed -b promoters.bed -v > SampleA_1_bidirectional_peaks.distalTREs.bed` + +They've also created some bed files that might be useful for analysis. + +> We have prepared promoter reference bed files (500 bp regions flanking protein-coding transcripts) for human and mouse genomes: + +- [promoter for hg38](https://pints.yulab.org/ref/examples/promoters_1kb_tss_centered.bed.gz): based on GENCODE annotation (v24) +- [promoter for hg19](https://pints.yulab.org/ref/examples/hg19_promoters_1kb_tss_centered.bed.gz): based on GENCODE annotation (v19) +- [promoter for mm10](https://pints.yulab.org/ref/examples/mm10_promoters_1kb_tss_centered.bed.gz): based on GENCODE annotation (m23) + +## Quantification + +### featureCounts + +
+Output files + +- `/featurecounts/` + - `*.featureCounts.txt`: featureCounts biotype-level quantification results for each sample. + - `*.featureCounts.txt.summary`: featureCounts summary file containing overall statistics about the counts. + - `*_mqc.tsv`: MultiQC custom content files used to plot biotypes in report. + +
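+Biotype-level counting of this kind corresponds to a featureCounts call along the lines of the sketch below; the GTF and BAM names are placeholders and the pipeline may set additional options such as strandedness (`-s`).
+
+```bash
+# Illustrative sketch only: summarise read counts per gene biotype with featureCounts.
+featureCounts \
+    -a genes.gtf \
+    -g gene_biotype \
+    -o sample.featureCounts.txt \
+    sample.markdup.sorted.bam
+```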
+
+[featureCounts](http://bioinf.wehi.edu.au/featureCounts/) from the [Subread](http://subread.sourceforge.net/) package is a quantification tool used to summarise the mapped read distribution over genomic features such as genes, exons, promoters, gene bodies, genomic bins and chromosomal locations. We can also use featureCounts to count overlaps with different classes of genomic features. This provides an additional QC to check which features are most abundant in the sample, and to highlight potential problems such as rRNA contamination.
+
+
+
+## Workflow reporting and genomes
-
+### Reference genome files
+
+Output files -## bedtools -[bedtools](https://bedtools.readthedocs.io/en/latest/) is an extensive toolkit for BED and bedGraph format manipulation, like sorting, intersecting and joining these files. The files produced here are useful to be processed later using [Tfit](https://github.com/Dowell-Lab/Tfit) or [dReg](https://github.com/Danko-Lab/dREG) to find regions of active transcription, and transcription regulatory elements. +- `genome/` + - `*.fa`, `*.gtf`, `*.gff`, `*.bed`, `.tsv`: If the `--save_reference` parameter is provided then all of the genome reference files will be placed in this directory. +- `genome/index/` + - `bwa/`: Directory containing bwa indices. + - `bwa-mem2/`: Directory containing bwa-mem2 indices. + - `dragmap/`: Directory containing DRAGMAP indices. -**Output directory: `results/mapped/bedgraphs`** +
-* `sample.trim.bedGraph` - * Sample coverage file in bedGraph format -* `sample.trim.pos.bedGraph` - * Sample coverage file (positive strand only) in bedGraph format -* `sample.trim.neg.bedGraph` - * Sample coverage file (negative strand only) in bedGraph format +A number of genome-specific files are generated by the pipeline because they are required for the downstream processing of the results. If the `--save_reference` parameter is provided then these will be saved in the `genome/` directory. It is recommended to use the `--save_reference` parameter if you are using the pipeline to build new indices so that you can save them somewhere locally. The index building step can be quite a time-consuming process and it permits their reuse for future runs of the pipeline to save disk space. -**Output directory: `results/mapped/rcc_bedgraphs`** +### MultiQC -* `sample.trim.rcc.bedGraph` - * Normalized sample coverage file in bedGraph format -* `sample.pos.trim.rcc.bedGraph` - * Normalized sample coverage file (positive strand only) in bedGraph format -* `sample.neg.trim.rcc.bedGraph` - * Normalized sample coverage file (negative strand only) in bedGraph format +
+Output files -**Output directory: `results/mapped/dreg_input`** +- `multiqc/` + - `multiqc_report.html`: a standalone HTML file that can be viewed in your web browser. + - `multiqc_data/`: directory containing parsed statistics from the different tools used in the pipeline. + - `multiqc_plots/`: directory containing static images from the report in various formats. -* `sample.trim.pos.rcc.bw` - * Sample coverage file (positive strand only) in BigWig format -* `sample.trim.neg.rcc.bw` - * Sample coverage file (negative strand only) in BigWig format +
-**Output directory: `results/mapped/rcc_bigwig`** +[MultiQC](http://multiqc.info) is a visualization tool that generates a single HTML report summarising all samples in your project. Most of the pipeline QC results are visualised in the report and further statistics are available in the report data directory. -* `sample.trim.pos.rcc.bw` - * Normalized sample coverage file (positive strand only) in BigWig format -* `sample.trim.neg.rcc.bw` - * Normalized sample coverage file (negative strand only) in BigWig format +Results generated by MultiQC collate pipeline QC from supported tools e.g. FastQC. The pipeline has special steps which also allow the software versions to be reported in the MultiQC output for future traceability. For more information about how to use MultiQC reports, see . +### Pipeline information -## igvtools -[igvtools](https://software.broadinstitute.org/software/igv/igvtools) is a commandline tool we use to produce a compressed version of the sample coverage file in order to visualize it on IGV more efficiently (with a significantly smaller memory footprint). +
+Output files
+
+- `pipeline_info/`
+  - Reports generated by Nextflow: `execution_report.html`, `execution_timeline.html`, `execution_trace.txt` and `pipeline_dag.dot`/`pipeline_dag.svg`.
+  - Reports generated by the pipeline: `pipeline_report.html`, `pipeline_report.txt` and `software_versions.yml`. The `pipeline_report*` files will only be present if the `--email` / `--email_on_fail` parameters are used when running the pipeline.
+  - Reformatted samplesheet files used as input to the pipeline: `samplesheet.valid.csv`.
+
+[Nextflow](https://www.nextflow.io/docs/latest/tracing.html) provides excellent functionality for generating various reports relevant to the running and execution of the pipeline. This will allow you to troubleshoot errors with the running of the pipeline, and also provide you with other information such as launch commands, run times and resource usage. diff --git a/docs/usage.md b/docs/usage.md index 4ec32ec3..8a12a1c6 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -1,61 +1,86 @@ # nf-core/nascent: Usage -## Table of contents -* [Introduction](#general-nextflow-info) -* [Running the pipeline](#running-the-pipeline) -* [Updating the pipeline](#updating-the-pipeline) -* [Reproducibility](#reproducibility) -* [Main arguments](#main-arguments) - * [`-profile`](#-profile-single-dash) - * [`docker`](#docker) - * [`awsbatch`](#awsbatch) - * [`standard`](#standard) - * [`binac`](#binac) - * [`cfc`](#cfc) - * [`none`](#none) - * [`--reads`](#--reads) - * [`--singleEnd`](#--singleend) -* [Reference Genomes](#reference-genomes) - * [`--genome`](#--genome) - * [`--fasta`](#--fasta) -* [Job Resources](#job-resources) -* [Automatic resubmission](#automatic-resubmission) -* [Custom resource requests](#custom-resource-requests) -* [AWS batch specific parameters](#aws-batch-specific-parameters) - * [`-awsbatch`](#-awsbatch) - * [`--awsqueue`](#--awsqueue) - * [`--awsregion`](#--awsregion) -* [Other command line parameters](#other-command-line-parameters) - * [`--outdir`](#--outdir) - * [`--email`](#--email) - * [`-name`](#-name-single-dash) - * [`-resume`](#-resume-single-dash) - * [`-c`](#-c-single-dash) - * [`--max_memory`](#--max_memory) - * [`--max_time`](#--max_time) - * [`--max_cpus`](#--max_cpus) - * [`--plaintext_emails`](#--plaintext_emails) - * [`--sampleLevel`](#--sampleLevel) - * [`--multiqc_config`](#--multiqc_config) - * [`--chrom_sizes`](#--chrom_sizes) - * [`--hisat_indices`](#--hisat_indices) - * [`--genome_refseq`](#--genome_refseq) - * [`--sras`](#--sras) - -## General Nextflow info -Nextflow handles job submissions on SLURM or other environments, and supervises running the jobs. Thus the Nextflow process must run until the pipeline is finished. We recommend that you put the process running in the background through `screen` / `tmux` or similar tool. Alternatively you can run nextflow within a cluster job submitted your job scheduler. - -It is recommended to limit the Nextflow Java virtual machines memory. We recommend adding the following line to your environment (typically in `~/.bashrc` or `~./bash_profile`): +## :warning: Please read this documentation on the nf-core website: [https://nf-co.re/nascent/usage](https://nf-co.re/nascent/usage) + +> _Documentation of pipeline parameters is generated automatically from the pipeline schema and can no longer be found in markdown files._ + +## Introduction + +## Samplesheet input + +You will need to create a samplesheet with information about the samples you would like to analyse before running the pipeline. Use this parameter to specify its location. It has to be a comma-separated file with 3 columns, and a header row as shown in the examples below. ```bash -NXF_OPTS='-Xms1g -Xmx4g' +--input '[path to samplesheet file]' +``` + +### Multiple runs of the same sample + +The `sample` identifiers have to be the same when you have re-sequenced the same sample more than once e.g. to increase sequencing depth. The pipeline will concatenate the raw reads before performing any downstream analysis. 
Below is an example for the same sample sequenced across 3 lanes: + +```console +sample,fastq_1,fastq_2 +CONTROL_REP1,AEG588A1_S1_L002_R1_001.fastq.gz,AEG588A1_S1_L002_R2_001.fastq.gz +CONTROL_REP1,AEG588A1_S1_L003_R1_001.fastq.gz,AEG588A1_S1_L003_R2_001.fastq.gz +CONTROL_REP1,AEG588A1_S1_L004_R1_001.fastq.gz,AEG588A1_S1_L004_R2_001.fastq.gz ``` +### Full samplesheet + +The pipeline will auto-detect whether a sample is single- or paired-end using the information provided in the samplesheet. The samplesheet can have as many columns as you desire, however, there is a strict requirement for the first 3 columns to match those defined in the table below. + +A final samplesheet file consisting of both single- and paired-end data may look something like the one below. This is for 6 samples, where `TREATMENT_REP3` has been sequenced twice. + +```console +sample,fastq_1,fastq_2 +CONTROL_REP1,AEG588A1_S1_L002_R1_001.fastq.gz,AEG588A1_S1_L002_R2_001.fastq.gz +CONTROL_REP2,AEG588A2_S2_L002_R1_001.fastq.gz,AEG588A2_S2_L002_R2_001.fastq.gz +CONTROL_REP3,AEG588A3_S3_L002_R1_001.fastq.gz,AEG588A3_S3_L002_R2_001.fastq.gz +TREATMENT_REP1,AEG588A4_S4_L003_R1_001.fastq.gz, +TREATMENT_REP2,AEG588A5_S5_L003_R1_001.fastq.gz, +TREATMENT_REP3,AEG588A6_S6_L003_R1_001.fastq.gz, +TREATMENT_REP3,AEG588A6_S6_L004_R1_001.fastq.gz, +``` + +| Column | Description | +| --------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| `sample` | Custom sample name. This entry will be identical for multiple sequencing libraries/runs from the same sample. Spaces in sample names are automatically converted to underscores (`_`). | +| `fastq_1` | Full path to FastQ file for Illumina short reads 1. File has to be gzipped and have the extension ".fastq.gz" or ".fq.gz". | +| `fastq_2` | Full path to FastQ file for Illumina short reads 2. File has to be gzipped and have the extension ".fastq.gz" or ".fq.gz". | + +An [example samplesheet](../assets/samplesheet.csv) has been provided with the pipeline. + +## Alignment Options + +By default, the pipeline uses [BWA](https://bio-bwa.sourceforge.net/) (i.e. `--aligner bwa`) to map the raw FastQ reads to the reference genome. Research as to which aligner works best with Nascent Transcript and Transcription Start Site assays is pending. + +## Reference genome files + +The minimum reference genome requirements are a FASTA and GTF file, all other files required to run the pipeline can be generated from these files. However, it is more storage and compute friendly if you are able to re-use reference genome files as efficiently as possible. It is recommended to use the `--save_reference` parameter if you are using the pipeline to build new indices (e.g. those unavailable on [AWS iGenomes](https://nf-co.re/usage/reference_genomes)) so that you can save them somewhere locally. The index building step can be quite a time-consuming process and it permits their reuse for future runs of the pipeline to save disk space. You can then either provide the appropriate reference genome files on the command-line via the appropriate parameters (e.g. `--star_index '/path/to/BWA/index/'`) or via a custom config file. + +- If `--genome` is provided then the FASTA and GTF files (and existing indices) will be automatically obtained from AWS-iGenomes unless these have already been downloaded locally in the path specified by `--igenomes_base`. 
+- If `--gff` is provided as input then this will be converted to a GTF file, or the latter will be used if both are provided. + +## Quantification Options + +Currently only featureCounts is supported for quantification. It counts both the genes, and the predicted transcripts. + +## Transcript Identification Options + +The current options for transcript identification include [GroHMM](https://bioconductor.org/packages/release/bioc/html/groHMM.html), [HOMER](http://homer.ucsd.edu/), and [PINTS](https://pints.yulab.org/). + +The default transcript identification option is PINTS, and HOMER if the transcript `assay_type` is `GROseq` but this may change in future releases. + +### GroHMM + +When selecting GroHMM as an option, the pipeline by default tests a list of preset hold-out parameters to select for the combination of arguments which would result in the lowest possible error rate during the transcript identification process. The user may also choose to provide their own list of hold-out parameters to test, or skip the tuning process altogether due to time constraints. If the tuning process is skipped ('--skip_tuning') then the user may indicate the specific holdout parameters to use ('--uts' and '--ltprobb') or choose to use the default parameters. + ## Running the pipeline + The typical command for running the pipeline is as follows: ```bash -nextflow run nf-core/nascent --reads '*_R{1,2}.fastq.gz' -profile standard,docker +nextflow run nf-core/nascent --input samplesheet.csv --outdir --genome GRCh37 -profile docker ``` This will launch the pipeline with the `docker` configuration profile. See below for more information about profiles. @@ -63,13 +88,14 @@ This will launch the pipeline with the `docker` configuration profile. See below Note that the pipeline will create the following files in your working directory: ```bash -work # Directory containing the nextflow working files -results # Finished results (configurable, see below) -.nextflow_log # Log file from Nextflow +work # Directory containing the nextflow working files + # Finished results in specified location (defined with --outdir) +.nextflow_log # Log file from Nextflow # Other nextflow hidden files, eg. history of pipeline runs and old logs. ``` ### Updating the pipeline + When you run the above command, Nextflow automatically pulls the pipeline code from GitHub and stores it as a cached version. When running the pipeline after this, it will always use the cached version if available - even if the pipeline has been updated since. To make sure that you're running the latest version of the pipeline, make sure that you regularly update the cached version of the pipeline: ```bash @@ -77,209 +103,185 @@ nextflow pull nf-core/nascent ``` ### Reproducibility -It's a good idea to specify a pipeline version when running the pipeline on your data. This ensures that a specific version of the pipeline code and software are used when you run your pipeline. If you keep using the same tag, you'll be running the same version of the pipeline, even if there have been changes to the code since. + +It is a good idea to specify a pipeline version when running the pipeline on your data. This ensures that a specific version of the pipeline code and software are used when you run your pipeline. If you keep using the same tag, you'll be running the same version of the pipeline, even if there have been changes to the code since. 
First, go to the [nf-core/nascent releases page](https://github.com/nf-core/nascent/releases) and find the latest version number - numeric only (eg. `1.3.1`). Then specify this when running the pipeline with `-r` (one hyphen) - eg. `-r 1.3.1`. This version number will be logged in reports when you run the pipeline, so that you'll know what you used when you look back in the future. +## Core Nextflow arguments -## Main Arguments +> **NB:** These options are part of Nextflow and use a _single_ hyphen (pipeline parameters use a double-hyphen). ### `-profile` -Use this parameter to choose a configuration profile. Profiles can give configuration presets for different compute environments. - -* `docker` - * A generic configuration profile to be used with [Docker](http://docker.com/) - * Pulls software from dockerhub: [`nfcore/nascent`](http://hub.docker.com/r/nfcore/nascent/) -* `singularity` - * A generic configuration profile to be used with [Singularity](http://singularity.lbl.gov/) - * Pulls software from singularity-hub -* `conda` - * A generic configuration profile to be used with [conda](https://conda.io/docs/) - * Pulls most software from [Bioconda](https://bioconda.github.io/) -* `awsbatch` - * A generic configuration profile to be used with AWS Batch. -* `test` - * A profile with a complete configuration for automated testing - * Includes links to test data so needs no other parameters - -### `--reads` -Use this to specify the location of your input FastQ files. For example: -```bash ---reads 'path/to/data/sample_*_{1,2}.fastq' -``` - -Please note the following requirements: - -1. The path must be enclosed in quotes -2. The path must have at least one `*` wildcard character -3. When using the pipeline with paired end data, the path must use `{1,2}` notation to specify read pairs. - -If left unspecified, a default pattern is used: `data/*{1,2}.fastq.gz` +Use this parameter to choose a configuration profile. Profiles can give configuration presets for different compute environments. -### `--singleEnd` -By default, the pipeline expects paired-end data. If you have single-end data, you need to specify `--singleEnd` on the command line when you launch the pipeline. A normal glob pattern, enclosed in quotation marks, can then be used for `--reads`. For example: +Several generic profiles are bundled with the pipeline which instruct the pipeline to use software packaged using different methods (Docker, Singularity, Podman, Shifter, Charliecloud, Conda) - see below. When using Biocontainers, most of these software packaging methods pull Docker containers from quay.io e.g [FastQC](https://quay.io/repository/biocontainers/fastqc) except for Singularity which directly downloads Singularity images via https hosted by the [Galaxy project](https://depot.galaxyproject.org/singularity/) and Conda which downloads and installs software locally from [Bioconda](https://bioconda.github.io/). -```bash ---singleEnd --reads '*.fastq' -``` +> We highly recommend the use of Docker or Singularity containers for full pipeline reproducibility, however when this is not possible, Conda is also supported. -It is not possible to run a mixture of single-end and paired-end files in one run. +The pipeline also dynamically loads configurations from [https://github.com/nf-core/configs](https://github.com/nf-core/configs) when it runs, making multiple config profiles for various institutional clusters available at run time. 
For more information and to see if your system is available in these configs please see the [nf-core/configs documentation](https://github.com/nf-core/configs#documentation). +Note that multiple profiles can be loaded, for example: `-profile test,docker` - the order of arguments is important! +They are loaded in sequence, so later profiles can overwrite earlier profiles. -## Reference genomes +If `-profile` is not specified, the pipeline will run locally and expect all software to be installed and available on the `PATH`. This is _not_ recommended. -The pipeline config files come bundled with paths to the illumina iGenomes reference index files. If running with docker or AWS, the configuration is set up to use the [AWS-iGenomes](https://ewels.github.io/AWS-iGenomes/) resource. +- `docker` + - A generic configuration profile to be used with [Docker](https://docker.com/) +- `singularity` + - A generic configuration profile to be used with [Singularity](https://sylabs.io/docs/) +- `podman` + - A generic configuration profile to be used with [Podman](https://podman.io/) +- `shifter` + - A generic configuration profile to be used with [Shifter](https://nersc.gitlab.io/development/shifter/how-to-use/) +- `charliecloud` + - A generic configuration profile to be used with [Charliecloud](https://hpc.github.io/charliecloud/) +- `conda` + - A generic configuration profile to be used with [Conda](https://conda.io/docs/). Please only use Conda as a last resort i.e. when it's not possible to run the pipeline with Docker, Singularity, Podman, Shifter or Charliecloud. +- `test` + - A profile with a complete configuration for automated testing + - Includes links to test data so needs no other parameters -### `--genome` (using iGenomes) -There are 31 different species supported in the iGenomes references. To run the pipeline, you must specify which to use with the `--genome` flag. +### `-resume` -You can find the keys to specify the genomes in the [iGenomes config file](../conf/igenomes.config). Common genomes that are supported are: +Specify this when restarting a pipeline. Nextflow will use cached results from any pipeline steps where the inputs are the same, continuing from where it got to previously. For input to be considered the same, not only the names must be identical but the files' contents as well. For more info about this parameter, see [this blog post](https://www.nextflow.io/blog/2019/demystifying-nextflow-resume.html). -* Human - * `--genome GRCh37` -* Mouse - * `--genome GRCm38` -* _Drosophila_ - * `--genome BDGP6` -* _S. cerevisiae_ - * `--genome 'R64-1-1'` +You can also supply a run name to resume a specific run: `-resume [run-name]`. Use the `nextflow log` command to show previous run names. -> There are numerous others - check the config file for more. +### `-c` -Note that you can use the same configuration setup to save sets of reference files for your own use, even if they are not part of the iGenomes resource. See the [Nextflow documentation](https://www.nextflow.io/docs/latest/config.html) for instructions on where to save such a file. +Specify the path to a specific config file (this is a core Nextflow command). See the [nf-core website documentation](https://nf-co.re/usage/configuration) for more information. 
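+As a worked example of combining the two, the sketch below runs the pipeline with the `docker` profile while layering a run-specific configuration file on top via `-c` (the file name `custom.config` is a placeholder).
+
+```bash
+# Illustrative sketch only: a container profile plus a site- or run-specific config file.
+nextflow run nf-core/nascent \
+    --input samplesheet.csv \
+    --outdir results \
+    --genome GRCh37 \
+    -profile docker \
+    -c custom.config
+```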
-The syntax for this reference configuration is as follows: +## Custom configuration -```nextflow -params { - genomes { - 'GRCh37' { - fasta = '' // Used if no star index given - } - // Any number of additional genomes, key is used with --genome - } -} -``` +### Resource requests -### `--fasta` -If you prefer, you can specify the full path to your reference genome when you run the pipeline: +Whilst the default requirements set within the pipeline will hopefully work for most people and with most input data, you may find that you want to customise the compute resources that the pipeline requests. Each step in the pipeline has a default set of requirements for number of CPUs, memory and time. For most of the steps in the pipeline, if the job exits with any of the error codes specified [here](https://github.com/nf-core/rnaseq/blob/4c27ef5610c87db00c3c5a3eed10b1d161abf575/conf/base.config#L18) it will automatically be resubmitted with higher requests (2 x original, then 3 x original). If it still fails after the third attempt then the pipeline execution is stopped. -```bash ---fasta '[path to Fasta reference]' -``` +For example, if the nf-core/rnaseq pipeline is failing after multiple re-submissions of the `STAR_ALIGN` process due to an exit code of `137` this would indicate that there is an out of memory issue: -### `--igenomesIgnore` -Do not load `igenomes.config` when running the pipeline. You may choose this option if you observe clashes between custom parameters and those supplied in `igenomes.config`. +```console +[62/149eb0] NOTE: Process `NFCORE_RNASEQ:RNASEQ:ALIGN_STAR:STAR_ALIGN (WT_REP1)` terminated with an error exit status (137) -- Execution is retried (1) +Error executing process > 'NFCORE_RNASEQ:RNASEQ:ALIGN_STAR:STAR_ALIGN (WT_REP1)' -## Job resources -### Automatic resubmission -Each step in the pipeline has a default set of requirements for number of CPUs, memory and time. For most of the steps in the pipeline, if the job exits with an error code of `143` (exceeded requested resources) it will automatically resubmit with higher requests (2 x original, then 3 x original). If it still fails after three times then the pipeline is stopped. +Caused by: + Process `NFCORE_RNASEQ:RNASEQ:ALIGN_STAR:STAR_ALIGN (WT_REP1)` terminated with an error exit status (137) -### Custom resource requests -Wherever process-specific requirements are set in the pipeline, the default value can be changed by creating a custom config file. See the files hosted at [`nf-core/configs`](https://github.com/nf-core/configs/tree/master/conf) for examples. +Command executed: + STAR \ + --genomeDir star \ + --readFilesIn WT_REP1_trimmed.fq.gz \ + --runThreadN 2 \ + --outFileNamePrefix WT_REP1. \ + -If you are likely to be running `nf-core` pipelines regularly it may be a good idea to request that your custom config file is uploaded to the `nf-core/configs` git repository. Before you do this please can you test that the config file works with your pipeline of choice using the `-c` parameter (see definition below). You can then create a pull request to the `nf-core/configs` repository with the addition of your config file, associated documentation file (see examples in [`nf-core/configs/docs`](https://github.com/nf-core/configs/tree/master/docs)), and amending [`nfcore_custom.config`](https://github.com/nf-core/configs/blob/master/nfcore_custom.config) to include your custom profile. 
+Command exit status: + 137 -If you have any questions or issues please send us a message on [Slack](https://nf-core-invite.herokuapp.com/). +Command output: + (empty) -## AWS Batch specific parameters -Running the pipeline on AWS Batch requires a couple of specific parameters to be set according to your AWS Batch configuration. Please use the `-awsbatch` profile and then specify all of the following parameters. -### `--awsqueue` -The JobQueue that you intend to use on AWS Batch. -### `--awsregion` -The AWS region to run your job in. Default is set to `eu-west-1` but can be adjusted to your needs. +Command error: + .command.sh: line 9: 30 Killed STAR --genomeDir star --readFilesIn WT_REP1_trimmed.fq.gz --runThreadN 2 --outFileNamePrefix WT_REP1. +Work dir: + /home/pipelinetest/work/9d/172ca5881234073e8d76f2a19c88fb -Please make sure to also set the `-w/--work-dir` and `--outdir` parameters to a S3 storage bucket of your choice - you'll get an error message notifying you if you didn't. +Tip: you can replicate the issue by changing to the process work dir and entering the command `bash .command.run` +``` -## Other command line parameters +To bypass this error you would need to find exactly which resources are set by the `STAR_ALIGN` process. The quickest way is to search for `process STAR_ALIGN` in the [nf-core/rnaseq Github repo](https://github.com/nf-core/rnaseq/search?q=process+STAR_ALIGN). +We have standardised the structure of Nextflow DSL2 pipelines such that all module files will be present in the `modules/` directory and so, based on the search results, the file we want is `modules/nf-core/software/star/align/main.nf`. +If you click on the link to that file you will notice that there is a `label` directive at the top of the module that is set to [`label process_high`](https://github.com/nf-core/rnaseq/blob/4c27ef5610c87db00c3c5a3eed10b1d161abf575/modules/nf-core/software/star/align/main.nf#L9). +The [Nextflow `label`](https://www.nextflow.io/docs/latest/process.html#label) directive allows us to organise workflow processes in separate groups which can be referenced in a configuration file to select and configure subset of processes having similar computing requirements. +The default values for the `process_high` label are set in the pipeline's [`base.config`](https://github.com/nf-core/rnaseq/blob/4c27ef5610c87db00c3c5a3eed10b1d161abf575/conf/base.config#L33-L37) which in this case is defined as 72GB. +Providing you haven't set any other standard nf-core parameters to **cap** the [maximum resources](https://nf-co.re/usage/configuration#max-resources) used by the pipeline then we can try and bypass the `STAR_ALIGN` process failure by creating a custom config file that sets at least 72GB of memory, in this case increased to 100GB. +The custom config below can then be provided to the pipeline via the [`-c`](#-c) parameter as highlighted in previous sections. -### `--outdir` -The output directory where the results will be saved. +```nextflow +process { + withName: 'NFCORE_RNASEQ:RNASEQ:ALIGN_STAR:STAR_ALIGN' { + memory = 100.GB + } +} +``` -### `--email` -Set this parameter to your e-mail address to get a summary e-mail with details of the run sent to you when the workflow exits. If set in your user config file (`~/.nextflow/config`) then you don't need to specify this on the command line for every run. +> **NB:** We specify the full process name i.e. 
`NFCORE_RNASEQ:RNASEQ:ALIGN_STAR:STAR_ALIGN` in the config file because this takes priority over the short name (`STAR_ALIGN`) and allows existing configuration using the full process name to be correctly overridden. +> +> If you get a warning suggesting that the process selector isn't recognised check that the process name has been specified correctly. -### `-name` -Name for the pipeline run. If not specified, Nextflow will automatically generate a random mnemonic. +### Updating containers -This is used in the MultiQC report (if not default) and in the summary HTML / e-mail (always). +The [Nextflow DSL2](https://www.nextflow.io/docs/latest/dsl2.html) implementation of this pipeline uses one container per process which makes it much easier to maintain and update software dependencies. If for some reason you need to use a different version of a particular tool with the pipeline then you just need to identify the `process` name and override the Nextflow `container` definition for that process using the `withName` declaration. For example, in the [nf-core/viralrecon](https://nf-co.re/viralrecon) pipeline a tool called [Pangolin](https://github.com/cov-lineages/pangolin) has been used during the COVID-19 pandemic to assign lineages to SARS-CoV-2 genome sequenced samples. Given that the lineage assignments change quite frequently it doesn't make sense to re-release the nf-core/viralrecon everytime a new version of Pangolin has been released. However, you can override the default container used by the pipeline by creating a custom config file and passing it as a command-line argument via `-c custom.config`. -**NB:** Single hyphen (core Nextflow option) +1. Check the default version used by the pipeline in the module file for [Pangolin](https://github.com/nf-core/viralrecon/blob/a85d5969f9025409e3618d6c280ef15ce417df65/modules/nf-core/software/pangolin/main.nf#L14-L19) +2. Find the latest version of the Biocontainer available on [Quay.io](https://quay.io/repository/biocontainers/pangolin?tag=latest&tab=tags) +3. Create the custom config accordingly: -### `-resume` -Specify this when restarting a pipeline. Nextflow will used cached results from any pipeline steps where the inputs are the same, continuing from where it got to previously. + - For Docker: -You can also supply a run name to resume a specific run: `-resume [run-name]`. Use the `nextflow log` command to show previous run names. + ```nextflow + process { + withName: PANGOLIN { + container = 'quay.io/biocontainers/pangolin:3.0.5--pyhdfd78af_0' + } + } + ``` -**NB:** Single hyphen (core Nextflow option) + - For Singularity: -### `-c` -Specify the path to a specific config file (this is a core NextFlow command). + ```nextflow + process { + withName: PANGOLIN { + container = 'https://depot.galaxyproject.org/singularity/pangolin:3.0.5--pyhdfd78af_0' + } + } + ``` -**NB:** Single hyphen (core Nextflow option) + - For Conda: -Note - you can use this to override pipeline defaults. + ```nextflow + process { + withName: PANGOLIN { + conda = 'bioconda::pangolin=3.0.5' + } + } + ``` -### `--custom_config_version` -Provide git commit id for custom Institutional configs hosted at `nf-core/configs`. This was implemented for reproducibility purposes. Default is set to `master`. +> **NB:** If you wish to periodically update individual tool-specific results (e.g. Pangolin) generated by the pipeline then you must ensure to keep the `work/` directory otherwise the `-resume` ability of the pipeline will be compromised and it will restart from scratch. 
-```bash -## Download and use config file with following git commid id ---custom_config_version d52db660777c4bf36546ddb188ec530c3ada1b96 -``` +### nf-core/configs -### `--custom_config_base` -If you're running offline, nextflow will not be able to fetch the institutional config files -from the internet. If you don't need them, then this is not a problem. If you do need them, -you should download the files from the repo and tell nextflow where to find them with the -`custom_config_base` option. For example: +In most cases, you will only need to create a custom config as a one-off but if you and others within your organisation are likely to be running nf-core pipelines regularly and need to use the same settings regularly it may be a good idea to request that your custom config file is uploaded to the `nf-core/configs` git repository. Before you do this please can you test that the config file works with your pipeline of choice using the `-c` parameter. You can then create a pull request to the `nf-core/configs` repository with the addition of your config file, associated documentation file (see examples in [`nf-core/configs/docs`](https://github.com/nf-core/configs/tree/master/docs)), and amending [`nfcore_custom.config`](https://github.com/nf-core/configs/blob/master/nfcore_custom.config) to include your custom profile. -```bash -## Download and unzip the config files -cd /path/to/my/configs -wget https://github.com/nf-core/configs/archive/master.zip -unzip master.zip - -## Run the pipeline -cd /path/to/my/data -nextflow run /path/to/pipeline/ --custom_config_base /path/to/my/configs/configs-master/ -``` +See the main [Nextflow documentation](https://www.nextflow.io/docs/latest/config.html) for more information about creating your own configuration files. -> Note that the nf-core/tools helper package has a `download` command to download all required pipeline -> files + singularity containers + institutional configs in one go for you, to make this process easier. +If you have any questions or issues please send us a message on [Slack](https://nf-co.re/join/slack) on the [`#configs` channel](https://nfcore.slack.com/channels/configs). -### `--max_memory` -Use to set a top-limit for the default memory requirement for each process. -Should be a string in the format integer-unit. eg. `--max_memory '8.GB'` +## Azure Resource Requests -### `--max_time` -Use to set a top-limit for the default time requirement for each process. -Should be a string in the format integer-unit. eg. `--max_time '2.h'` +To be used with the `azurebatch` profile by specifying the `-profile azurebatch`. +We recommend providing a compute `params.vm_type` of `Standard_D16_v3` VMs by default but these options can be changed if required. -### `--max_cpus` -Use to set a top-limit for the default CPU requirement for each process. -Should be a string in the format integer-unit. eg. `--max_cpus 1` +Note that the choice of VM size depends on your quota and the overall workload during the analysis. +For a thorough list, please refer the [Azure Sizes for virtual machines in Azure](https://docs.microsoft.com/en-us/azure/virtual-machines/sizes). -### `--plaintext_email` -Set to receive plain-text e-mails instead of HTML formatted. +## Running in the background -### `--monochrome_logs` -Set to disable colourful command line output and live life in monochrome. +Nextflow handles job submissions and supervises the running jobs. The Nextflow process must run until the pipeline is finished. 
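+As a minimal sketch (the pipeline parameters shown are illustrative), a detached launch using the `-bg` flag described below might look like:
+
+```bash
+nextflow run nf-core/nascent --input samplesheet.csv --outdir results -profile docker -bg
+```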
-### `--multiqc_config` -Specify a path to a custom MultiQC configuration file. +The Nextflow `-bg` flag launches Nextflow in the background, detached from your terminal so that the workflow does not stop if you log out of your session. The logs are saved to a file. -### `--chrom_sizes` -Specify a path to a file listing the number of nucleotides on each chromosome, for the reference quenome in question. +Alternatively, you can use `screen` / `tmux` or similar tool to create a detached session which you can log back into at a later time. +Some HPC setups also allow you to run nextflow within a cluster job submitted your job scheduler (from where it submits more jobs). -### `--hisat_indices` -Specify a path to the Hisat2 index directory. If not provided, hese indices will be generated the first time this pipeline is executed. +## Nextflow memory requirements -### `--genome_refseq` -Specify a path to the RefSeq genome annotation file. Optional, but useful to collect stats via RseQC. +In some cases, the Nextflow Java virtual machines can start to request a large amount of memory. +We recommend adding the following line to your environment to limit this (typically in `~/.bashrc` or `~./bash_profile`): -### `--sras` -Specify a path to a directory (can use regular expressions) containing SRR files obtained from the Gene Expression Omnibus (GEO) platform. This is an alternative to providing fastq files if re-analizing existing public datasets. +```bash +NXF_OPTS='-Xms1g -Xmx4g' +``` diff --git a/environment.yml b/environment.yml deleted file mode 100644 index 3f2240bc..00000000 --- a/environment.yml +++ /dev/null @@ -1,18 +0,0 @@ -name: nf-core-nascent-1.0 -channels: - - conda-forge - - bioconda - - defaults -dependencies: - - fastqc=0.11.8 - - multiqc=1.7 - - hisat2=2.1.0 - - samtools=1.9 - - preseq=2.0.3 - - seqkit=0.10.1 - - bedtools=2.28.0 - - igvtools=2.3.93 - - bbmap=38.22 - - fastx_toolkit=0.0.14 - - sra-tools=2.9.1 - - rseqc=3.0.0 diff --git a/lib/Headers.groovy b/lib/Headers.groovy new file mode 100644 index 00000000..15d1d388 --- /dev/null +++ b/lib/Headers.groovy @@ -0,0 +1,43 @@ +/* + * This file holds several functions used to render the nf-core ANSI header. + */ + +class Headers { + + private static Map log_colours(Boolean monochrome_logs) { + Map colorcodes = [:] + colorcodes['reset'] = monochrome_logs ? '' : "\033[0m" + colorcodes['dim'] = monochrome_logs ? '' : "\033[2m" + colorcodes['black'] = monochrome_logs ? '' : "\033[0;30m" + colorcodes['green'] = monochrome_logs ? '' : "\033[0;32m" + colorcodes['yellow'] = monochrome_logs ? '' : "\033[0;33m" + colorcodes['yellow_bold'] = monochrome_logs ? '' : "\033[1;93m" + colorcodes['blue'] = monochrome_logs ? '' : "\033[0;34m" + colorcodes['purple'] = monochrome_logs ? '' : "\033[0;35m" + colorcodes['cyan'] = monochrome_logs ? '' : "\033[0;36m" + colorcodes['white'] = monochrome_logs ? '' : "\033[0;37m" + colorcodes['red'] = monochrome_logs ? 
'' : "\033[1;91m" + return colorcodes + } + + static String dashed_line(monochrome_logs) { + Map colors = log_colours(monochrome_logs) + return "-${colors.dim}----------------------------------------------------${colors.reset}-" + } + + static String nf_core(workflow, monochrome_logs) { + Map colors = log_colours(monochrome_logs) + String.format( + """\n + ${dashed_line(monochrome_logs)} + ${colors.green},--.${colors.black}/${colors.green},-.${colors.reset} + ${colors.blue} ___ __ __ __ ___ ${colors.green}/,-._.--~\'${colors.reset} + ${colors.blue} |\\ | |__ __ / ` / \\ |__) |__ ${colors.yellow}} {${colors.reset} + ${colors.blue} | \\| | \\__, \\__/ | \\ |___ ${colors.green}\\`-._,-`-,${colors.reset} + ${colors.green}`._,._,\'${colors.reset} + ${colors.purple} ${workflow.manifest.name} v${workflow.manifest.version}${colors.reset} + ${dashed_line(monochrome_logs)} + """.stripIndent() + ) + } +} diff --git a/lib/NfcoreSchema.groovy b/lib/NfcoreSchema.groovy new file mode 100755 index 00000000..b3d092f8 --- /dev/null +++ b/lib/NfcoreSchema.groovy @@ -0,0 +1,529 @@ +// +// This file holds several functions used to perform JSON parameter validation, help and summary rendering for the nf-core pipeline template. +// + +import org.everit.json.schema.Schema +import org.everit.json.schema.loader.SchemaLoader +import org.everit.json.schema.ValidationException +import org.json.JSONObject +import org.json.JSONTokener +import org.json.JSONArray +import groovy.json.JsonSlurper +import groovy.json.JsonBuilder + +class NfcoreSchema { + + // + // Resolve Schema path relative to main workflow directory + // + public static String getSchemaPath(workflow, schema_filename='nextflow_schema.json') { + return "${workflow.projectDir}/${schema_filename}" + } + + // + // Function to loop over all parameters defined in schema and check + // whether the given parameters adhere to the specifications + // + /* groovylint-disable-next-line UnusedPrivateMethodParameter */ + public static void validateParameters(workflow, params, log, schema_filename='nextflow_schema.json') { + def has_error = false + //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// + // Check for nextflow core params and unexpected params + def json = new File(getSchemaPath(workflow, schema_filename=schema_filename)).text + def Map schemaParams = (Map) new JsonSlurper().parseText(json).get('definitions') + def nf_params = [ + // Options for base `nextflow` command + 'bg', + 'c', + 'C', + 'config', + 'd', + 'D', + 'dockerize', + 'h', + 'log', + 'q', + 'quiet', + 'syslog', + 'v', + 'version', + + // Options for `nextflow run` command + 'ansi', + 'ansi-log', + 'bg', + 'bucket-dir', + 'c', + 'cache', + 'config', + 'dsl2', + 'dump-channels', + 'dump-hashes', + 'E', + 'entry', + 'latest', + 'lib', + 'main-script', + 'N', + 'name', + 'offline', + 'params-file', + 'pi', + 'plugins', + 'poll-interval', + 'pool-size', + 'profile', + 'ps', + 'qs', + 'queue-size', + 'r', + 'resume', + 'revision', + 'stdin', + 'stub', + 'stub-run', + 'test', + 'w', + 'with-charliecloud', + 'with-conda', + 'with-dag', + 'with-docker', + 'with-mpi', + 'with-notification', + 'with-podman', + 'with-report', + 'with-singularity', + 'with-timeline', + 'with-tower', + 'with-trace', + 'with-weblog', + 'without-docker', + 'without-podman', + 'work-dir' + ] + def unexpectedParams = [] + + // Collect expected parameters from the schema + def expectedParams = [] + def enums = [:] + for (group in schemaParams) { + for (p in group.value['properties']) { + 
expectedParams.push(p.key) + if (group.value['properties'][p.key].containsKey('enum')) { + enums[p.key] = group.value['properties'][p.key]['enum'] + } + } + } + + for (specifiedParam in params.keySet()) { + // nextflow params + if (nf_params.contains(specifiedParam)) { + log.error "ERROR: You used a core Nextflow option with two hyphens: '--${specifiedParam}'. Please resubmit with '-${specifiedParam}'" + has_error = true + } + // unexpected params + def params_ignore = params.schema_ignore_params.split(',') + 'schema_ignore_params' + def expectedParamsLowerCase = expectedParams.collect{ it.replace("-", "").toLowerCase() } + def specifiedParamLowerCase = specifiedParam.replace("-", "").toLowerCase() + def isCamelCaseBug = (specifiedParam.contains("-") && !expectedParams.contains(specifiedParam) && expectedParamsLowerCase.contains(specifiedParamLowerCase)) + if (!expectedParams.contains(specifiedParam) && !params_ignore.contains(specifiedParam) && !isCamelCaseBug) { + // Temporarily remove camelCase/camel-case params #1035 + def unexpectedParamsLowerCase = unexpectedParams.collect{ it.replace("-", "").toLowerCase()} + if (!unexpectedParamsLowerCase.contains(specifiedParamLowerCase)){ + unexpectedParams.push(specifiedParam) + } + } + } + + //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// + // Validate parameters against the schema + InputStream input_stream = new File(getSchemaPath(workflow, schema_filename=schema_filename)).newInputStream() + JSONObject raw_schema = new JSONObject(new JSONTokener(input_stream)) + + // Remove anything that's in params.schema_ignore_params + raw_schema = removeIgnoredParams(raw_schema, params) + + Schema schema = SchemaLoader.load(raw_schema) + + // Clean the parameters + def cleanedParams = cleanParameters(params) + + // Convert to JSONObject + def jsonParams = new JsonBuilder(cleanedParams) + JSONObject params_json = new JSONObject(jsonParams.toString()) + + // Validate + try { + schema.validate(params_json) + } catch (ValidationException e) { + println '' + log.error 'ERROR: Validation of pipeline parameters failed!' 
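+                // The ValidationException is converted to JSON so that printExceptions() below can walk its nested causingExceptions and report each offending parameter individually.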
+ JSONObject exceptionJSON = e.toJSON() + printExceptions(exceptionJSON, params_json, log, enums) + println '' + has_error = true + } + + // Check for unexpected parameters + if (unexpectedParams.size() > 0) { + Map colors = NfcoreTemplate.logColours(params.monochrome_logs) + println '' + def warn_msg = 'Found unexpected parameters:' + for (unexpectedParam in unexpectedParams) { + warn_msg = warn_msg + "\n* --${unexpectedParam}: ${params[unexpectedParam].toString()}" + } + log.warn warn_msg + log.info "- ${colors.dim}Ignore this warning: params.schema_ignore_params = \"${unexpectedParams.join(',')}\" ${colors.reset}" + println '' + } + + if (has_error) { + System.exit(1) + } + } + + // + // Beautify parameters for --help + // + public static String paramsHelp(workflow, params, command, schema_filename='nextflow_schema.json') { + Map colors = NfcoreTemplate.logColours(params.monochrome_logs) + Integer num_hidden = 0 + String output = '' + output += 'Typical pipeline command:\n\n' + output += " ${colors.cyan}${command}${colors.reset}\n\n" + Map params_map = paramsLoad(getSchemaPath(workflow, schema_filename=schema_filename)) + Integer max_chars = paramsMaxChars(params_map) + 1 + Integer desc_indent = max_chars + 14 + Integer dec_linewidth = 160 - desc_indent + for (group in params_map.keySet()) { + Integer num_params = 0 + String group_output = colors.underlined + colors.bold + group + colors.reset + '\n' + def group_params = params_map.get(group) // This gets the parameters of that particular group + for (param in group_params.keySet()) { + if (group_params.get(param).hidden && !params.show_hidden_params) { + num_hidden += 1 + continue; + } + def type = '[' + group_params.get(param).type + ']' + def description = group_params.get(param).description + def defaultValue = group_params.get(param).default != null ? " [default: " + group_params.get(param).default.toString() + "]" : '' + def description_default = description + colors.dim + defaultValue + colors.reset + // Wrap long description texts + // Loosely based on https://dzone.com/articles/groovy-plain-text-word-wrap + if (description_default.length() > dec_linewidth){ + List olines = [] + String oline = "" // " " * indent + description_default.split(" ").each() { wrd -> + if ((oline.size() + wrd.size()) <= dec_linewidth) { + oline += wrd + " " + } else { + olines += oline + oline = wrd + " " + } + } + olines += oline + description_default = olines.join("\n" + " " * desc_indent) + } + group_output += " --" + param.padRight(max_chars) + colors.dim + type.padRight(10) + colors.reset + description_default + '\n' + num_params += 1 + } + group_output += '\n' + if (num_params > 0){ + output += group_output + } + } + if (num_hidden > 0){ + output += colors.dim + "!! 
Hiding $num_hidden params, use --show_hidden_params to show them !!\n" + colors.reset + } + output += NfcoreTemplate.dashedLine(params.monochrome_logs) + return output + } + + // + // Groovy Map summarising parameters/workflow options used by the pipeline + // + public static LinkedHashMap paramsSummaryMap(workflow, params, schema_filename='nextflow_schema.json') { + // Get a selection of core Nextflow workflow options + def Map workflow_summary = [:] + if (workflow.revision) { + workflow_summary['revision'] = workflow.revision + } + workflow_summary['runName'] = workflow.runName + if (workflow.containerEngine) { + workflow_summary['containerEngine'] = workflow.containerEngine + } + if (workflow.container) { + workflow_summary['container'] = workflow.container + } + workflow_summary['launchDir'] = workflow.launchDir + workflow_summary['workDir'] = workflow.workDir + workflow_summary['projectDir'] = workflow.projectDir + workflow_summary['userName'] = workflow.userName + workflow_summary['profile'] = workflow.profile + workflow_summary['configFiles'] = workflow.configFiles.join(', ') + + // Get pipeline parameters defined in JSON Schema + def Map params_summary = [:] + def params_map = paramsLoad(getSchemaPath(workflow, schema_filename=schema_filename)) + for (group in params_map.keySet()) { + def sub_params = new LinkedHashMap() + def group_params = params_map.get(group) // This gets the parameters of that particular group + for (param in group_params.keySet()) { + if (params.containsKey(param)) { + def params_value = params.get(param) + def schema_value = group_params.get(param).default + def param_type = group_params.get(param).type + if (schema_value != null) { + if (param_type == 'string') { + if (schema_value.contains('$projectDir') || schema_value.contains('${projectDir}')) { + def sub_string = schema_value.replace('\$projectDir', '') + sub_string = sub_string.replace('\${projectDir}', '') + if (params_value.contains(sub_string)) { + schema_value = params_value + } + } + if (schema_value.contains('$params.outdir') || schema_value.contains('${params.outdir}')) { + def sub_string = schema_value.replace('\$params.outdir', '') + sub_string = sub_string.replace('\${params.outdir}', '') + if ("${params.outdir}${sub_string}" == params_value) { + schema_value = params_value + } + } + } + } + + // We have a default in the schema, and this isn't it + if (schema_value != null && params_value != schema_value) { + sub_params.put(param, params_value) + } + // No default in the schema, and this isn't empty + else if (schema_value == null && params_value != "" && params_value != null && params_value != false) { + sub_params.put(param, params_value) + } + } + } + params_summary.put(group, sub_params) + } + return [ 'Core Nextflow options' : workflow_summary ] << params_summary + } + + // + // Beautify parameters for summary and return as string + // + public static String paramsSummaryLog(workflow, params) { + Map colors = NfcoreTemplate.logColours(params.monochrome_logs) + String output = '' + def params_map = paramsSummaryMap(workflow, params) + def max_chars = paramsMaxChars(params_map) + for (group in params_map.keySet()) { + def group_params = params_map.get(group) // This gets the parameters of that particular group + if (group_params) { + output += colors.bold + group + colors.reset + '\n' + for (param in group_params.keySet()) { + output += " " + colors.blue + param.padRight(max_chars) + ": " + colors.green + group_params.get(param) + colors.reset + '\n' + } + output += '\n' + } + } + output 
+= "!! Only displaying parameters that differ from the pipeline defaults !!\n" + output += NfcoreTemplate.dashedLine(params.monochrome_logs) + return output + } + + // + // Loop over nested exceptions and print the causingException + // + private static void printExceptions(ex_json, params_json, log, enums, limit=5) { + def causingExceptions = ex_json['causingExceptions'] + if (causingExceptions.length() == 0) { + def m = ex_json['message'] =~ /required key \[([^\]]+)\] not found/ + // Missing required param + if (m.matches()) { + log.error "* Missing required parameter: --${m[0][1]}" + } + // Other base-level error + else if (ex_json['pointerToViolation'] == '#') { + log.error "* ${ex_json['message']}" + } + // Error with specific param + else { + def param = ex_json['pointerToViolation'] - ~/^#\// + def param_val = params_json[param].toString() + if (enums.containsKey(param)) { + def error_msg = "* --${param}: '${param_val}' is not a valid choice (Available choices" + if (enums[param].size() > limit) { + log.error "${error_msg} (${limit} of ${enums[param].size()}): ${enums[param][0..limit-1].join(', ')}, ... )" + } else { + log.error "${error_msg}: ${enums[param].join(', ')})" + } + } else { + log.error "* --${param}: ${ex_json['message']} (${param_val})" + } + } + } + for (ex in causingExceptions) { + printExceptions(ex, params_json, log, enums) + } + } + + // + // Remove an element from a JSONArray + // + private static JSONArray removeElement(json_array, element) { + def list = [] + int len = json_array.length() + for (int i=0;i + if(raw_schema.keySet().contains('definitions')){ + raw_schema.definitions.each { definition -> + for (key in definition.keySet()){ + if (definition[key].get("properties").keySet().contains(ignore_param)){ + // Remove the param to ignore + definition[key].get("properties").remove(ignore_param) + // If the param was required, change this + if (definition[key].has("required")) { + def cleaned_required = removeElement(definition[key].required, ignore_param) + definition[key].put("required", cleaned_required) + } + } + } + } + } + if(raw_schema.keySet().contains('properties') && raw_schema.get('properties').keySet().contains(ignore_param)) { + raw_schema.get("properties").remove(ignore_param) + } + if(raw_schema.keySet().contains('required') && raw_schema.required.contains(ignore_param)) { + def cleaned_required = removeElement(raw_schema.required, ignore_param) + raw_schema.put("required", cleaned_required) + } + } + return raw_schema + } + + // + // Clean and check parameters relative to Nextflow native classes + // + private static Map cleanParameters(params) { + def new_params = params.getClass().newInstance(params) + for (p in params) { + // remove anything evaluating to false + if (!p['value']) { + new_params.remove(p.key) + } + // Cast MemoryUnit to String + if (p['value'].getClass() == nextflow.util.MemoryUnit) { + new_params.replace(p.key, p['value'].toString()) + } + // Cast Duration to String + if (p['value'].getClass() == nextflow.util.Duration) { + new_params.replace(p.key, p['value'].toString().replaceFirst(/d(?!\S)/, "day")) + } + // Cast LinkedHashMap to String + if (p['value'].getClass() == LinkedHashMap) { + new_params.replace(p.key, p['value'].toString()) + } + } + return new_params + } + + // + // This function tries to read a JSON params file + // + private static LinkedHashMap paramsLoad(String json_schema) { + def params_map = new LinkedHashMap() + try { + params_map = paramsRead(json_schema) + } catch (Exception e) { + println "Could not 
read parameters settings from JSON. $e" + params_map = new LinkedHashMap() + } + return params_map + } + + // + // Method to actually read in JSON file using Groovy. + // Group (as Key), values are all parameters + // - Parameter1 as Key, Description as Value + // - Parameter2 as Key, Description as Value + // .... + // Group + // - + private static LinkedHashMap paramsRead(String json_schema) throws Exception { + def json = new File(json_schema).text + def Map schema_definitions = (Map) new JsonSlurper().parseText(json).get('definitions') + def Map schema_properties = (Map) new JsonSlurper().parseText(json).get('properties') + /* Tree looks like this in nf-core schema + * definitions <- this is what the first get('definitions') gets us + group 1 + title + description + properties + parameter 1 + type + description + parameter 2 + type + description + group 2 + title + description + properties + parameter 1 + type + description + * properties <- parameters can also be ungrouped, outside of definitions + parameter 1 + type + description + */ + + // Grouped params + def params_map = new LinkedHashMap() + schema_definitions.each { key, val -> + def Map group = schema_definitions."$key".properties // Gets the property object of the group + def title = schema_definitions."$key".title + def sub_params = new LinkedHashMap() + group.each { innerkey, value -> + sub_params.put(innerkey, value) + } + params_map.put(title, sub_params) + } + + // Ungrouped params + def ungrouped_params = new LinkedHashMap() + schema_properties.each { innerkey, value -> + ungrouped_params.put(innerkey, value) + } + params_map.put("Other parameters", ungrouped_params) + + return params_map + } + + // + // Get maximum number of characters across all parameter names + // + private static Integer paramsMaxChars(params_map) { + Integer max_chars = 0 + for (group in params_map.keySet()) { + def group_params = params_map.get(group) // This gets the parameters of that particular group + for (param in group_params.keySet()) { + if (param.size() > max_chars) { + max_chars = param.size() + } + } + } + return max_chars + } +} diff --git a/lib/NfcoreTemplate.groovy b/lib/NfcoreTemplate.groovy new file mode 100755 index 00000000..27feb009 --- /dev/null +++ b/lib/NfcoreTemplate.groovy @@ -0,0 +1,313 @@ +// +// This file holds several functions used within the nf-core pipeline template. +// + +import org.yaml.snakeyaml.Yaml + +class NfcoreTemplate { + + // + // Check AWS Batch related parameters have been specified correctly + // + public static void awsBatch(workflow, params) { + if (workflow.profile.contains('awsbatch')) { + // Check params.awsqueue and params.awsregion have been set if running on AWSBatch + assert (params.awsqueue && params.awsregion) : "Specify correct --awsqueue and --awsregion parameters on AWSBatch!" + // Check outdir paths to be S3 buckets if running on AWSBatch + assert params.outdir.startsWith('s3:') : "Outdir not on S3 - specify S3 Bucket to run on AWSBatch!" + } + } + + // + // Warn if a -profile or Nextflow config has not been provided to run the pipeline + // + public static void checkConfigProvided(workflow, log) { + if (workflow.profile == 'standard' && workflow.configFiles.size() <= 1) { + log.warn "[$workflow.manifest.name] You are attempting to run the pipeline without any custom configuration!\n\n" + + "This will be dependent on your local compute environment but can be achieved via one or more of the following:\n" + + " (1) Using an existing pipeline profile e.g. 
`-profile docker` or `-profile singularity`\n" + + " (2) Using an existing nf-core/configs for your Institution e.g. `-profile crick` or `-profile uppmax`\n" + + " (3) Using your own local custom config e.g. `-c /path/to/your/custom.config`\n\n" + + "Please refer to the quick start section and usage docs for the pipeline.\n " + } + } + + // + // Construct and send completion email + // + public static void email(workflow, params, summary_params, projectDir, log, multiqc_report=[]) { + + // Set up the e-mail variables + def subject = "[$workflow.manifest.name] Successful: $workflow.runName" + if (!workflow.success) { + subject = "[$workflow.manifest.name] FAILED: $workflow.runName" + } + + def summary = [:] + for (group in summary_params.keySet()) { + summary << summary_params[group] + } + + def misc_fields = [:] + misc_fields['Date Started'] = workflow.start + misc_fields['Date Completed'] = workflow.complete + misc_fields['Pipeline script file path'] = workflow.scriptFile + misc_fields['Pipeline script hash ID'] = workflow.scriptId + if (workflow.repository) misc_fields['Pipeline repository Git URL'] = workflow.repository + if (workflow.commitId) misc_fields['Pipeline repository Git Commit'] = workflow.commitId + if (workflow.revision) misc_fields['Pipeline Git branch/tag'] = workflow.revision + misc_fields['Nextflow Version'] = workflow.nextflow.version + misc_fields['Nextflow Build'] = workflow.nextflow.build + misc_fields['Nextflow Compile Timestamp'] = workflow.nextflow.timestamp + + def email_fields = [:] + email_fields['version'] = workflow.manifest.version + email_fields['runName'] = workflow.runName + email_fields['success'] = workflow.success + email_fields['dateComplete'] = workflow.complete + email_fields['duration'] = workflow.duration + email_fields['exitStatus'] = workflow.exitStatus + email_fields['errorMessage'] = (workflow.errorMessage ?: 'None') + email_fields['errorReport'] = (workflow.errorReport ?: 'None') + email_fields['commandLine'] = workflow.commandLine + email_fields['projectDir'] = workflow.projectDir + email_fields['summary'] = summary << misc_fields + + // On success try attach the multiqc report + def mqc_report = null + try { + if (workflow.success) { + mqc_report = multiqc_report.getVal() + if (mqc_report.getClass() == ArrayList && mqc_report.size() >= 1) { + if (mqc_report.size() > 1) { + log.warn "[$workflow.manifest.name] Found multiple reports from process 'MULTIQC', will use only one" + } + mqc_report = mqc_report[0] + } + } + } catch (all) { + if (multiqc_report) { + log.warn "[$workflow.manifest.name] Could not attach MultiQC report to summary email" + } + } + + // Check if we are only sending emails on failure + def email_address = params.email + if (!params.email && params.email_on_fail && !workflow.success) { + email_address = params.email_on_fail + } + + // Render the TXT template + def engine = new groovy.text.GStringTemplateEngine() + def tf = new File("$projectDir/assets/email_template.txt") + def txt_template = engine.createTemplate(tf).make(email_fields) + def email_txt = txt_template.toString() + + // Render the HTML template + def hf = new File("$projectDir/assets/email_template.html") + def html_template = engine.createTemplate(hf).make(email_fields) + def email_html = html_template.toString() + + // Render the sendmail template + def max_multiqc_email_size = params.max_multiqc_email_size as nextflow.util.MemoryUnit + def smail_fields = [ email: email_address, subject: subject, email_txt: email_txt, email_html: email_html, projectDir: 
"$projectDir", mqcFile: mqc_report, mqcMaxSize: max_multiqc_email_size.toBytes() ] + def sf = new File("$projectDir/assets/sendmail_template.txt") + def sendmail_template = engine.createTemplate(sf).make(smail_fields) + def sendmail_html = sendmail_template.toString() + + // Send the HTML e-mail + Map colors = logColours(params.monochrome_logs) + if (email_address) { + try { + if (params.plaintext_email) { throw GroovyException('Send plaintext e-mail, not HTML') } + // Try to send HTML e-mail using sendmail + [ 'sendmail', '-t' ].execute() << sendmail_html + log.info "-${colors.purple}[$workflow.manifest.name]${colors.green} Sent summary e-mail to $email_address (sendmail)-" + } catch (all) { + // Catch failures and try with plaintext + def mail_cmd = [ 'mail', '-s', subject, '--content-type=text/html', email_address ] + if ( mqc_report.size() <= max_multiqc_email_size.toBytes() ) { + mail_cmd += [ '-A', mqc_report ] + } + mail_cmd.execute() << email_html + log.info "-${colors.purple}[$workflow.manifest.name]${colors.green} Sent summary e-mail to $email_address (mail)-" + } + } + + // Write summary e-mail HTML to a file + def output_d = new File("${params.outdir}/pipeline_info/") + if (!output_d.exists()) { + output_d.mkdirs() + } + def output_hf = new File(output_d, "pipeline_report.html") + output_hf.withWriter { w -> w << email_html } + def output_tf = new File(output_d, "pipeline_report.txt") + output_tf.withWriter { w -> w << email_txt } + } + + // + // Construct and send adaptive card + // https://adaptivecards.io + // + public static void adaptivecard(workflow, params, summary_params, projectDir, log) { + def hook_url = params.hook_url + + def summary = [:] + for (group in summary_params.keySet()) { + summary << summary_params[group] + } + + def misc_fields = [:] + misc_fields['start'] = workflow.start + misc_fields['complete'] = workflow.complete + misc_fields['scriptfile'] = workflow.scriptFile + misc_fields['scriptid'] = workflow.scriptId + if (workflow.repository) misc_fields['repository'] = workflow.repository + if (workflow.commitId) misc_fields['commitid'] = workflow.commitId + if (workflow.revision) misc_fields['revision'] = workflow.revision + misc_fields['nxf_version'] = workflow.nextflow.version + misc_fields['nxf_build'] = workflow.nextflow.build + misc_fields['nxf_timestamp'] = workflow.nextflow.timestamp + + def msg_fields = [:] + msg_fields['version'] = workflow.manifest.version + msg_fields['runName'] = workflow.runName + msg_fields['success'] = workflow.success + msg_fields['dateComplete'] = workflow.complete + msg_fields['duration'] = workflow.duration + msg_fields['exitStatus'] = workflow.exitStatus + msg_fields['errorMessage'] = (workflow.errorMessage ?: 'None') + msg_fields['errorReport'] = (workflow.errorReport ?: 'None') + msg_fields['commandLine'] = workflow.commandLine + msg_fields['projectDir'] = workflow.projectDir + msg_fields['summary'] = summary << misc_fields + + // Render the JSON template + def engine = new groovy.text.GStringTemplateEngine() + def hf = new File("$projectDir/assets/adaptivecard.json") + def json_template = engine.createTemplate(hf).make(msg_fields) + def json_message = json_template.toString() + + // POST + def post = new URL(hook_url).openConnection(); + post.setRequestMethod("POST") + post.setDoOutput(true) + post.setRequestProperty("Content-Type", "application/json") + post.getOutputStream().write(json_message.getBytes("UTF-8")); + def postRC = post.getResponseCode(); + if (! 
postRC.equals(200)) { + log.warn(post.getErrorStream().getText()); + } + } + + // + // Print pipeline summary on completion + // + public static void summary(workflow, params, log) { + Map colors = logColours(params.monochrome_logs) + if (workflow.success) { + if (workflow.stats.ignoredCount == 0) { + log.info "-${colors.purple}[$workflow.manifest.name]${colors.green} Pipeline completed successfully${colors.reset}-" + } else { + log.info "-${colors.purple}[$workflow.manifest.name]${colors.red} Pipeline completed successfully, but with errored process(es) ${colors.reset}-" + } + } else { + log.info "-${colors.purple}[$workflow.manifest.name]${colors.red} Pipeline completed with errors${colors.reset}-" + } + } + + // + // ANSII Colours used for terminal logging + // + public static Map logColours(Boolean monochrome_logs) { + Map colorcodes = [:] + + // Reset / Meta + colorcodes['reset'] = monochrome_logs ? '' : "\033[0m" + colorcodes['bold'] = monochrome_logs ? '' : "\033[1m" + colorcodes['dim'] = monochrome_logs ? '' : "\033[2m" + colorcodes['underlined'] = monochrome_logs ? '' : "\033[4m" + colorcodes['blink'] = monochrome_logs ? '' : "\033[5m" + colorcodes['reverse'] = monochrome_logs ? '' : "\033[7m" + colorcodes['hidden'] = monochrome_logs ? '' : "\033[8m" + + // Regular Colors + colorcodes['black'] = monochrome_logs ? '' : "\033[0;30m" + colorcodes['red'] = monochrome_logs ? '' : "\033[0;31m" + colorcodes['green'] = monochrome_logs ? '' : "\033[0;32m" + colorcodes['yellow'] = monochrome_logs ? '' : "\033[0;33m" + colorcodes['blue'] = monochrome_logs ? '' : "\033[0;34m" + colorcodes['purple'] = monochrome_logs ? '' : "\033[0;35m" + colorcodes['cyan'] = monochrome_logs ? '' : "\033[0;36m" + colorcodes['white'] = monochrome_logs ? '' : "\033[0;37m" + + // Bold + colorcodes['bblack'] = monochrome_logs ? '' : "\033[1;30m" + colorcodes['bred'] = monochrome_logs ? '' : "\033[1;31m" + colorcodes['bgreen'] = monochrome_logs ? '' : "\033[1;32m" + colorcodes['byellow'] = monochrome_logs ? '' : "\033[1;33m" + colorcodes['bblue'] = monochrome_logs ? '' : "\033[1;34m" + colorcodes['bpurple'] = monochrome_logs ? '' : "\033[1;35m" + colorcodes['bcyan'] = monochrome_logs ? '' : "\033[1;36m" + colorcodes['bwhite'] = monochrome_logs ? '' : "\033[1;37m" + + // Underline + colorcodes['ublack'] = monochrome_logs ? '' : "\033[4;30m" + colorcodes['ured'] = monochrome_logs ? '' : "\033[4;31m" + colorcodes['ugreen'] = monochrome_logs ? '' : "\033[4;32m" + colorcodes['uyellow'] = monochrome_logs ? '' : "\033[4;33m" + colorcodes['ublue'] = monochrome_logs ? '' : "\033[4;34m" + colorcodes['upurple'] = monochrome_logs ? '' : "\033[4;35m" + colorcodes['ucyan'] = monochrome_logs ? '' : "\033[4;36m" + colorcodes['uwhite'] = monochrome_logs ? '' : "\033[4;37m" + + // High Intensity + colorcodes['iblack'] = monochrome_logs ? '' : "\033[0;90m" + colorcodes['ired'] = monochrome_logs ? '' : "\033[0;91m" + colorcodes['igreen'] = monochrome_logs ? '' : "\033[0;92m" + colorcodes['iyellow'] = monochrome_logs ? '' : "\033[0;93m" + colorcodes['iblue'] = monochrome_logs ? '' : "\033[0;94m" + colorcodes['ipurple'] = monochrome_logs ? '' : "\033[0;95m" + colorcodes['icyan'] = monochrome_logs ? '' : "\033[0;96m" + colorcodes['iwhite'] = monochrome_logs ? '' : "\033[0;97m" + + // Bold High Intensity + colorcodes['biblack'] = monochrome_logs ? '' : "\033[1;90m" + colorcodes['bired'] = monochrome_logs ? '' : "\033[1;91m" + colorcodes['bigreen'] = monochrome_logs ? '' : "\033[1;92m" + colorcodes['biyellow'] = monochrome_logs ? 
'' : "\033[1;93m" + colorcodes['biblue'] = monochrome_logs ? '' : "\033[1;94m" + colorcodes['bipurple'] = monochrome_logs ? '' : "\033[1;95m" + colorcodes['bicyan'] = monochrome_logs ? '' : "\033[1;96m" + colorcodes['biwhite'] = monochrome_logs ? '' : "\033[1;97m" + + return colorcodes + } + + // + // Does what is says on the tin + // + public static String dashedLine(monochrome_logs) { + Map colors = logColours(monochrome_logs) + return "-${colors.dim}----------------------------------------------------${colors.reset}-" + } + + // + // nf-core logo + // + public static String logo(workflow, monochrome_logs) { + Map colors = logColours(monochrome_logs) + String.format( + """\n + ${dashedLine(monochrome_logs)} + ${colors.green},--.${colors.black}/${colors.green},-.${colors.reset} + ${colors.blue} ___ __ __ __ ___ ${colors.green}/,-._.--~\'${colors.reset} + ${colors.blue} |\\ | |__ __ / ` / \\ |__) |__ ${colors.yellow}} {${colors.reset} + ${colors.blue} | \\| | \\__, \\__/ | \\ |___ ${colors.green}\\`-._,-`-,${colors.reset} + ${colors.green}`._,._,\'${colors.reset} + ${colors.purple} ${workflow.manifest.name} v${workflow.manifest.version}${colors.reset} + ${dashedLine(monochrome_logs)} + """.stripIndent() + ) + } +} diff --git a/lib/Schema.groovy b/lib/Schema.groovy new file mode 100644 index 00000000..d45d0005 --- /dev/null +++ b/lib/Schema.groovy @@ -0,0 +1,228 @@ +/* + * This file holds several functions used to perform JSON parameter validation, help and summary rendering for the nf-core pipeline template. + */ + +import groovy.json.JsonSlurper + +class Schema { + /* + * This method tries to read a JSON params file + */ + private static LinkedHashMap params_load(String json_schema) { + def params_map = new LinkedHashMap() + try { + params_map = params_read(json_schema) + } catch (Exception e) { + println "Could not read parameters settings from JSON. $e" + params_map = new LinkedHashMap() + } + return params_map + } + + /* + Method to actually read in JSON file using Groovy. + Group (as Key), values are all parameters + - Parameter1 as Key, Description as Value + - Parameter2 as Key, Description as Value + .... 
+ Group + - + */ + private static LinkedHashMap params_read(String json_schema) throws Exception { + def json = new File(json_schema).text + def Map json_params = (Map) new JsonSlurper().parseText(json).get('definitions') + /* Tree looks like this in nf-core schema + * definitions <- this is what the first get('definitions') gets us + group 1 + title + description + properties + parameter 1 + type + description + parameter 2 + type + description + group 2 + title + description + properties + parameter 1 + type + description + */ + def params_map = new LinkedHashMap() + json_params.each { key, val -> + def Map group = json_params."$key".properties // Gets the property object of the group + def title = json_params."$key".title + def sub_params = new LinkedHashMap() + group.each { innerkey, value -> + sub_params.put(innerkey, value) + } + params_map.put(title, sub_params) + } + return params_map + } + + /* + * Get maximum number of characters across all parameter names + */ + private static Integer params_max_chars(params_map) { + Integer max_chars = 0 + for (group in params_map.keySet()) { + def group_params = params_map.get(group) // This gets the parameters of that particular group + for (param in group_params.keySet()) { + if (param.size() > max_chars) { + max_chars = param.size() + } + } + } + return max_chars + } + + /* + * Beautify parameters for --help + */ + private static String params_help(workflow, params, json_schema, command) { + String output = Headers.nf_core(workflow, params.monochrome_logs) + "\n" + output += "Typical pipeline command:\n\n" + output += " ${command}\n\n" + def params_map = params_load(json_schema) + def max_chars = params_max_chars(params_map) + 1 + for (group in params_map.keySet()) { + output += group + "\n" + def group_params = params_map.get(group) // This gets the parameters of that particular group + for (param in group_params.keySet()) { + def type = "[" + group_params.get(param).type + "]" + def description = group_params.get(param).description + output += " \u001B[1m--" + param.padRight(max_chars) + "\u001B[1m" + type.padRight(10) + description + "\n" + } + output += "\n" + } + output += Headers.dashed_line(params.monochrome_logs) + output += "\n\n" + Checks.citation(workflow) + output += "\n\n" + Headers.dashed_line(params.monochrome_logs) + return output + } + + /* + * Groovy Map summarising parameters/workflow options used by the pipeline + */ + private static LinkedHashMap params_summary_map(workflow, params, json_schema) { + // Get a selection of core Nextflow workflow options + def Map workflow_summary = [:] + if (workflow.revision) { + workflow_summary['revision'] = workflow.revision + } + workflow_summary['runName'] = workflow.runName + if (workflow.containerEngine) { + workflow_summary['containerEngine'] = "$workflow.containerEngine" + } + if (workflow.container) { + workflow_summary['container'] = "$workflow.container" + } + workflow_summary['launchDir'] = workflow.launchDir + workflow_summary['workDir'] = workflow.workDir + workflow_summary['projectDir'] = workflow.projectDir + workflow_summary['userName'] = workflow.userName + workflow_summary['profile'] = workflow.profile + workflow_summary['configFiles'] = workflow.configFiles.join(', ') + + // Get pipeline parameters defined in JSON Schema + def Map params_summary = [:] + def blacklist = ['hostnames'] + def params_map = params_load(json_schema) + for (group in params_map.keySet()) { + def sub_params = new LinkedHashMap() + def group_params = params_map.get(group) // This gets the 
parameters of that particular group + for (param in group_params.keySet()) { + if (params.containsKey(param) && !blacklist.contains(param)) { + def params_value = params.get(param) + def schema_value = group_params.get(param).default + def param_type = group_params.get(param).type + if (schema_value == null) { + if (param_type == 'boolean') { + schema_value = false + } + if (param_type == 'string') { + schema_value = '' + } + if (param_type == 'integer') { + schema_value = 0 + } + } else { + if (param_type == 'string') { + if (schema_value.contains('$projectDir') || schema_value.contains('${projectDir}')) { + def sub_string = schema_value.replace('\$projectDir','') + sub_string = sub_string.replace('\${projectDir}','') + if (params_value.contains(sub_string)) { + schema_value = params_value + } + } + if (schema_value.contains('$params.outdir') || schema_value.contains('${params.outdir}')) { + def sub_string = schema_value.replace('\$params.outdir','') + sub_string = sub_string.replace('\${params.outdir}','') + if ("${params.outdir}${sub_string}" == params_value) { + schema_value = params_value + } + } + } + } + + if (params_value != schema_value) { + sub_params.put("$param", params_value) + } + } + } + params_summary.put(group, sub_params) + } + return [ 'Core Nextflow options' : workflow_summary ] << params_summary + } + + /* + * Beautify parameters for summary and return as string + */ + private static String params_summary_log(workflow, params, json_schema) { + String output = Headers.nf_core(workflow, params.monochrome_logs) + "\n" + def params_map = params_summary_map(workflow, params, json_schema) + def max_chars = params_max_chars(params_map) + for (group in params_map.keySet()) { + def group_params = params_map.get(group) // This gets the parameters of that particular group + if (group_params) { + output += group + "\n" + for (param in group_params.keySet()) { + output += " \u001B[1m" + param.padRight(max_chars) + ": \u001B[1m" + group_params.get(param) + "\n" + } + output += "\n" + } + } + output += Headers.dashed_line(params.monochrome_logs) + output += "\n\n" + Checks.citation(workflow) + output += "\n\n" + Headers.dashed_line(params.monochrome_logs) + return output + } + + static String params_summary_multiqc(workflow, summary) { + String summary_section = '' + for (group in summary.keySet()) { + def group_params = summary.get(group) // This gets the parameters of that particular group + if (group_params) { + summary_section += "

<p style=\"font-size:110%\"><b>$group</b></p>\n" + summary_section += "    <dl class=\"dl-horizontal\">\n" + for (param in group_params.keySet()) { + summary_section += "        <dt>$param</dt><dd><samp>${group_params.get(param) ?: '<span style=\"color:#999999;\">N/A</a>'}</samp></dd>\n" + } + summary_section += "    </dl>
\n" + } + } + + String yaml_file_text = "id: '${workflow.manifest.name.replace('/','-')}-summary'\n" + yaml_file_text += "description: ' - this information is collected when the pipeline is started.'\n" + yaml_file_text += "section_name: '${workflow.manifest.name} Workflow Summary'\n" + yaml_file_text += "section_href: 'https://github.com/${workflow.manifest.name}'\n" + yaml_file_text += "plot_type: 'html'\n" + yaml_file_text += "data: |\n" + yaml_file_text += "${summary_section}" + return yaml_file_text + } +} diff --git a/lib/Utils.groovy b/lib/Utils.groovy new file mode 100644 index 00000000..8d030f4e --- /dev/null +++ b/lib/Utils.groovy @@ -0,0 +1,47 @@ +// +// This file holds several Groovy functions that could be useful for any Nextflow pipeline +// + +import org.yaml.snakeyaml.Yaml + +class Utils { + + // + // When running with -profile conda, warn if channels have not been set-up appropriately + // + public static void checkCondaChannels(log) { + Yaml parser = new Yaml() + def channels = [] + try { + def config = parser.load("conda config --show channels".execute().text) + channels = config.channels + } catch(NullPointerException | IOException e) { + log.warn "Could not verify conda channel configuration." + return + } + + // Check that all channels are present + // This channel list is ordered by required channel priority. + def required_channels_in_order = ['conda-forge', 'bioconda', 'defaults'] + def channels_missing = ((required_channels_in_order as Set) - (channels as Set)) as Boolean + + // Check that they are in the right order + def channel_priority_violation = false + def n = required_channels_in_order.size() + for (int i = 0; i < n - 1; i++) { + channel_priority_violation |= !(channels.indexOf(required_channels_in_order[i]) < channels.indexOf(required_channels_in_order[i+1])) + } + + if (channels_missing | channel_priority_violation) { + log.warn "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n" + + " There is a problem with your Conda configuration!\n\n" + + " You will need to set-up the conda-forge and bioconda channels correctly.\n" + + " Please refer to https://bioconda.github.io/\n" + + " The observed channel order is \n" + + " ${channels}\n" + + " but the following channel order is required:\n" + + " ${required_channels_in_order}\n" + + "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" + } + } +} diff --git a/lib/WorkflowMain.groovy b/lib/WorkflowMain.groovy new file mode 100755 index 00000000..de39ca38 --- /dev/null +++ b/lib/WorkflowMain.groovy @@ -0,0 +1,93 @@ +// +// This file holds several functions specific to the main.nf workflow in the nf-core/nascent pipeline +// + +class WorkflowMain { + + // + // Citation string for pipeline + // + public static String citation(workflow) { + return "If you use ${workflow.manifest.name} for your analysis please cite:\n\n" + + // TODO nf-core: Add Zenodo DOI for pipeline after first release + //"* The pipeline\n" + + //" https://doi.org/10.5281/zenodo.XXXXXXX\n\n" + + "* The nf-core framework\n" + + " https://doi.org/10.1038/s41587-020-0439-x\n\n" + + "* Software dependencies\n" + + " https://github.com/${workflow.manifest.name}/blob/master/CITATIONS.md" + } + + // + // Print help to screen if required + // + public static String help(workflow, params, log) { + def command = "nextflow run ${workflow.manifest.name} --input samplesheet.csv --genome GRCh37 -profile docker" + def help_string = '' + help_string += NfcoreTemplate.logo(workflow, 
params.monochrome_logs) + help_string += NfcoreSchema.paramsHelp(workflow, params, command) + help_string += '\n' + citation(workflow) + '\n' + help_string += NfcoreTemplate.dashedLine(params.monochrome_logs) + return help_string + } + + // + // Print parameter summary log to screen + // + public static String paramsSummaryLog(workflow, params, log) { + def summary_log = '' + summary_log += NfcoreTemplate.logo(workflow, params.monochrome_logs) + summary_log += NfcoreSchema.paramsSummaryLog(workflow, params) + summary_log += '\n' + citation(workflow) + '\n' + summary_log += NfcoreTemplate.dashedLine(params.monochrome_logs) + return summary_log + } + + // + // Validate parameters and print summary to screen + // + public static void initialise(workflow, params, log) { + // Print help to screen if required + if (params.help) { + log.info help(workflow, params, log) + System.exit(0) + } + + // Validate workflow parameters via the JSON schema + if (params.validate_params) { + NfcoreSchema.validateParameters(workflow, params, log) + } + + // Print parameter summary log to screen + + log.info paramsSummaryLog(workflow, params, log) + + // Check that a -profile or Nextflow config has been provided to run the pipeline + NfcoreTemplate.checkConfigProvided(workflow, log) + + // Check that conda channels are set-up correctly + if (params.enable_conda) { + Utils.checkCondaChannels(log) + } + + // Check AWS batch settings + NfcoreTemplate.awsBatch(workflow, params) + + // Check input has been provided + if (!params.input) { + log.error "Please provide an input samplesheet to the pipeline e.g. '--input samplesheet.csv'" + System.exit(1) + } + } + // + // Get attribute from genome config file e.g. fasta + // + public static Object getGenomeAttribute(params, attribute) { + if (params.genomes && params.genome && params.genomes.containsKey(params.genome)) { + if (params.genomes[ params.genome ].containsKey(attribute)) { + return params.genomes[ params.genome ][ attribute ] + } + } + return null + } +} diff --git a/lib/WorkflowNascent.groovy b/lib/WorkflowNascent.groovy new file mode 100755 index 00000000..8f4e2af3 --- /dev/null +++ b/lib/WorkflowNascent.groovy @@ -0,0 +1,77 @@ +// +// This file holds several functions specific to the workflow/nascent.nf in the nf-core/nascent pipeline +// + +import groovy.text.SimpleTemplateEngine + +class WorkflowNascent { + + // + // Check and validate parameters + // + public static void initialise(params, log) { + genomeExistsError(params, log) + + + if (!params.fasta) { + log.error "Genome fasta file not specified with e.g. '--fasta genome.fa' or via a detectable config file." + System.exit(1) + } + } + + // + // Get workflow summary for MultiQC + // + public static String paramsSummaryMultiqc(workflow, summary) { + String summary_section = '' + for (group in summary.keySet()) { + def group_params = summary.get(group) // This gets the parameters of that particular group + if (group_params) { + summary_section += "

<p style=\"font-size:110%\"><b>$group</b></p>\n" + summary_section += "    <dl class=\"dl-horizontal\">\n" + for (param in group_params.keySet()) { + summary_section += "        <dt>$param</dt><dd><samp>${group_params.get(param) ?: '<span style=\"color:#999999;\">N/A</a>'}</samp></dd>\n" + } + summary_section += "    </dl>
\n" + } + } + + String yaml_file_text = "id: '${workflow.manifest.name.replace('/','-')}-summary'\n" + yaml_file_text += "description: ' - this information is collected when the pipeline is started.'\n" + yaml_file_text += "section_name: '${workflow.manifest.name} Workflow Summary'\n" + yaml_file_text += "section_href: 'https://github.com/${workflow.manifest.name}'\n" + yaml_file_text += "plot_type: 'html'\n" + yaml_file_text += "data: |\n" + yaml_file_text += "${summary_section}" + return yaml_file_text + } + + public static String methodsDescriptionText(run_workflow, mqc_methods_yaml) { + // Convert to a named map so can be used as with familar NXF ${workflow} variable syntax in the MultiQC YML file + def meta = [:] + meta.workflow = run_workflow.toMap() + meta["manifest_map"] = run_workflow.manifest.toMap() + + meta["doi_text"] = meta.manifest_map.doi ? "(doi: ${meta.manifest_map.doi})" : "" + meta["nodoi_text"] = meta.manifest_map.doi ? "": "
  • If available, make sure to update the text to include the Zenodo DOI of the version of the pipeline used.
  • " + + def methods_text = mqc_methods_yaml.text + + def engine = new SimpleTemplateEngine() + def description_html = engine.createTemplate(methods_text).make(meta) + + return description_html + }// + // Exit pipeline if incorrect --genome key provided + // + private static void genomeExistsError(params, log) { + if (params.genomes && params.genome && !params.genomes.containsKey(params.genome)) { + log.error "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n" + + " Genome '${params.genome}' not found in any config files provided to the pipeline.\n" + + " Currently, the available genome keys are:\n" + + " ${params.genomes.keySet().join(", ")}\n" + + "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" + System.exit(1) + } + } +} diff --git a/lib/nfcore_external_java_deps.jar b/lib/nfcore_external_java_deps.jar new file mode 100644 index 00000000..805c8bb5 Binary files /dev/null and b/lib/nfcore_external_java_deps.jar differ diff --git a/main.nf b/main.nf index 0cb4009c..6ccd1b92 100644 --- a/main.nf +++ b/main.nf @@ -1,1207 +1,70 @@ #!/usr/bin/env nextflow /* -======================================================================================== - nf-core/nascent -======================================================================================== - nf-core/nascent Analysis Pipeline. - #### Homepage / Documentation - https://github.com/nf-core/nascent ----------------------------------------------------------------------------------------- - #### Authors - Ignacio Tripodi - Margaret Gruca -======================================================================================== - -Pipeline steps: - - 1. Pre-processing sra/fastq - 1a. SRA tools -- fastq-dump sra to generate fastq file - 1b. FastQC (pre-trim) -- perform pre-trim FastQC on fastq files - 1c. Gzip fastq -- compress fastq files for storage - - 2. Trimming - 2a. BBDuk -- trim fastq files for quality and adapters - 2b. FastQC (post-trim) -- perform post-trim FastQC on fastq files (ensure trimming performs as expected) - - 3. Mapping w/ HISAT2 -- map to genome reference file - - 4. SAMtools -- convert SAM file to BAM, index BAM, flagstat BAM - - 5. Quality control - 5a. preseq -- estimate library complexity - 5b. RSeQC -- calculate genomic coverage relative to a reference file, infer experiement (single- v. paired-end), read duplication - 5c. Pileup.sh : BBMap Suite -- genomic coverage by chromosome, GC content, pos/neg reads, intron/exon ratio - - 6. Coverage files - 6a. deepTools : normalized bigwigs - 6b. BEDTools and kentUtils : 5' bigwigs for dREG - 6c. deepTools : normalized bedgraphs - 6d. BEDTools : non-normalized bedgraphs - - 7. IGV Tools : bedGraph --> tdf +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + nf-core/nascent +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Github : https://github.com/nf-core/nascent - 8. MultiQC : generate QC report for pipeline - - 9. 
Pipeline report - -======= + Website: https://nf-co.re/nascent + Slack : https://nfcore.slack.com/channels/nascent +---------------------------------------------------------------------------------------- */ - -def helpMessage() { - log.info nfcoreHeader() - log.info""" - - Usage: - - The typical command for running the pipeline is as follows: - - nextflow run nf-core/nascent -profile slurm --reads '/project/*_{R1,R2}*.fastq' --outdir '/project/' - nextflow run nf-core/nascent --reads '*_R{1,2}.fastq.gz' -profile standard,docker - - Required arguments: - -profile Configuration profile to use. - --reads Directory pattern for fastq files: /project/*{R1,R2}*.fastq (Required if --sras not specified) - --sras Directory pattern for SRA files: /project/*.sras (Required if --reads not specified) - --workdir Nextflow working directory where all intermediate files are saved. - --email Where to send workflow report email. - - Performance options: - --threadfqdump Runs multi-threading for fastq-dump for sra processing. - - Input File options: - --singleEnd Specifies that the input files are not paired reads (default is paired-end). - --flip Reverse complements each strand. Necessary for some library preps. - - Save options: - --outdir Specifies where to save the output from the nextflow run. - --savefq Compresses and saves raw fastq reads. - --saveTrim Compresses and saves trimmed fastq reads. - --saveAll Compresses and saves all fastq reads. - - References If not specified in the configuration file or you wish to overwrite any of the references. - --saveReference Save the generated reference files the the Results directory. - - QC Options: - --skipMultiQC Skip running MultiQC report. - - """.stripIndent() -} - -/* - * SET UP CONFIGURATION VARIABLES - */ - -// Show help emssage -if (params.help){ - helpMessage() - exit 0 -} - -// Configurable variables -params.name = false -params.multiqc_config = "$baseDir/conf/multiqc_config.yaml" -params.email = false -params.plaintext_email = false -params.bbmap_adapters = "$baseDir/assets/adapters.fa" -params.bedGraphToBigWig = "$baseDir/bin/bedGraphToBigWig" -params.rcc = "$baseDir/bin/rcc.py" -params.workdir = "./nextflowTemp" - - -// Stage config files -ch_multiqc_config = Channel.fromPath(params.multiqc_config) -ch_output_docs = Channel.fromPath("$baseDir/docs/output.md") - -// Validate inputs - -if ( params.fasta ){ - // genome_fasta = file(params.fasta) - // if( !genome_fasta.exists() ) exit 1, "Genome directory not found: ${params.fasta}" - Channel.fromPath(params.fasta) - .ifEmpty { exit 1, "Fasta file not found: ${params.fasta}" } - .into { genome_fasta; ch_fasta_for_hisat_index} -} -else { - params.fasta = params.genome ? params.genomes[ params.genome ].fasta ?: false : false -} - -if( params.chrom_sizes ){ - Channel - .fromPath(params.chrom_sizes, checkIfExists: true) - .ifEmpty { exit 1, "Chrom sizes file not found: ${params.chrom_sizes}" } - .into { chrom_sizes_for_bed; - chrom_sizes_for_bigwig; - chrom_sizes_for_igv } -} -else { - params.chrom_sizes = null -} - -if ( params.bbmap_adapters ){ - bbmap_adapters = file("${params.bbmap_adapters}") -} - -if ( params.hisat2_indices ){ - hisat2_indices = file("${params.hisat2_indices}") -} -else { - hisat2_indices = null -} - -if ( params.genome_refseq ){ - genome_refseq = file("${params.genome_refseq}") -} -else { - genome_refseq = null -} - -// Has the run name been specified by the user? 
-// this has the bonus effect of catching both -name and --name -custom_runName = params.name -if( !(workflow.runName ==~ /[a-z]+_[a-z]+/) ){ - custom_runName = workflow.runName -} - - -if( workflow.profile == 'awsbatch') { - // Check outdir paths to be S3 buckets if running on AWSBatch - // related: https://github.com/nextflow-io/nextflow/issues/813 - if (!params.outdir.startsWith('s3:')) exit 1, "Outdir not on S3 - specify S3 Bucket to run on AWSBatch!" - // Prevent trace files to be stored on S3 since S3 does not support rolling files. - if (workflow.tracedir.startsWith('s3:')) exit 1, "Specify a local tracedir or run without trace! S3 cannot be used for tracefiles." -} - -/* - * Create a channel for input read files - */ - -if(params.readPaths){ - if(params.singleEnd){ - Channel - .from(params.readPaths) - .map { row -> [ row[0], [file(row[1][0])]] } - .dump() - .ifEmpty { exit 1, "params.readPaths was empty - no input files supplied" } - .into { fastq_reads_qc; fastq_reads_trim; fastq_reads_gzip } - } else { - Channel - .from(params.readPaths) - .map { row -> [ row[0], [file(row[1][0]), file(row[1][1])]] } - .ifEmpty { exit 1, "params.readPaths was empty - no input files supplied" } - .dump() - .into { fastq_reads_qc; fastq_reads_trim; fastq_reads_gzip } - } -} else { - if (params.singleEnd) { - fastq_reads_qc = Channel - .fromPath(params.reads) - .map { file -> tuple(file.baseName, file) } - fastq_reads_trim = Channel - .fromPath(params.reads) - .map { file -> tuple(file.baseName, file) } - fastq_reads_gzip = Channel - .fromPath(params.reads) - .map { file -> tuple(file.baseName, file) } - } else { - Channel - .fromFilePairs( params.reads, size: params.singleEnd ? 1 : 2 ) - .ifEmpty { exit 1, "Cannot find any reads matching: ${params.reads}\nNB: Path needs to be enclosed in quotes!\nIf this is single-end data, please specify --singleEnd on the command line." } - .into { fastq_reads_qc; fastq_reads_trim; fastq_reads_gzip } - } -} - -if (params.sras) { - if (params.singleEnd) { - println("Pattern for SRAs provided") - read_files_sra = Channel - .fromPath(params.sras) - .map { file -> tuple(file.baseName, file) } - } else { - Channel - .fromFilePairs( params.sras, size: params.singleEnd ? 1 : 2 ) - .ifEmpty { exit 1, "Cannot find any reads matching: ${params.reads}\nNB: Path needs to be enclosed in quotes!\nIf this is single-end data, please specify --singleEnd on the command line." } - .into { fastq_reads_qc; fastq_reads_trim; fastq_reads_gzip } - } -} - -else { - read_files_sra = Channel.empty() - } - -// Header log info -log.info nfcoreHeader() -def summary = [:] -if(workflow.revision) summary['Pipeline Release'] = workflow.revision -summary['Run Name'] = custom_runName ?: workflow.runName -summary['Save Reference'] = params.saveReference ? 'Yes' : 'No' -if(params.reads) summary['Fastqs'] = params.reads -if(params.sras) summary['SRAs'] = params.sras -summary['Genome Ref'] = params.fasta -summary['Thread fqdump'] = params.threadfqdump ? 'YES' : 'NO' -summary['Data Type'] = params.singleEnd ? 'Single-End' : 'Paired-End' -summary['Save All fastq'] = params.saveAllfq ? 'YES' : 'NO' -summary['Save fastq'] = params.savefq ? 'YES' : 'NO' -summary['Save Trimmed'] = params.saveTrim ? 'YES' : 'NO' -summary['Reverse Comp'] = params.flip ? 'YES' : 'NO' -summary['Run MultiQC'] = params.skipMultiQC ? 
'NO' : 'YES' -summary['Max Resources'] = "$params.max_memory memory, $params.max_cpus cpus, $params.max_time time per job" -if(workflow.containerEngine) summary['Container'] = "$workflow.containerEngine - $workflow.container" -summary['Output dir'] = params.outdir -summary['Launch dir'] = workflow.launchDir -summary['Working dir'] = workflow.workDir -summary['Script dir'] = workflow.projectDir -summary['User'] = workflow.userName -if(workflow.profile == 'awsbatch'){ - summary['AWS Region'] = params.awsregion - summary['AWS Queue'] = params.awsqueue -} -summary['Config Profile'] = workflow.profile -if(params.config_profile_description) summary['Config Description'] = params.config_profile_description -if(params.config_profile_contact) summary['Config Contact'] = params.config_profile_contact -if(params.config_profile_url) summary['Config URL'] = params.config_profile_url -if(params.email) { - summary['E-mail Address'] = params.email - summary['MultiQC maxsize'] = params.maxMultiqcEmailFileSize -} -log.info summary.collect { k,v -> "${k.padRight(18)}: $v" }.join("\n") -log.info "\033[2m----------------------------------------------------\033[0m" - -// Check the hostnames against configured profiles -checkHostname() - -def create_workflow_summary(summary) { - def yaml_file = workDir.resolve('workflow_summary_mqc.yaml') - yaml_file.text = """ - id: 'nf-core-nascent-summary' - description: " - this information is collected when the pipeline is started." - section_name: 'nf-core/nascent Workflow Summary' - section_href: 'https://github.com/nf-core/nascent' - plot_type: 'html' - data: | -
    -${summary.collect { k,v -> "
    $k
    ${v ?: 'N/A'}
    " }.join("\n")} -
    - """.stripIndent() - - return yaml_file -} - - -/* - * Parse software version numbers - */ -process get_software_versions { - validExitStatus 0,1,127 - publishDir "${params.outdir}/software_versions/", mode: 'copy', pattern: '*.txt' - - output: - file 'software_versions_mqc.yaml' into software_versions_yaml - - script: - """ - echo $workflow.manifest.version > v_pipeline.txt - echo $workflow.nextflow.version > v_nextflow.txt - fastqc --version > v_fastqc.txt - bbversion.sh --version > v_bbduk.txt - hisat2 --version > v_hisat2.txt - samtools --version > v_samtools.txt - fastq-dump --version > v_fastq-dump.txt - preseq > v_preseq.txt - seqkit version > v_seqkit.txt - bedtools --version > v_bedtools.txt - export LC_ALL=C - igvtools version > v_igv-tools.txt - - # Can't call this before running MultiQC or it breaks it - read_distribution.py --version > v_rseqc.txt - - for X in `ls *.txt`; do - cat \$X >> all_versions.txt; - done - scrape_software_versions.py > software_versions_mqc.yaml - """ -} - -/* - * Step 1a -- get fastq files from downloaded sras - */ - -process sra_dump { - tag "$prefix" - - input: - set val(prefix), file(reads) from read_files_sra - - output: - set val(prefix), file("*.fastq") into fastq_reads_qc_sra, fastq_reads_trim_sra, fastq_reads_gzip_sra - - script: - prefix = reads.baseName - if (!params.threadfqdump) { - """ - echo ${prefix} - - fastq-dump ${reads} - """ - } else if (!params.singleEnd) { - """ - export PATH=~/.local/bin:$PATH - - parallel-fastq-dump \ - --threads ${task.cpus} \ - --split-3 \ - --sra-id ${reads} - """ - } else if (!params.threadfqdump && !params.singleEnd) { - """ - echo ${prefix} - - fastq-dump --split-3 ${reads} - """ - } else { - """ - export PATH=~/.local/bin:$PATH - - parallel-fastq-dump \ - --threads ${task.cpus} \ - --sra-id ${reads} - """ - } -} - - -/* - * PREPROCESSING - Build HISAT2 index (borrowed from nf-core/rnaseq) - */ -if(!params.hisat2_indices && params.fasta){ - process make_hisat_index { - tag "$fasta" - publishDir path: { params.saveReference ? "${params.outdir}/reference_genome" : params.outdir }, - saveAs: { params.saveReference ? it : null }, mode: 'copy' - - input: - file fasta from ch_fasta_for_hisat_index - - output: - file "*.ht2" into hisat2_indices - - script: - if( !task.memory ){ - log.info "[HISAT2 index build] Available memory not known - defaulting to 0. Specify process memory requirements to change this." - avail_mem = 0 - } else { - log.info "[HISAT2 index build] Available memory: ${task.memory}" - avail_mem = task.memory.toGiga() - } - """ - hisat2-build -p ${task.cpus} ${fasta} ${fasta.baseName}-hisat2_index - """ - } -} - - -/* - * STEP 1b - FastQC - */ - -process fastqc { - tag "$prefix" - publishDir "${params.outdir}/qc/fastqc/", mode: 'copy', - saveAs: {filename -> filename.indexOf(".zip") > 0 ? 
"zips/$filename" : "$filename"} - - input: - set val(prefix), file(reads) from fastq_reads_qc.mix(fastq_reads_qc_sra) - - output: - file "*.{zip,html,txt}" into fastqc_results - - script: - prefix = reads.baseName - """ - fastqc $reads - """ -} - - -/* - *STEP 1c - Compress fastq files for storage - */ - -process gzip_fastq { - tag "$name" - publishDir "${params.outdir}/fastq", mode: 'copy' - - when: - params.savefq || params.saveAllfq - - input: - set val(name), file(fastq_reads) from fastq_reads_gzip.mix(fastq_reads_gzip_sra) - - output: - set val(name), file("*.gz") into compressed_fastq - - script: - """ - gzip -c ${name}.fastq > ${name}.fastq.gz - """ - } - - -/* - * STEP 2a - Trimming - */ - -process bbduk { - validExitStatus 0,1 - tag "$name" - publishDir "${params.outdir}/qc/trimstats", mode: 'copy', pattern: "*.txt" - - input: - set val(name), file(reads) from fastq_reads_trim.mix(fastq_reads_trim_sra) - - output: - set val(name), file ("*.trim.fastq") into trimmed_reads_fastqc, trimmed_reads_hisat2, trimmed_reads_gzip - file "*.txt" into trim_stats - - script: - bbduk_mem = task.memory.toGiga() - if (!params.singleEnd && params.flip) { - """ - echo ${name} - - seqkit seq -j 16 -r -p \ - ${name}_R1.flip.fastq \ - -o ${name}.flip.fastq - - seqkit seq -j 16 -r -p \ - ${name}_R2.flip.fastq \ - -o ${name}.flip.fastq - - - - bbduk.sh -Xmx${bbduk_mem}g \ - t=${task.cpus} \ - in=${name}_R1.flip.fastq \ - in2=${name}_R2.flip.fastq \ - out=${name}_R1.flip.trim.fastq \ - out2=${name}_R2.flip.trim.fastq \ - ref=${bbmap_adapters} \ - ktrim=r qtrim=10 k=23 mink=11 hdist=1 \ - maq=10 minlen=25 \ - tpe tbo \ - literal=AAAAAAAAAAAAAAAAAAAAAAA \ - stats=${name}.trimstats.txt \ - refstats=${name}.refstats.txt \ - ehist=${name}.ehist.txt - """ - } else if (params.flip) { - """ - echo ${name} - - - seqkit seq -j 16 -r -p \ - ${name}.fastq \ - -o ${name}.flip.fastq - - - bbduk.sh -Xmx${bbduk_mem}g \ - t=${task.cpus} \ - in=${name}.flip.fastq \ - out=${name}.flip.trim.fastq \ - ref=${bbmap_adapters} \ - ktrim=r qtrim=10 k=23 mink=11 hdist=1 \ - maq=10 minlen=25 \ - tpe tbo \ - literal=AAAAAAAAAAAAAAAAAAAAAAA \ - stats=${name}.trimstats.txt \ - refstats=${name}.refstats.txt \ - ehist=${name}.ehist.txt - """ - }else if (!params.singleEnd) { - """ - echo ${name} - - bbduk.sh -Xmx${bbduk_mem}g \ - t=${task.cpus} \ - in=${name}_R1.fastq \ - in2=${name}_R2.fastq \ - out=${name}_R1.trim.fastq \ - out2=${name}_R2.trim.fastq \ - ref=${bbmap_adapters} \ - ktrim=r qtrim=10 k=23 mink=11 hdist=1 \ - maq=10 minlen=25 \ - tpe tbo \ - literal=AAAAAAAAAAAAAAAAAAAAAAA \ - stats=${name}.trimstats.txt \ - refstats=${name}.refstats.txt \ - ehist=${name}.ehist.txt - """ - } else { - """ - echo ${name} - echo ${bbduk_mem} - - bbduk.sh -Xmx${bbduk_mem}g \ - t=${task.cpus} \ - in=${name}.fastq \ - out=${name}.trim.fastq \ - ref=${bbmap_adapters} \ - ktrim=r qtrim=10 k=23 mink=11 hdist=1 \ - maq=10 minlen=25 \ - tpe tbo \ - literal=AAAAAAAAAAAAAAAAAAAAAAA \ - stats=${name}.trimstats.txt \ - refstats=${name}.refstats.txt \ - ehist=${name}.ehist.txt - """ - } -} - - -/* - * STEP 2b - Trimmed FastQC - */ - -process fastqc_trimmed { - validExitStatus 0,1 - tag "$prefix" - publishDir "${params.outdir}/qc/fastqc/", mode: 'copy', - saveAs: {filename -> filename.indexOf(".zip") > 0 ? 
"zips/$filename" : "$filename"} - - input: - set val(prefix), file(trimmed_reads) from trimmed_reads_fastqc - - output: - file "*_fastqc.{zip,html,txt}" into trimmed_fastqc_results - - script: - prefix = trimmed_reads.baseName - """ - echo ${prefix} - - fastqc $trimmed_reads - extract_fastqc_stats.sh --srr=${prefix} > ${prefix}_stats_fastqc.txt - """ -} - -/* - *STEP 2c - Compress trimmed fastq files for storage - */ - -process gzip_trimmed { - tag "$prefix" - publishDir "${params.outdir}/trimmed", mode: 'copy' - - when: - params.saveTrim || params.saveAllfq - - input: - file(trimmed_reads) from trimmed_reads_gzip - - output: - set val(prefix), file("*.gz") into trimmed_gzip - - script: - prefix = trimmed_reads.baseName - """ - gzip -c $trimmed_reads > ${prefix}.fastq.gz - """ - } - - -/* - * STEP 3 - Map reads to reference genome - */ - -process hisat2 { - // NOTE: this tool sends output there even in successful (exit code 0) - // termination, so we have to ignore errors for now, and the next - // process will blow up from missing a SAM file instead. - tag "$name" - validExitStatus 0,143 - - input: - val(indices) from hisat2_indices.first() - set val(name), file(trimmed_reads) from trimmed_reads_hisat2 - - output: - set val(name), file("*.sam") into hisat2_sam - - script: - index_base = indices[0].toString() - ~/.\d.ht2/ - if (!params.singleEnd) { - """ - echo ${name} - - hisat2 -p ${task.cpus} \ - --very-sensitive \ - --no-spliced-alignment \ - -x ${index_base} \ - -1 ${name}_R1.trim.fastq \ - -2 ${name}_R2.trim.fastq - > ${name}.sam - """ - } else { - """ - echo ${name} - - hisat2 -p ${task.cpus} \ - --very-sensitive \ - --no-spliced-alignment \ - -x ${index_base}\ - -U ${trimmed_reads} \ - > ${name}.sam - """ - } -} - +nextflow.enable.dsl = 2 /* - * STEP 4 - Convert to BAM format and sort - */ - -/* - * STEP 4 - Convert to BAM format and sort - */ - -process samtools { - tag "$prefix" - publishDir "${params.outdir}/mapped/bams", mode: 'copy', pattern: "${prefix}.sorted.bam" - publishDir "${params.outdir}/mapped/bams", mode: 'copy', pattern: "${prefix}.sorted.bam.bai" - publishDir "${params.outdir}/qc/mapstats", mode: 'copy', pattern: "${prefix}.sorted.bam.flagstat" - publishDir "${params.outdir}/qc/mapstats", mode: 'copy', pattern: "${prefix}.sorted.bam.millionsmapped" - - input: - set val(name), file(mapped_sam) from hisat2_sam - - output: - set val(name), file("${prefix}.sorted.bam") into sorted_bam_ch - set val(name), file("${prefix}.sorted.bam.bai") into sorted_bam_indices_ch - set val(name), file("${prefix}.sorted.bam.flagstat") into bam_flagstat - set val(name), file("${prefix}.sorted.bam.millionsmapped") into bam_milmapped_bedgraph - - script: - prefix = mapped_sam.baseName - // Note that the millionsmapped arugments below are only good for SE data. 
When PE is added, it will need to be changed to: - // -F 0x40 rootname.sorted.bam | cut -f1 | sort | uniq | wc -l > rootname.bam.millionsmapped - if (!params.singleEnd) { - """ - - samtools view -@ ${task.cpus} -bS -o ${prefix}.bam ${mapped_sam} - samtools sort -@ ${task.cpus} ${prefix}.bam > ${prefix}.sorted.bam - samtools flagstat ${prefix}.sorted.bam > ${prefix}.sorted.bam.flagstat - samtools view -@ ${task.cpus} -F 0x40 ${prefix}.sorted.bam | cut -f1 | sort | uniq | wc -l > ${prefix}.sorted.bam.millionsmapped - samtools index ${prefix}.sorted.bam ${prefix}.sorted.bam.bai - """ - } else { - """ - - samtools view -@ ${task.cpus} -bS -o ${prefix}.bam ${mapped_sam} - samtools sort -@ ${task.cpus} ${prefix}.bam > ${prefix}.sorted.bam - samtools flagstat ${prefix}.sorted.bam > ${prefix}.sorted.bam.flagstat - samtools view -@ ${task.cpus} -F 0x904 -c ${prefix}.sorted.bam > ${prefix}.sorted.bam.millionsmapped - samtools index ${prefix}.sorted.bam ${prefix}.sorted.bam.bai - """ - } -} - -sorted_bam_ch - .into {sorted_bams_for_bedtools_bedgraph; sorted_bams_for_preseq; sorted_bams_for_rseqc; sorted_bams_for_dreg_prep; sorted_bams_for_pileup} - -sorted_bam_indices_ch - .into {sorted_bam_indices_for_bedtools_bedgraph; sorted_bam_indices_for_bedtools_normalized_bedgraph; sorted_bam_indicies_for_pileup; sorted_bam_indices_for_preseq; sorted_bam_indices_for_rseqc} - -/* - *STEP 5a - Plot the estimated complexity of a sample, and estimate future yields - * for complexity if the sample is sequenced at higher read depths. - */ - -process preseq { - tag "$name" - errorStrategy 'ignore' - publishDir "${params.outdir}/qc/preseq/", mode: 'copy', pattern: "*.txt" - - input: - set val(name), file(bam_file) from sorted_bams_for_preseq - file(bam_indices) from sorted_bam_indices_for_preseq - - output: - file("*.txt") into preseq_results - - script: - """ - - preseq c_curve -B -o ${name}.c_curve.txt \ - ${bam_file} - - preseq lc_extrap -B -o ${name}.lc_extrap.txt \ - ${bam_file} - """ - } - - -/* - *STEP 5b - Analyze read distributions using RSeQC - */ - -process rseqc { - tag "$name" - validExitStatus 0,143 - publishDir "${params.outdir}/qc/rseqc" , mode: 'copy', - saveAs: {filename -> - if (filename.indexOf("infer_experiment.txt") > 0) "infer_experiment/$filename" - else if (filename.indexOf("read_distribution.txt") > 0) "read_distribution/$filename" - else if (filename.indexOf("read_duplication.DupRate_plot.pdf") > 0) "read_duplication/$filename" - else if (filename.indexOf("read_duplication.DupRate_plot.r") > 0) "read_duplication/rscripts/$filename" - else if (filename.indexOf("read_duplication.pos.DupRate.xls") > 0) "read_duplication/dup_pos/$filename" - else if (filename.indexOf("read_duplication.seq.DupRate.xls") > 0) "read_duplication/dup_seq/$filename" - else if (filename.indexOf("RPKM_saturation.eRPKM.xls") > 0) "RPKM_saturation/rpkm/$filename" - else if (filename.indexOf("RPKM_saturation.rawCount.xls") > 0) "RPKM_saturation/counts/$filename" - else if (filename.indexOf("RPKM_saturation.saturation.pdf") > 0) "RPKM_saturation/$filename" - else if (filename.indexOf("RPKM_saturation.saturation.r") > 0) "RPKM_saturation/rscripts/$filename" - else filename - } - - input: - set val(name), file(bam_file) from sorted_bams_for_rseqc - file(bam_indices) from sorted_bam_indices_for_rseqc - - output: - file "*.{txt,pdf,r,xls}" into rseqc_results - - script: - """ - - read_distribution.py -i ${bam_file} \ - -r ${genome_refseq} \ - > ${name}.read_dist.txt - - read_duplication.py -i ${bam_file} \ - -o 
${name}.read_duplication - - infer_experiment.py -i ${bam_file} \ - -r ${genome_refseq} \ - > ${name}.infer_experiment.txt - """ - } - - - -/* - *STEP 5c - Analyze coverage using pileup.sh - */ - -process pileup { - tag "$name" - publishDir "${params.outdir}/qc/pileup", mode: 'copy', pattern: "*.txt" - - input: - set val(name), file(bam_file) from sorted_bams_for_pileup - file(bam_indices) from sorted_bam_indicies_for_pileup - - output: - file("*.txt") into pileup_results - - script: - pileup_mem = task.memory.toGiga() - """ - - pileup.sh -Xmx${pileup_mem}g \ - in=${bam_file} \ - out=${name}.coverage.stats.txt \ - hist=${name}.coverage.hist.txt - """ - } - -/* - *STEP 6a - Create non-normalzied bedGraphs for analysis using FStitch/Tfit - */ - -process bedgraphs { - validExitStatus 0,143 - tag "$name" - publishDir "${params.outdir}/mapped/bedgraphs", mode: 'copy', pattern: "*{neg,pos}.bedGraph" - publishDir "${params.outdir}/mapped/bedgraphs", mode: 'copy', pattern: "${name}.bedGraph" - publishDir "${params.outdir}/mapped/rcc_bedgraphs", mode: 'copy', pattern: "${name}.rcc.bedGraph" - - input: - set val(name), file(bam_file) from sorted_bams_for_bedtools_bedgraph - set val(name), file(bam_indices) from sorted_bam_indices_for_bedtools_bedgraph - set val(name), file(millions_mapped) from bam_milmapped_bedgraph - - output: - set val(name), file("*.bedGraph") into non_normalized_bedgraphs - set val(name), file("${name}.rcc.bedGraph") into bedgraph_tdf - set val(name), file("${name}.pos.rcc.bedGraph") into bedgraph_bigwig_pos - set val(name), file("${name}.neg.rcc.bedGraph") into bedgraph_bigwig_neg - - script: - """ - - genomeCoverageBed \ - -bg \ - -strand + \ - -g hg38 \ - -ibam ${bam_file} \ - > ${name}.pos.bedGraph - - genomeCoverageBed \ - -bg \ - -strand - \ - -g hg38 \ - -ibam ${bam_file} \ - > ${name}.tmp.neg.bedGraph - - awk 'BEGIN{FS=OFS="\t"} {\$4=-\$4}1' ${name}.tmp.neg.bedGraph \ - > ${name}.neg.bedGraph - rm ${name}.tmp.neg.bedGraph - - cat ${name}.pos.bedGraph \ - ${name}.neg.bedGraph \ - > ${name}.unsorted.bedGraph - - sortBed \ - -i ${name}.unsorted.bedGraph \ - > ${name}.bedGraph - - rm ${name}.unsorted.bedGraph - - python ${params.rcc} \ - ${name}.bedGraph \ - ${millions_mapped} \ - ${name}.rcc.bedGraph \ - - python ${params.rcc} \ - ${name}.pos.bedGraph \ - ${millions_mapped} \ - ${name}.unsorted.pos.rcc.bedGraph - - sortBed -i ${name}.unsorted.pos.rcc.bedGraph > ${name}.pos.rcc.bedGraph - rm ${name}.unsorted.pos.rcc.bedGraph - - python ${params.rcc} \ - ${name}.neg.bedGraph \ - ${millions_mapped} \ - ${name}.unsorted.neg.rcc.bedGraph - - sortBed -i ${name}.unsorted.neg.rcc.bedGraph > ${name}.neg.rcc.bedGraph - rm ${name}.unsorted.neg.rcc.bedGraph - - """ - } - - -/* Idea borrowed from the nf-core/atacseq workflow: - * Just generate the chromosome sizes file using samtools, if not provided. - */ -if(!params.chrom_sizes) { - process make_chromosome_sizes { - tag "$fasta" - publishDir path: { params.saveReference ? "${params.outdir}/reference_genome" : params.outdir }, - saveAs: { params.saveReference ? 
it : null }, mode: 'copy' - - input: - file fasta from genome_fasta - - output: - file("${fasta}.sizes") into chrom_sizes_ch - - script: - """ - samtools faidx $fasta - cut -f 1,2 ${fasta}.fai > ${fasta}.sizes - """ - } -} - -chrom_sizes_ch.into{chrom_sizes_for_bed; chrom_sizes_for_bigwig; chrom_sizes_for_igv} +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + GENOME PARAMETER VALUES +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ +params.fasta = WorkflowMain.getGenomeAttribute(params, 'fasta') +params.gtf = WorkflowMain.getGenomeAttribute(params, 'gtf') +params.gff = WorkflowMain.getGenomeAttribute(params, 'gff') +params.gene_bed = WorkflowMain.getGenomeAttribute(params, 'bed12') +params.bwa_index = WorkflowMain.getGenomeAttribute(params, 'bwa') +params.bwamem2_index = WorkflowMain.getGenomeAttribute(params, 'bwamem2') +params.dragmap = WorkflowMain.getGenomeAttribute(params, 'dragmap') /* - *STEP 6b - Create bedGraphs and bigwigs for dREG - */ - -process dreg_prep { - validExitStatus 0,143 - errorStrategy 'ignore' - tag "$name" - publishDir "${params.outdir}/mapped/dreg_input", mode: 'copy', pattern: "*.bw" - - input: - set val(name), file(bam_file) from sorted_bams_for_dreg_prep - file(chr_sizes) from chrom_sizes_for_bed - - output: - set val(name), file("*.bw") into dreg_bigwig - - script: - """ - - echo "Creating BigWigs suitable as inputs to dREG" - - bedtools bamtobed -i ${bam_file} | awk 'BEGIN{OFS="\t"} (\$5 > 0){print \$0}' | \ - awk 'BEGIN{OFS="\t"} (\$6 == "+") {print \$1,\$2,\$2+1,\$4,\$5,\$6}; (\$6 == "-") {print \$1, \$3-1,\$3,\$4,\$5,\$6}' \ - > ${name}.dreg.bed - sortBed -i ${name}.dreg.bed > ${name}.dreg.sort.bed - - echo positive strand processed to bedGraph - - bedtools genomecov -bg -i ${name}.dreg.sort.bed -g ${chr_sizes} -strand + > ${name}.pos.bedGraph - sortBed -i ${name}.pos.bedGraph > ${name}.pos.sort.bedGraph - bedtools genomecov -bg -i ${name}.dreg.sort.bed -g ${chr_sizes} -strand - \ - | awk 'BEGIN{OFS="\t"} {print \$1,\$2,\$3,-1*\$4}' > ${name}.neg.bedGraph - sortBed -i ${name}.neg.bedGraph > ${name}.neg.sort.bedGraph - - echo negative strand processed to bedGraph - - ${params.bedGraphToBigWig} ${name}.pos.sort.bedGraph ${chr_sizes} ${name}.pos.bw - ${params.bedGraphToBigWig} ${name}.neg.sort.bedGraph ${chr_sizes} ${name}.neg.bw +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + VALIDATE & PRINT PARAMETER SUMMARY +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ - echo bedGraph to bigwig done - """ - } +WorkflowMain.initialise(workflow, params, log) /* - *STEP 7 - Normalize bigWigs by millions of reads mapped for visualization on nascent2.0 - */ - -process normalized_bigwigs { - validExitStatus 0 - tag "$name" - publishDir "${params.outdir}/mapped/rcc_bigwig", mode: 'copy' - - input: - set val(name), file(neg_bedgraph) from bedgraph_bigwig_neg - set val(name), file(pos_bedgraph) from bedgraph_bigwig_pos - file chrom_sizes from chrom_sizes_for_bigwig - - output: - set val(name), file("*.rcc.bw") into normalized_bigwig +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + NAMED WORKFLOW FOR PIPELINE +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ - script: - """ - ${params.bedGraphToBigWig} ${pos_bedgraph} ${chrom_sizes} ${name}.pos.rcc.bw - ${params.bedGraphToBigWig} ${neg_bedgraph} ${chrom_sizes} 
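The deleted `dreg_prep` process reduces each mapped read to its 5'-most base before computing strand-specific coverage, which is the input format dREG expects. A sketch of the positive-strand path with placeholder names (`sample.sorted.bam`, `genome.chrom.sizes`); the negative strand is handled the same way with counts negated, and `bedGraphToBigWig` here is the kentUtils binary the pipeline bundled in `bin/`:

```bash
#!/usr/bin/env bash
set -euo pipefail

# Collapse reads to single-base 5' ends (plus strand keeps the start,
# minus strand keeps the end), as in the old dreg_prep step.
bedtools bamtobed -i sample.sorted.bam \
    | awk 'BEGIN{OFS="\t"} ($5 > 0){print $0}' \
    | awk 'BEGIN{OFS="\t"} ($6 == "+") {print $1,$2,$2+1,$4,$5,$6}; ($6 == "-") {print $1,$3-1,$3,$4,$5,$6}' \
    > sample.dreg.bed
sortBed -i sample.dreg.bed > sample.dreg.sort.bed

# Single-base coverage for one strand, then conversion to bigWig for dREG.
bedtools genomecov -bg -i sample.dreg.sort.bed -g genome.chrom.sizes -strand + > sample.pos.bedGraph
sortBed -i sample.pos.bedGraph > sample.pos.sort.bedGraph
bedGraphToBigWig sample.pos.sort.bedGraph genome.chrom.sizes sample.pos.bw
```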
${name}.neg.rcc.bw +include { NASCENT } from './workflows/nascent' - """ +// +// WORKFLOW: Run main nf-core/nascent analysis pipeline +// +workflow NFCORE_NASCENT { + NASCENT () } /* - *STEP 8 - IGV Tools : generate tdfs for optimal visualization in Integrative Genomics Viewer (IGV) - */ - -process igvtools { - tag "$name" - // This often blows up due to a ceiling in memory usage, so we can ignore - // and re-run later as it's non-essential. - errorStrategy 'ignore' - publishDir "${params.outdir}/mapped/tdfs", mode: 'copy', pattern: "*.tdf" - - input: - set val(name), file(normalized_bg) from bedgraph_tdf - file chrom_sizes from chrom_sizes_for_igv - - output: - set val(name), file("*.tdf") into tiled_data_ch - - script: - """ - export LC_ALL=C - igvtools toTDF ${normalized_bg} ${name}.rcc.tdf ${chrom_sizes} - """ - } - - - -/* - * STEP 9 - MultiQC - */ -process multiqc { - validExitStatus 0,1,143 - errorStrategy 'ignore' - publishDir "${params.outdir}/multiqc/", mode: 'copy', pattern: "multiqc_report.html" - publishDir "${params.outdir}/multiqc/", mode: 'copy', pattern: "*_data" - - when: - !params.skipMultiQC - - input: - file multiqc_config from ch_multiqc_config.collect() - file (fastqc:'qc/fastqc/*') from fastqc_results.collect() - file ('qc/fastqc/*') from trimmed_fastqc_results.collect() - file ('qc/trimstats/*') from trim_stats.collect() - file ('qc/mapstats/*') from bam_flagstat.collect() - file ('qc/rseqc/*') from rseqc_results.collect() - file ('qc/preseq/*') from preseq_results.collect() - file ('software_versions/*') from software_versions_yaml - - output: - file "*multiqc_report.html" into multiqc_report - file "*_data" into multiqc_report_files - - script: - rtitle = custom_runName ? "--title \"$custom_runName\"" : '' - rfilename = custom_runName ? "--filename " + custom_runName.replaceAll('\\W','_').replaceAll('_+','_') + "_multiqc_report" : '' - - - """ - export PATH=~/.local/bin:$PATH +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + RUN ALL WORKFLOWS +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ - multiqc . 
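After this rewrite, `main.nf` is only an entry point: it includes `NASCENT` from `workflows/nascent` and wraps it in `NFCORE_NASCENT`, so the pipeline logic now lives in the workflow and module files added below. Launching the DSL2 version looks roughly like this; the revision, profiles and output directory are illustrative, not prescribed by the diff:

```bash
# Illustrative launch command for the DSL2 pipeline; -r dev, the profiles and
# --outdir value are placeholders chosen for this example.
nextflow run nf-core/nascent -r dev -profile test,docker --outdir ./results
```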
-f $rtitle $rfilename --config $multiqc_config - """ +// +// WORKFLOW: Execute a single named workflow for the pipeline +// See: https://github.com/nf-core/rnaseq/issues/619 +// +workflow { + NFCORE_NASCENT () } /* - * Completion e-mail notification - */ -workflow.onComplete { - - // Set up the e-mail variables - def subject = "[nf-core/nascent] Successful: $workflow.runName" - if(!workflow.success){ - subject = "[nf-core/nascent] FAILED: $workflow.runName" - } - def email_fields = [:] - email_fields['version'] = workflow.manifest.version - email_fields['runName'] = custom_runName ?: workflow.runName - email_fields['success'] = workflow.success - email_fields['dateComplete'] = workflow.complete - email_fields['duration'] = workflow.duration - email_fields['exitStatus'] = workflow.exitStatus - email_fields['errorMessage'] = (workflow.errorMessage ?: 'None') - email_fields['errorReport'] = (workflow.errorReport ?: 'None') - email_fields['commandLine'] = workflow.commandLine - email_fields['projectDir'] = workflow.projectDir - email_fields['summary'] = summary - email_fields['summary']['Date Started'] = workflow.start - email_fields['summary']['Date Completed'] = workflow.complete - email_fields['summary']['Pipeline script file path'] = workflow.scriptFile - email_fields['summary']['Pipeline script hash ID'] = workflow.scriptId - if(workflow.repository) email_fields['summary']['Pipeline repository Git URL'] = workflow.repository - if(workflow.commitId) email_fields['summary']['Pipeline repository Git Commit'] = workflow.commitId - if(workflow.revision) email_fields['summary']['Pipeline Git branch/tag'] = workflow.revision - if(workflow.container) email_fields['summary']['Docker image'] = workflow.container - email_fields['summary']['Nextflow Version'] = workflow.nextflow.version - email_fields['summary']['Nextflow Build'] = workflow.nextflow.build - email_fields['summary']['Nextflow Compile Timestamp'] = workflow.nextflow.timestamp - - // On success try attach the multiqc report - def mqc_report = null - try { - if (workflow.success) { - mqc_report = multiqc_report.getVal() - if (mqc_report.getClass() == ArrayList){ - log.warn "[nf-core/nascent] Found multiple reports from process 'multiqc', will use only one" - mqc_report = mqc_report[0] - } - } - } catch (all) { - log.warn "[nf-core/nascent] Could not attach MultiQC report to summary email" - } - - // Render the TXT template - def engine = new groovy.text.GStringTemplateEngine() - def tf = new File("$baseDir/assets/email_template.txt") - def txt_template = engine.createTemplate(tf).make(email_fields) - def email_txt = txt_template.toString() - - // Render the HTML template - def hf = new File("$baseDir/assets/email_template.html") - def html_template = engine.createTemplate(hf).make(email_fields) - def email_html = html_template.toString() - - // Render the sendmail template - def smail_fields = [ email: params.email, subject: subject, email_txt: email_txt, email_html: email_html, baseDir: "$baseDir", mqcFile: mqc_report, mqcMaxSize: params.maxMultiqcEmailFileSize.toBytes() ] - def sf = new File("$baseDir/assets/sendmail_template.txt") - def sendmail_template = engine.createTemplate(sf).make(smail_fields) - def sendmail_html = sendmail_template.toString() - - // Send the HTML e-mail - if (params.email) { - try { - if( params.plaintext_email ){ throw GroovyException('Send plaintext e-mail, not HTML') } - // Try to send HTML e-mail using sendmail - [ 'sendmail', '-t' ].execute() << sendmail_html - log.info "[nf-core/nascent] Sent summary 
e-mail to $params.email (sendmail)" - } catch (all) { - // Catch failures and try with plaintext - [ 'mail', '-s', subject, params.email ].execute() << email_txt - log.info "[nf-core/nascent] Sent summary e-mail to $params.email (mail)" - } - } - - // Write summary e-mail HTML to a file - def output_d = new File( "${params.outdir}/pipeline_info/" ) - if( !output_d.exists() ) { - output_d.mkdirs() - } - def output_hf = new File( output_d, "pipeline_report.html" ) - output_hf.withWriter { w -> w << email_html } - def output_tf = new File( output_d, "pipeline_report.txt" ) - output_tf.withWriter { w -> w << email_txt } - - c_reset = params.monochrome_logs ? '' : "\033[0m"; - c_purple = params.monochrome_logs ? '' : "\033[0;35m"; - c_green = params.monochrome_logs ? '' : "\033[0;32m"; - c_red = params.monochrome_logs ? '' : "\033[0;31m"; - - if (workflow.stats.ignoredCountFmt > 0 && workflow.success) { - log.info "${c_purple}Warning, pipeline completed, but with errored process(es) ${c_reset}" - log.info "${c_red}Number of ignored errored process(es) : ${workflow.stats.ignoredCountFmt} ${c_reset}" - log.info "${c_green}Number of successfully ran process(es) : ${workflow.stats.succeedCountFmt} ${c_reset}" - } - - if(workflow.success){ - log.info "${c_purple}[nf-core/nascent]${c_green} Pipeline completed successfully${c_reset}" - } else { - checkHostname() - log.info "${c_purple}[nf-core/nascent]${c_red} Pipeline completed with errors${c_reset}" - } - -} - - -def nfcoreHeader(){ - // Log colors ANSI codes - c_reset = params.monochrome_logs ? '' : "\033[0m"; - c_dim = params.monochrome_logs ? '' : "\033[2m"; - c_black = params.monochrome_logs ? '' : "\033[0;30m"; - c_green = params.monochrome_logs ? '' : "\033[0;32m"; - c_yellow = params.monochrome_logs ? '' : "\033[0;33m"; - c_blue = params.monochrome_logs ? '' : "\033[0;34m"; - c_purple = params.monochrome_logs ? '' : "\033[0;35m"; - c_cyan = params.monochrome_logs ? '' : "\033[0;36m"; - c_white = params.monochrome_logs ? '' : "\033[0;37m"; - - return """ ${c_dim}----------------------------------------------------${c_reset} - ${c_green},--.${c_black}/${c_green},-.${c_reset} - ${c_blue} ___ __ __ __ ___ ${c_green}/,-._.--~\'${c_reset} - ${c_blue} |\\ | |__ __ / ` / \\ |__) |__ ${c_yellow}} {${c_reset} - ${c_blue} | \\| | \\__, \\__/ | \\ |___ ${c_green}\\`-._,-`-,${c_reset} - ${c_green}`._,._,\'${c_reset} - ${c_purple} nf-core/nascent v${workflow.manifest.version}${c_reset} - ${c_dim}----------------------------------------------------${c_reset} - """.stripIndent() -} - -def checkHostname(){ - def c_reset = params.monochrome_logs ? '' : "\033[0m" - def c_white = params.monochrome_logs ? '' : "\033[0;37m" - def c_red = params.monochrome_logs ? '' : "\033[1;91m" - def c_yellow_bold = params.monochrome_logs ? 
'' : "\033[1;93m" - if(params.hostnames){ - def hostname = "hostname".execute().text.trim() - params.hostnames.each { prof, hnames -> - hnames.each { hname -> - if(hostname.contains(hname) && !workflow.profile.contains(prof)){ - log.error "====================================================\n" + - " ${c_red}WARNING!${c_reset} You are running with `-profile $workflow.profile`\n" + - " but your machine hostname is ${c_white}'$hostname'${c_reset}\n" + - " ${c_yellow_bold}It's highly recommended that you use `-profile $prof${c_reset}`\n" + - "============================================================" - } - } - } - } -} +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + THE END +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ diff --git a/modules.json b/modules.json new file mode 100644 index 00000000..fef13e53 --- /dev/null +++ b/modules.json @@ -0,0 +1,192 @@ +{ + "name": "nf-core/nascent", + "homePage": "https://github.com/nf-core/nascent", + "repos": { + "https://github.com/nf-core/modules.git": { + "modules": { + "nf-core": { + "bbmap/pileup": { + "branch": "master", + "git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905" + }, + "bedtools/genomecov": { + "branch": "master", + "git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905" + }, + "bedtools/intersect": { + "branch": "master", + "git_sha": "4bb1d4e362a38642e877afe41aaf58ded9e56c86" + }, + "bedtools/merge": { + "branch": "master", + "git_sha": "4bb1d4e362a38642e877afe41aaf58ded9e56c86" + }, + "bedtools/sort": { + "branch": "master", + "git_sha": "4bb1d4e362a38642e877afe41aaf58ded9e56c86" + }, + "bwa/index": { + "branch": "master", + "git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905" + }, + "bwa/mem": { + "branch": "master", + "git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905" + }, + "bwamem2/index": { + "branch": "master", + "git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905" + }, + "bwamem2/mem": { + "branch": "master", + "git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905" + }, + "cat/cat": { + "branch": "master", + "git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905" + }, + "cat/fastq": { + "branch": "master", + "git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905" + }, + "custom/dumpsoftwareversions": { + "branch": "master", + "git_sha": "8022c68e7403eecbd8ba9c49496f69f8c49d50f0" + }, + "custom/getchromsizes": { + "branch": "master", + "git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905" + }, + "deeptools/bamcoverage": { + "branch": "master", + "git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905" + }, + "dragmap/align": { + "branch": "master", + "git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905" + }, + "dragmap/hashtable": { + "branch": "master", + "git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905" + }, + "fastp": { + "branch": "master", + "git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905" + }, + "fastqc": { + "branch": "master", + "git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905" + }, + "gffread": { + "branch": "master", + "git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905" + }, + "gunzip": { + "branch": "master", + "git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905" + }, + "homer/findpeaks": { + "branch": "master", + "git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905" + }, + "homer/maketagdirectory": { + "branch": "master", + "git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905" + }, + "homer/makeucscfile": { + "branch": "master", + "git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905" + }, + "homer/pos2bed": { 
+ "branch": "master", + "git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905" + }, + "multiqc": { + "branch": "master", + "git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905" + }, + "pints/caller": { + "branch": "master", + "git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905" + }, + "preseq/ccurve": { + "branch": "master", + "git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905" + }, + "preseq/lcextrap": { + "branch": "master", + "git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905" + }, + "rseqc/inferexperiment": { + "branch": "master", + "git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905" + }, + "rseqc/readdistribution": { + "branch": "master", + "git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905" + }, + "rseqc/readduplication": { + "branch": "master", + "git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905" + }, + "samtools/faidx": { + "branch": "master", + "git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905" + }, + "samtools/flagstat": { + "branch": "master", + "git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905" + }, + "samtools/idxstats": { + "branch": "master", + "git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905" + }, + "samtools/index": { + "branch": "master", + "git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905" + }, + "samtools/sort": { + "branch": "master", + "git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905" + }, + "samtools/stats": { + "branch": "master", + "git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905" + }, + "subread/featurecounts": { + "branch": "master", + "git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905" + }, + "umitools/dedup": { + "branch": "master", + "git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905" + }, + "untar": { + "branch": "master", + "git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905" + } + } + }, + "subworkflows": { + "nf-core": { + "bam_dedup_stats_samtools_umitools": { + "branch": "master", + "git_sha": "41891ec2c3704911cd68b9317f26545b95a1c48d" + }, + "bam_sort_stats_samtools": { + "branch": "master", + "git_sha": "3911652a6b24249358f79e8b8466338d63efb2a2" + }, + "bam_stats_samtools": { + "branch": "master", + "git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905" + }, + "homer/groseq": { + "branch": "master", + "git_sha": "07c9080d7b66a325e8e67d9c57e10083ca8b9db4" + } + } + } + } + } +} diff --git a/modules/local/bed2saf.nf b/modules/local/bed2saf.nf new file mode 100644 index 00000000..313706ff --- /dev/null +++ b/modules/local/bed2saf.nf @@ -0,0 +1,31 @@ +process BED2SAF { + tag "$meta.id" + label 'process_single' + + conda (params.enable_conda ? "conda-forge::gawk=5.1.0" : null) + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/ubuntu:20.04' : + 'ubuntu:20.04' }" + + input: + tuple val(meta), path(bed) + + output: + tuple val(meta), path("*.saf"), emit: saf + path "versions.yml", emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + """ + awk 'OFS="\\t" {print \$1"."\$2"."\$3, \$1, \$2, \$3, "."}' \\ + $bed \\ + > ${bed.baseName}.saf + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gawk: \$(echo \$(gawk --version 2>&1) | sed 's/^.*GNU Awk //; s/, .*\$//') + END_VERSIONS + """ +} diff --git a/modules/local/grohmm/parametertuning/main.nf b/modules/local/grohmm/parametertuning/main.nf new file mode 100644 index 00000000..6ee73423 --- /dev/null +++ b/modules/local/grohmm/parametertuning/main.nf @@ -0,0 +1,42 @@ +process GROHMM_PARAMETERTUNING { + tag "$meta.id" + label 'process_high' + label 'process_long' + + conda (params.enable_conda ? "conda-forge::r-base=4.1.1 conda-forge::r-optparse=1.7.1 conda-forge::r-argparse=2.1.3 bioconda::bioconductor-genomicfeatures=1.46.1 bioconda::bioconductor-grohmm=1.28.0" : null) + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/mulled-v2-e9a6cb7894dd2753aff7d9446ea95c962cce8c46:0a46dae3241b1c4f02e46468f5d54eadcf64beca-0' : + 'quay.io/biocontainers/mulled-v2-e9a6cb7894dd2753aff7d9446ea95c962cce8c46:0a46dae3241b1c4f02e46468f5d54eadcf64beca-0' }" + + input: + tuple val(meta), path(bam) + path gtf + path tune_parameter_file + + output: + path "*.tuning.csv" , emit: tuning + path "versions.yml", emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + """ + parameter_tuning.R \\ + --bam_file ${bam} \\ + --tuning_file ${tune_parameter_file} \\ + --outprefix ${prefix} \\ + --gtf $gtf \\ + --outdir ./ \\ + --cores $task.cpus \\ + $args + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + r-base: \$(echo \$(R --version 2>&1) | sed 's/^.*R version //; s/ .*\$//') + bioconductor-grohmm: \$(Rscript -e "library(groHMM); cat(as.character(packageVersion('groHMM')))") + END_VERSIONS + """ +} diff --git a/modules/local/grohmm/transcriptcalling/main.nf b/modules/local/grohmm/transcriptcalling/main.nf new file mode 100644 index 00000000..f4016bb6 --- /dev/null +++ b/modules/local/grohmm/transcriptcalling/main.nf @@ -0,0 +1,47 @@ +process GROHMM_TRANSCRIPTCALLING { + tag "$meta.id" + label 'process_high' + label 'process_long' + + conda (params.enable_conda ? "conda-forge::r-base=4.1.1 conda-forge::r-optparse=1.7.1 conda-forge::r-argparse=2.1.3 bioconda::bioconductor-genomicfeatures=1.46.1 bioconda::bioconductor-grohmm=1.28.0" : null) + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
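The local `BED2SAF` module above is a thin awk wrapper that rewrites BED intervals into SAF columns (GeneID, Chr, Start, End, Strand), presumably so the vendored `subread/featurecounts` module can count reads over called regions. The same conversion outside Nextflow, with a placeholder input file:

```bash
# BED -> SAF, as in the BED2SAF module; transcripts.bed is a placeholder input.
# The GeneID column is synthesised as chrom.start.end and strand is left as ".".
awk 'OFS="\t" {print $1"."$2"."$3, $1, $2, $3, "."}' transcripts.bed > transcripts.saf
```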
+ 'https://depot.galaxyproject.org/singularity/mulled-v2-e9a6cb7894dd2753aff7d9446ea95c962cce8c46:0a46dae3241b1c4f02e46468f5d54eadcf64beca-0' : + 'quay.io/biocontainers/mulled-v2-e9a6cb7894dd2753aff7d9446ea95c962cce8c46:0a46dae3241b1c4f02e46468f5d54eadcf64beca-0' }" + + input: + tuple val(meta), path(bams) + path gtf + path tuning_file + + output: + tuple val(meta), path("*.transcripts.txt"), emit: transcripts + tuple val(meta), path("*.eval.txt") , emit: eval + tuple val(meta), path("*.transcripts.bed"), emit: transcripts_bed + tuple val(meta), path("*.tdFinal.txt") , emit: td + tuple val(meta), path("*.tdplot_mqc.jpg") , emit: td_plot + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def tuning = tuning_file ? "--tuning_file ${tuning_file}" : "" + """ + transcriptcalling_grohmm.R \\ + --bam_file ${bams} \\ + $tuning \\ + --outprefix ${prefix} \\ + --gtf $gtf \\ + --outdir ./ \\ + --cores $task.cpus \\ + $args + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + r-base: \$(echo \$(R --version 2>&1) | sed 's/^.*R version //; s/ .*\$//') + bioconductor-grohmm: \$(Rscript -e "library(groHMM); cat(as.character(packageVersion('groHMM')))") + END_VERSIONS + """ +} diff --git a/modules/local/gtf2bed.nf b/modules/local/gtf2bed.nf new file mode 100644 index 00000000..9f90dbe6 --- /dev/null +++ b/modules/local/gtf2bed.nf @@ -0,0 +1,31 @@ +process GTF2BED { + tag "$gtf" + label 'process_low' + + conda (params.enable_conda ? "conda-forge::perl=5.26.2" : null) + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/perl:5.26.2' : + 'quay.io/biocontainers/perl:5.26.2' }" + + input: + path gtf + + output: + path '*.bed' , emit: bed + path "versions.yml", emit: versions + + when: + task.ext.when == null || task.ext.when + + script: // This script is bundled with the pipeline, in nf-core/nascent/bin/ + """ + gtf2bed \\ + $gtf \\ + > ${gtf.baseName}.bed + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + perl: \$(echo \$(perl --version 2>&1) | sed 's/.*v\\(.*\\)) built.*/\\1/') + END_VERSIONS + """ +} diff --git a/modules/local/gtf_gene_filter.nf b/modules/local/gtf_gene_filter.nf new file mode 100644 index 00000000..e025c958 --- /dev/null +++ b/modules/local/gtf_gene_filter.nf @@ -0,0 +1,33 @@ +process GTF_GENE_FILTER { + tag "$fasta" + label 'process_low' + + conda (params.enable_conda ? "conda-forge::python=3.8.3" : null) + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
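Both local groHMM modules delegate to R scripts shipped in the pipeline's `bin/` directory (`parameter_tuning.R`, `transcriptcalling_grohmm.R`); the process scripts only assemble a command line. The invocation pattern, with placeholder file names and core counts:

```bash
# Placeholder inputs: sample.bam, genes.gtf, tuneparams.csv.
# parameter_tuning.R writes <prefix>.tuning.csv, which transcript calling can reuse.
parameter_tuning.R \
    --bam_file sample.bam \
    --tuning_file tuneparams.csv \
    --outprefix sample \
    --gtf genes.gtf \
    --outdir ./ \
    --cores 4

transcriptcalling_grohmm.R \
    --bam_file sample.bam \
    --tuning_file sample.tuning.csv \
    --outprefix sample \
    --gtf genes.gtf \
    --outdir ./ \
    --cores 4
```

In `GROHMM_TRANSCRIPTCALLING` the `--tuning_file` argument is optional and only added when a tuning file is supplied.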
+ 'https://depot.galaxyproject.org/singularity/python:3.8.3' : + 'quay.io/biocontainers/python:3.8.3' }" + + input: + path fasta + path gtf + + output: + path "*.gtf", emit: gtf + path "versions.yml", emit: versions + + when: + task.ext.when == null || task.ext.when + + script: // filter_gtf_for_genes_in_genome.py is bundled with the pipeline, borrowed from nf-core/rnaseq/bin/ + """ + filter_gtf_for_genes_in_genome.py \\ + --gtf $gtf \\ + --fasta $fasta \\ + -o ${fasta.baseName}_genes.gtf + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + python: \$(python --version | sed 's/Python //g') + END_VERSIONS + """ +} diff --git a/modules/local/samplesheet_check.nf b/modules/local/samplesheet_check.nf new file mode 100644 index 00000000..93b41a22 --- /dev/null +++ b/modules/local/samplesheet_check.nf @@ -0,0 +1,31 @@ +process SAMPLESHEET_CHECK { + tag "$samplesheet" + label 'process_single' + + conda (params.enable_conda ? "conda-forge::python=3.8.3" : null) + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/python:3.8.3' : + 'quay.io/biocontainers/python:3.8.3' }" + + input: + path samplesheet + + output: + path '*.csv' , emit: csv + path "versions.yml", emit: versions + + when: + task.ext.when == null || task.ext.when + + script: // This script is bundled with the pipeline, in nf-core/nascent/bin/ + """ + check_samplesheet.py \\ + $samplesheet \\ + samplesheet.valid.csv + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + python: \$(python --version | sed 's/Python //g') + END_VERSIONS + """ +} diff --git a/modules/nf-core/bbmap/pileup/main.nf b/modules/nf-core/bbmap/pileup/main.nf new file mode 100644 index 00000000..1f34efc5 --- /dev/null +++ b/modules/nf-core/bbmap/pileup/main.nf @@ -0,0 +1,39 @@ +process BBMAP_PILEUP { + tag "$meta.id" + label 'process_medium' + + conda (params.enable_conda ? "bioconda::bbmap=38.92 bioconda::samtools=1.15.1 pigz=2.6" : null) + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/mulled-v2-008daec56b7aaf3f162d7866758142b9f889d690:2fee0e0facec1dfe32a1ee4aa516aef7d0296ebf-0' : + 'quay.io/biocontainers/mulled-v2-008daec56b7aaf3f162d7866758142b9f889d690:2fee0e0facec1dfe32a1ee4aa516aef7d0296ebf-0' }" + + input: + tuple val(meta), path(bam) + + output: + tuple val(meta), path("*.stats.txt"), emit: covstats + tuple val(meta), path("*.hist.txt") , emit: hist + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + """ + pileup.sh \\ + -Xmx${task.memory.toGiga()}g \\ + in=${bam} \\ + out=${prefix}.coverage.stats.txt \\ + hist=${prefix}.coverage.hist.txt \\ + $args + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + bbmap: \$(bbversion.sh) + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + pigz: \$( pigz --version 2>&1 | sed 's/pigz //g' ) + END_VERSIONS + """ +} diff --git a/modules/nf-core/bbmap/pileup/meta.yml b/modules/nf-core/bbmap/pileup/meta.yml new file mode 100644 index 00000000..5cd85f9a --- /dev/null +++ b/modules/nf-core/bbmap/pileup/meta.yml @@ -0,0 +1,47 @@ +name: "bbmap_pileup" +description: Calculates per-scaffold or per-base coverage information from an unsorted sam or bam file. 
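Every module in this PR ends its script block with the same version-capture heredoc, writing a small `versions.yml` that the vendored `custom/dumpsoftwareversions` module later aggregates for MultiQC. Outside Nextflow the pattern looks like this, using `BBMAP_PILEUP` in place of `${task.process}` and assuming the tools are on `PATH`:

```bash
# Version-capture heredoc as used at the end of each module script.
# The sed expressions only strip surrounding text from each tool's version output.
cat <<-END_VERSIONS > versions.yml
"BBMAP_PILEUP":
    bbmap: $(bbversion.sh)
    samtools: $(echo $(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*$//')
END_VERSIONS
```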
+keywords: + - fasta + - genome + - coverage +tools: + - bbmap: + description: BBMap is a short read aligner, as well as various other bioinformatic tools. + homepage: https://jgi.doe.gov/data-and-tools/bbtools/bb-tools-user-guide/ + documentation: https://jgi.doe.gov/data-and-tools/bbtools/bb-tools-user-guide/ + tool_dev_url: "https://github.com/BioInfoTools/BBMap/blob/master/sh/pileup.sh" + doi: "" + licence: ["UC-LBL license (see package)"] + +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - bam: + type: file + description: BAM/CRAM/SAM file + pattern: "*.{bam,cram,sam}" + +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - stats: + type: file + description: Per-scaffold coverage info + pattern: "*.stats.txt" + - hist: + type: file + description: "Histogram of # occurrences of each depth level" + pattern: "*.hist.txt" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + +authors: + - "@Emiller88" diff --git a/modules/nf-core/bedtools/genomecov/main.nf b/modules/nf-core/bedtools/genomecov/main.nf new file mode 100644 index 00000000..05e359c9 --- /dev/null +++ b/modules/nf-core/bedtools/genomecov/main.nf @@ -0,0 +1,59 @@ +process BEDTOOLS_GENOMECOV { + tag "$meta.id" + label 'process_single' + + conda (params.enable_conda ? "bioconda::bedtools=2.30.0" : null) + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/bedtools:2.30.0--hc088bd4_0' : + 'quay.io/biocontainers/bedtools:2.30.0--hc088bd4_0' }" + + input: + tuple val(meta), path(intervals), val(scale) + path sizes + val extension + + output: + tuple val(meta), path("*.${extension}"), emit: genomecov + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def args_list = args.tokenize() + args += (scale > 0 && scale != 1) ? " -scale $scale" : "" + if (!args_list.contains('-bg') && (scale > 0 && scale != 1)) { + args += " -bg" + } + + def prefix = task.ext.prefix ?: "${meta.id}" + if (intervals.name =~ /\.bam/) { + """ + bedtools \\ + genomecov \\ + -ibam $intervals \\ + $args \\ + > ${prefix}.${extension} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + bedtools: \$(bedtools --version | sed -e "s/bedtools v//g") + END_VERSIONS + """ + } else { + """ + bedtools \\ + genomecov \\ + -i $intervals \\ + -g $sizes \\ + $args \\ + > ${prefix}.${extension} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + bedtools: \$(bedtools --version | sed -e "s/bedtools v//g") + END_VERSIONS + """ + } +} diff --git a/modules/nf-core/bedtools/genomecov/meta.yml b/modules/nf-core/bedtools/genomecov/meta.yml new file mode 100644 index 00000000..83bfab98 --- /dev/null +++ b/modules/nf-core/bedtools/genomecov/meta.yml @@ -0,0 +1,51 @@ +name: bedtools_genomecov +description: Computes histograms (default), per-base reports (-d) and BEDGRAPH (-bg) summaries of feature coverage (e.g., aligned sequences) for a given genome. +keywords: + - bed + - bam + - genomecov +tools: + - bedtools: + description: | + A set of tools for genomic analysis tasks, specifically enabling genome arithmetic (merge, count, complement) on various file types. 
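`BEDTOOLS_GENOMECOV` builds its command from the input type and the `scale` value: BAM input goes through `-ibam`, interval input through `-i` plus `-g`, and any positive scale other than 1 both appends `-scale` and forces bedGraph output with `-bg`. The effective commands, with placeholder file names:

```bash
# scale == 1 (or unset): default histogram-style coverage output.
bedtools genomecov -ibam sample.bam > sample.coverage.txt

# scale != 1: the module adds -scale and -bg, producing a scaled bedGraph.
bedtools genomecov -ibam sample.bam -scale 0.5 -bg > sample.bedGraph

# BED/GFF/VCF input uses -i plus a chromosome-sizes file instead of -ibam.
bedtools genomecov -i regions.bed -g genome.chrom.sizes -bg > regions.bedGraph
```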
+ documentation: https://bedtools.readthedocs.io/en/latest/content/tools/genomecov.html + licence: ["MIT"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - intervals: + type: file + description: BAM/BED/GFF/VCF + pattern: "*.{bam|bed|gff|vcf}" + - scale: + type: value + description: Number containing the scale factor for the output. Set to 1 to disable. Setting to a value other than 1 will also get the -bg bedgraph output format as this is required for this command switch + - sizes: + type: file + description: Tab-delimited table of chromosome names in the first column and chromosome sizes in the second column + - extension: + type: string + description: Extension of the output file (e. g., ".bg", ".bedgraph", ".txt", ".tab", etc.) It is set arbitrarily by the user and corresponds to the file format which depends on arguments. +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - genomecov: + type: file + description: Computed genome coverage file + pattern: "*.${extension}" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@Emiller88" + - "@sruthipsuresh" + - "@drpatelh" + - "@sidorov-si" + - "@chris-cheshire" diff --git a/modules/nf-core/bedtools/intersect/main.nf b/modules/nf-core/bedtools/intersect/main.nf new file mode 100644 index 00000000..0b06a1ad --- /dev/null +++ b/modules/nf-core/bedtools/intersect/main.nf @@ -0,0 +1,40 @@ +process BEDTOOLS_INTERSECT { + tag "$meta.id" + label 'process_single' + + conda (params.enable_conda ? "bioconda::bedtools=2.30.0" : null) + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/bedtools:2.30.0--hc088bd4_0' : + 'quay.io/biocontainers/bedtools:2.30.0--hc088bd4_0' }" + + input: + tuple val(meta), path(intervals1), path(intervals2) + val extension + + output: + tuple val(meta), path("*.${extension}"), emit: intersect + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + if ("$intervals1" == "${prefix}.${extension}" || + "$intervals2" == "${prefix}.${extension}") + error "Input and output names are the same, use \"task.ext.prefix\" to disambiguate!" + """ + bedtools \\ + intersect \\ + -a $intervals1 \\ + -b $intervals2 \\ + $args \\ + > ${prefix}.${extension} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + bedtools: \$(bedtools --version | sed -e "s/bedtools v//g") + END_VERSIONS + """ +} diff --git a/modules/nf-core/bedtools/intersect/meta.yml b/modules/nf-core/bedtools/intersect/meta.yml new file mode 100644 index 00000000..6e21e928 --- /dev/null +++ b/modules/nf-core/bedtools/intersect/meta.yml @@ -0,0 +1,47 @@ +name: bedtools_intersect +description: Allows one to screen for overlaps between two sets of genomic features. +keywords: + - bed + - intersect +tools: + - bedtools: + description: | + A set of tools for genomic analysis tasks, specifically enabling genome arithmetic (merge, count, complement) on various file types. + documentation: https://bedtools.readthedocs.io/en/latest/content/tools/intersect.html + licence: ["MIT"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
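`BEDTOOLS_INTERSECT` screens one interval set against another, with any extra behaviour injected through `task.ext.args` and the output extension chosen by the caller. A sketch of the kind of command it ends up running, with placeholder files and illustrative flags standing in for `task.ext.args`:

```bash
# Report each predicted interval (-wa) once (-u) if it overlaps the reference;
# predicted.bed, reference.bed and the flags are placeholders for this example.
bedtools intersect -a predicted.bed -b reference.bed -wa -u > predicted.in_reference.bed
```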
[ id:'test', single_end:false ] + - intervals1: + type: file + description: BAM/BED/GFF/VCF + pattern: "*.{bam|bed|gff|vcf}" + - intervals2: + type: file + description: BAM/BED/GFF/VCF + pattern: "*.{bam|bed|gff|vcf}" + - extension: + type: value + description: Extension of the output file. It is set by the user and corresponds to the file format which depends on arguments (e. g., ".bed", ".bam", ".txt", etc.). +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - intersect: + type: file + description: File containing the description of overlaps found between the two features + pattern: "*.${extension}" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@Emiller88" + - "@sruthipsuresh" + - "@drpatelh" + - "@sidorov-si" diff --git a/modules/nf-core/bedtools/merge/main.nf b/modules/nf-core/bedtools/merge/main.nf new file mode 100644 index 00000000..06dad822 --- /dev/null +++ b/modules/nf-core/bedtools/merge/main.nf @@ -0,0 +1,36 @@ +process BEDTOOLS_MERGE { + tag "$meta.id" + label 'process_single' + + conda (params.enable_conda ? "bioconda::bedtools=2.30.0" : null) + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/bedtools:2.30.0--hc088bd4_0' : + 'quay.io/biocontainers/bedtools:2.30.0--hc088bd4_0' }" + + input: + tuple val(meta), path(bed) + + output: + tuple val(meta), path('*.bed'), emit: bed + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + if ("$bed" == "${prefix}.bed") error "Input and output names are the same, use \"task.ext.prefix\" to disambiguate!" + """ + bedtools \\ + merge \\ + -i $bed \\ + $args \\ + > ${prefix}.bed + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + bedtools: \$(bedtools --version | sed -e "s/bedtools v//g") + END_VERSIONS + """ +} diff --git a/modules/nf-core/bedtools/merge/meta.yml b/modules/nf-core/bedtools/merge/meta.yml new file mode 100644 index 00000000..76743679 --- /dev/null +++ b/modules/nf-core/bedtools/merge/meta.yml @@ -0,0 +1,39 @@ +name: bedtools_merge +description: combines overlapping or “book-ended” features in an interval file into a single feature which spans all of the combined features. +keywords: + - bed + - merge +tools: + - bedtools: + description: | + A set of tools for genomic analysis tasks, specifically enabling genome arithmetic (merge, count, complement) on various file types. + documentation: https://bedtools.readthedocs.io/en/latest/content/tools/merge.html + licence: ["MIT"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - bed: + type: file + description: Input BED file + pattern: "*.{bed}" +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
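`BEDTOOLS_MERGE` collapses overlapping or book-ended intervals into single features; since `bedtools merge` expects position-sorted input, a `BEDTOOLS_SORT` module is vendored alongside it. The two steps chained outside Nextflow, with a placeholder input:

```bash
# Sort by chromosome and start position, then merge overlapping intervals.
bedtools sort -i transcripts.bed > transcripts.sorted.bed
bedtools merge -i transcripts.sorted.bed > transcripts.merged.bed
```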
[ id:'test', single_end:false ] + - bed: + type: file + description: Overlapped bed file with combined features + pattern: "*.{bed}" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@Emiller88" + - "@sruthipsuresh" + - "@drpatelh" diff --git a/modules/nf-core/bedtools/sort/main.nf b/modules/nf-core/bedtools/sort/main.nf new file mode 100644 index 00000000..331c129a --- /dev/null +++ b/modules/nf-core/bedtools/sort/main.nf @@ -0,0 +1,37 @@ +process BEDTOOLS_SORT { + tag "$meta.id" + label 'process_single' + + conda (params.enable_conda ? "bioconda::bedtools=2.30.0" : null) + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/bedtools:2.30.0--hc088bd4_0' : + 'quay.io/biocontainers/bedtools:2.30.0--hc088bd4_0' }" + + input: + tuple val(meta), path(intervals) + val extension + + output: + tuple val(meta), path("*.${extension}"), emit: sorted + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + if ("$intervals" == "${prefix}.${extension}") error "Input and output names are the same, use \"task.ext.prefix\" to disambiguate!" + """ + bedtools \\ + sort \\ + -i $intervals \\ + $args \\ + > ${prefix}.${extension} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + bedtools: \$(bedtools --version | sed -e "s/bedtools v//g") + END_VERSIONS + """ +} diff --git a/modules/nf-core/bedtools/sort/meta.yml b/modules/nf-core/bedtools/sort/meta.yml new file mode 100644 index 00000000..369e51ff --- /dev/null +++ b/modules/nf-core/bedtools/sort/meta.yml @@ -0,0 +1,46 @@ +name: bedtools_sort +description: Sorts a feature file by chromosome and other criteria. +keywords: + - bed + - sort +tools: + - bedtools: + description: | + A set of tools for genomic analysis tasks, specifically enabling genome arithmetic (merge, count, complement) on various file types. + documentation: https://bedtools.readthedocs.io/en/latest/content/tools/sort.html + licence: ["MIT"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - intervals: + type: file + description: BED/BEDGRAPH + pattern: "*.{bed|bedGraph}" + + - extension: + type: string + description: Extension of the output file (e. g., ".bg", ".bedgraph", ".txt", ".tab", etc.) It is set arbitrarily by the user and corresponds to the file format which depends on arguments. +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + + - sorted: + type: file + description: Sorted output file + pattern: "*.${extension}" + + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@Emiller88" + - "@sruthipsuresh" + - "@drpatelh" + - "@chris-cheshire" diff --git a/modules/nf-core/bwa/index/main.nf b/modules/nf-core/bwa/index/main.nf new file mode 100644 index 00000000..aa75ae5d --- /dev/null +++ b/modules/nf-core/bwa/index/main.nf @@ -0,0 +1,35 @@ +process BWA_INDEX { + tag "$fasta" + label 'process_single' + + conda (params.enable_conda ? "bioconda::bwa=0.7.17" : null) + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/bwa:0.7.17--hed695b0_7' : + 'quay.io/biocontainers/bwa:0.7.17--hed695b0_7' }" + + input: + path fasta + + output: + path "bwa" , emit: index + path "versions.yml", emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + """ + mkdir bwa + bwa \\ + index \\ + $args \\ + -p bwa/${fasta.baseName} \\ + $fasta + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + bwa: \$(echo \$(bwa 2>&1) | sed 's/^.*Version: //; s/Contact:.*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/bwa/index/meta.yml b/modules/nf-core/bwa/index/meta.yml new file mode 100644 index 00000000..2bbd81d9 --- /dev/null +++ b/modules/nf-core/bwa/index/meta.yml @@ -0,0 +1,32 @@ +name: bwa_index +description: Create BWA index for reference genome +keywords: + - index + - fasta + - genome + - reference +tools: + - bwa: + description: | + BWA is a software package for mapping DNA sequences against + a large reference genome, such as the human genome. + homepage: http://bio-bwa.sourceforge.net/ + documentation: http://www.htslib.org/doc/samtools.html + arxiv: arXiv:1303.3997 + licence: ["GPL-3.0-or-later"] +input: + - fasta: + type: file + description: Input genome fasta file +output: + - index: + type: file + description: BWA genome index files + pattern: "*.{amb,ann,bwt,pac,sa}" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@drpatelh" + - "@maxulysse" diff --git a/modules/nf-core/bwa/mem/main.nf b/modules/nf-core/bwa/mem/main.nf new file mode 100644 index 00000000..f55af944 --- /dev/null +++ b/modules/nf-core/bwa/mem/main.nf @@ -0,0 +1,43 @@ +process BWA_MEM { + tag "$meta.id" + label 'process_high' + + conda (params.enable_conda ? "bioconda::bwa=0.7.17 bioconda::samtools=1.15.1" : null) + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/mulled-v2-fe8faa35dbf6dc65a0f7f5d4ea12e31a79f73e40:8110a70be2bfe7f75a2ea7f2a89cda4cc7732095-0' : + 'quay.io/biocontainers/mulled-v2-fe8faa35dbf6dc65a0f7f5d4ea12e31a79f73e40:8110a70be2bfe7f75a2ea7f2a89cda4cc7732095-0' }" + + input: + tuple val(meta), path(reads) + path index + val sort_bam + + output: + tuple val(meta), path("*.bam"), emit: bam + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def args2 = task.ext.args2 ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def samtools_command = sort_bam ? 
'sort' : 'view' + """ + INDEX=`find -L ./ -name "*.amb" | sed 's/.amb//'` + + bwa mem \\ + $args \\ + -t $task.cpus \\ + \$INDEX \\ + $reads \\ + | samtools $samtools_command $args2 --threads $task.cpus -o ${prefix}.bam - + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + bwa: \$(echo \$(bwa 2>&1) | sed 's/^.*Version: //; s/Contact:.*\$//') + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/bwa/mem/meta.yml b/modules/nf-core/bwa/mem/meta.yml new file mode 100644 index 00000000..f84c5227 --- /dev/null +++ b/modules/nf-core/bwa/mem/meta.yml @@ -0,0 +1,50 @@ +name: bwa_mem +description: Performs fastq alignment to a fasta reference using BWA +keywords: + - mem + - bwa + - alignment + - map + - fastq + - bam + - sam +tools: + - bwa: + description: | + BWA is a software package for mapping DNA sequences against + a large reference genome, such as the human genome. + homepage: http://bio-bwa.sourceforge.net/ + documentation: http://www.htslib.org/doc/samtools.html + arxiv: arXiv:1303.3997 + licence: ["GPL-3.0-or-later"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - reads: + type: file + description: | + List of input FastQ files of size 1 and 2 for single-end and paired-end data, + respectively. + - index: + type: file + description: BWA genome index files + pattern: "Directory containing BWA index *.{amb,ann,bwt,pac,sa}" + - sort_bam: + type: boolean + description: use samtools sort (true) or samtools view (false) + pattern: "true or false" +output: + - bam: + type: file + description: Output BAM file containing read alignments + pattern: "*.{bam}" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@drpatelh" + - "@jeremy1805" diff --git a/modules/nf-core/bwamem2/index/main.nf b/modules/nf-core/bwamem2/index/main.nf new file mode 100644 index 00000000..0b7ad199 --- /dev/null +++ b/modules/nf-core/bwamem2/index/main.nf @@ -0,0 +1,49 @@ +process BWAMEM2_INDEX { + tag "$fasta" + label 'process_single' + + conda (params.enable_conda ? "bioconda::bwa-mem2=2.2.1" : null) + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/bwa-mem2:2.2.1--he513fc3_0' : + 'quay.io/biocontainers/bwa-mem2:2.2.1--he513fc3_0' }" + + input: + tuple val(meta), path(fasta) + + output: + tuple val(meta), path("bwamem2"), emit: index + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + """ + mkdir bwamem2 + bwa-mem2 \\ + index \\ + $args \\ + $fasta -p bwamem2/${fasta} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + bwamem2: \$(echo \$(bwa-mem2 version 2>&1) | sed 's/.* //') + END_VERSIONS + """ + + stub: + """ + mkdir bwamem2 + touch bwamem2/${fasta}.0123 + touch bwamem2/${fasta}.ann + touch bwamem2/${fasta}.pac + touch bwamem2/${fasta}.amb + touch bwamem2/${fasta}.bwt.2bit.64 + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + bwamem2: \$(echo \$(bwa-mem2 version 2>&1) | sed 's/.* //') + END_VERSIONS + """ +} diff --git a/modules/nf-core/bwamem2/index/meta.yml b/modules/nf-core/bwamem2/index/meta.yml new file mode 100644 index 00000000..a6b11ae5 --- /dev/null +++ b/modules/nf-core/bwamem2/index/meta.yml @@ -0,0 +1,40 @@ +name: bwamem2_index +description: Create BWA-mem2 index for reference genome +keywords: + - index + - fasta + - genome + - reference +tools: + - bwa: + description: | + BWA-mem2 is a software package for mapping DNA sequences against + a large reference genome, such as the human genome. + homepage: https://github.com/bwa-mem2/bwa-mem2 + documentation: https://github.com/bwa-mem2/bwa-mem2#usage + licence: ["MIT"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - fasta: + type: file + description: Input genome fasta file +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - index: + type: file + description: BWA genome index files + pattern: "*.{0123,amb,ann,bwt.2bit.64,pac}" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@maxulysse" diff --git a/modules/nf-core/bwamem2/mem/main.nf b/modules/nf-core/bwamem2/mem/main.nf new file mode 100644 index 00000000..08dc5dfb --- /dev/null +++ b/modules/nf-core/bwamem2/mem/main.nf @@ -0,0 +1,55 @@ +process BWAMEM2_MEM { + tag "$meta.id" + label 'process_high' + + conda (params.enable_conda ? "bioconda::bwa-mem2=2.2.1 bioconda::samtools=1.15.1" : null) + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/mulled-v2-e5d375990341c5aef3c9aff74f96f66f65375ef6:38aed4501da19db366dc7c8d52d31d94e760cfaf-0' : + 'quay.io/biocontainers/mulled-v2-e5d375990341c5aef3c9aff74f96f66f65375ef6:38aed4501da19db366dc7c8d52d31d94e760cfaf-0' }" + + input: + tuple val(meta), path(reads) + tuple val(meta2), path(index) + val sort_bam + + output: + tuple val(meta), path("*.bam"), emit: bam + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def args2 = task.ext.args2 ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def samtools_command = sort_bam ? 
'sort' : 'view' + """ + INDEX=`find -L ./ -name "*.amb" | sed 's/.amb//'` + + bwa-mem2 \\ + mem \\ + $args \\ + -t $task.cpus \\ + \$INDEX \\ + $reads \\ + | samtools $samtools_command $args2 -@ $task.cpus -o ${prefix}.bam - + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + bwamem2: \$(echo \$(bwa-mem2 version 2>&1) | sed 's/.* //') + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.bam + cat <<-END_VERSIONS > versions.yml + "${task.process}": + bwamem2: \$(echo \$(bwa-mem2 version 2>&1) | sed 's/.* //') + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/bwamem2/mem/meta.yml b/modules/nf-core/bwamem2/mem/meta.yml new file mode 100644 index 00000000..a4655510 --- /dev/null +++ b/modules/nf-core/bwamem2/mem/meta.yml @@ -0,0 +1,54 @@ +name: bwamem2_mem +description: Performs fastq alignment to a fasta reference using BWA +keywords: + - mem + - bwa + - alignment + - map + - fastq + - bam + - sam +tools: + - bwa: + description: | + BWA-mem2 is a software package for mapping DNA sequences against + a large reference genome, such as the human genome. + homepage: https://github.com/bwa-mem2/bwa-mem2 + documentation: http://www.htslib.org/doc/samtools.html + arxiv: arXiv:1303.3997 + licence: ["MIT"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - reads: + type: file + description: | + List of input FastQ files of size 1 and 2 for single-end and paired-end data, + respectively. + - index: + type: file + description: BWA genome index files + pattern: "Directory containing BWA index *.{0132,amb,ann,bwt.2bit.64,pac}" + - sort_bam: + type: boolean + description: use samtools sort (true) or samtools view (false) + pattern: "true or false" +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - bam: + type: file + description: Output BAM file containing read alignments + pattern: "*.{bam}" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@maxulysse" diff --git a/modules/nf-core/cat/cat/main.nf b/modules/nf-core/cat/cat/main.nf new file mode 100644 index 00000000..40e53f3e --- /dev/null +++ b/modules/nf-core/cat/cat/main.nf @@ -0,0 +1,62 @@ +process CAT_CAT { + tag "$meta.id" + label 'process_low' + + conda (params.enable_conda ? "conda-forge::pigz=2.3.4" : null) + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/pigz:2.3.4' : + 'quay.io/biocontainers/pigz:2.3.4' }" + + input: + tuple val(meta), path(files_in) + + output: + tuple val(meta), path("${prefix}"), emit: file_out + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def args2 = task.ext.args2 ?: '' + def file_list = files_in.collect { it.toString() } + + // | input | output | command1 | command2 | + // |-----------|------------|----------|----------| + // | gzipped | gzipped | cat | | + // | ungzipped | ungzipped | cat | | + // | gzipped | ungzipped | zcat | | + // | ungzipped | gzipped | cat | pigz | + + // Use input file ending as default + prefix = task.ext.prefix ?: "${meta.id}${file_list[0].substring(file_list[0].lastIndexOf('.'))}" + out_zip = prefix.endsWith('.gz') + in_zip = file_list[0].endsWith('.gz') + command1 = (in_zip && !out_zip) ? 'zcat' : 'cat' + command2 = (!in_zip && out_zip) ? "| pigz -c -p $task.cpus $args2" : '' + """ + $command1 \\ + $args \\ + ${file_list.join(' ')} \\ + $command2 \\ + > ${prefix} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + pigz: \$( pigz --version 2>&1 | sed 's/pigz //g' ) + END_VERSIONS + """ + + stub: + def file_list = files_in.collect { it.toString() } + prefix = task.ext.prefix ?: "${meta.id}${file_list[0].substring(file_list[0].lastIndexOf('.'))}" + """ + touch $prefix + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + pigz: \$( pigz --version 2>&1 | sed 's/pigz //g' ) + END_VERSIONS + """ +} diff --git a/modules/nf-core/cat/cat/meta.yml b/modules/nf-core/cat/cat/meta.yml new file mode 100644 index 00000000..5eeff5a6 --- /dev/null +++ b/modules/nf-core/cat/cat/meta.yml @@ -0,0 +1,37 @@ +name: cat_cat +description: A module for concatenation of gzipped or uncompressed files +keywords: + - concatenate + - gzip + - cat +tools: + - cat: + description: Just concatenation + homepage: None + documentation: https://man7.org/linux/man-pages/man1/cat.1.html + tool_dev_url: None + licence: ["GPL-3.0-or-later"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - files_in: + type: file + description: List of compressed / uncompressed files + pattern: "*" + +output: + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - file_out: + type: file + description: Concatenated file. Will be gzipped if file_out ends with ".gz" + pattern: "${file_out}" + +authors: + - "@erikrikarddaniel" + - "@FriederikeHanssen" diff --git a/modules/nf-core/cat/fastq/main.nf b/modules/nf-core/cat/fastq/main.nf new file mode 100644 index 00000000..4fa365d3 --- /dev/null +++ b/modules/nf-core/cat/fastq/main.nf @@ -0,0 +1,80 @@ +process CAT_FASTQ { + tag "$meta.id" + label 'process_single' + + conda (params.enable_conda ? "conda-forge::sed=4.7" : null) + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/ubuntu:20.04' : + 'ubuntu:20.04' }" + + input: + tuple val(meta), path(reads, stageAs: "input*/*") + + output: + tuple val(meta), path("*.merged.fastq.gz"), emit: reads + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def readList = reads instanceof List ? 
reads.collect{ it.toString() } : [reads.toString()] + if (meta.single_end) { + if (readList.size >= 1) { + """ + cat ${readList.join(' ')} > ${prefix}.merged.fastq.gz + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + cat: \$(echo \$(cat --version 2>&1) | sed 's/^.*coreutils) //; s/ .*\$//') + END_VERSIONS + """ + } + } else { + if (readList.size >= 2) { + def read1 = [] + def read2 = [] + readList.eachWithIndex{ v, ix -> ( ix & 1 ? read2 : read1 ) << v } + """ + cat ${read1.join(' ')} > ${prefix}_1.merged.fastq.gz + cat ${read2.join(' ')} > ${prefix}_2.merged.fastq.gz + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + cat: \$(echo \$(cat --version 2>&1) | sed 's/^.*coreutils) //; s/ .*\$//') + END_VERSIONS + """ + } + } + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + def readList = reads instanceof List ? reads.collect{ it.toString() } : [reads.toString()] + if (meta.single_end) { + if (readList.size > 1) { + """ + touch ${prefix}.merged.fastq.gz + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + cat: \$(echo \$(cat --version 2>&1) | sed 's/^.*coreutils) //; s/ .*\$//') + END_VERSIONS + """ + } + } else { + if (readList.size > 2) { + """ + touch ${prefix}_1.merged.fastq.gz + touch ${prefix}_2.merged.fastq.gz + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + cat: \$(echo \$(cat --version 2>&1) | sed 's/^.*coreutils) //; s/ .*\$//') + END_VERSIONS + """ + } + } + +} diff --git a/modules/nf-core/cat/fastq/meta.yml b/modules/nf-core/cat/fastq/meta.yml new file mode 100644 index 00000000..c836598e --- /dev/null +++ b/modules/nf-core/cat/fastq/meta.yml @@ -0,0 +1,39 @@ +name: cat_fastq +description: Concatenates fastq files +keywords: + - fastq + - concatenate +tools: + - cat: + description: | + The cat utility reads files sequentially, writing them to the standard output. + documentation: https://www.gnu.org/software/coreutils/manual/html_node/cat-invocation.html + licence: ["GPL-3.0-or-later"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - reads: + type: list + description: | + List of input FastQ files to be concatenated. +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - reads: + type: file + description: Merged fastq file + pattern: "*.{merged.fastq.gz}" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + +authors: + - "@joseespinosa" + - "@drpatelh" diff --git a/modules/nf-core/custom/dumpsoftwareversions/main.nf b/modules/nf-core/custom/dumpsoftwareversions/main.nf new file mode 100644 index 00000000..cebb6e05 --- /dev/null +++ b/modules/nf-core/custom/dumpsoftwareversions/main.nf @@ -0,0 +1,24 @@ +process CUSTOM_DUMPSOFTWAREVERSIONS { + label 'process_single' + + // Requires `pyyaml` which does not have a dedicated container but is in the MultiQC container + conda (params.enable_conda ? 'bioconda::multiqc=1.13' : null) + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/multiqc:1.13--pyhdfd78af_0' : + 'quay.io/biocontainers/multiqc:1.13--pyhdfd78af_0' }" + + input: + path versions + + output: + path "software_versions.yml" , emit: yml + path "software_versions_mqc.yml", emit: mqc_yml + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + template 'dumpsoftwareversions.py' +} diff --git a/modules/nf-core/custom/dumpsoftwareversions/meta.yml b/modules/nf-core/custom/dumpsoftwareversions/meta.yml new file mode 100644 index 00000000..60b546a0 --- /dev/null +++ b/modules/nf-core/custom/dumpsoftwareversions/meta.yml @@ -0,0 +1,34 @@ +name: custom_dumpsoftwareversions +description: Custom module used to dump software versions within the nf-core pipeline template +keywords: + - custom + - version +tools: + - custom: + description: Custom module used to dump software versions within the nf-core pipeline template + homepage: https://github.com/nf-core/tools + documentation: https://github.com/nf-core/tools + licence: ["MIT"] +input: + - versions: + type: file + description: YML file containing software versions + pattern: "*.yml" + +output: + - yml: + type: file + description: Standard YML file containing software versions + pattern: "software_versions.yml" + - mqc_yml: + type: file + description: MultiQC custom content YML file containing software versions + pattern: "software_versions_mqc.yml" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + +authors: + - "@drpatelh" + - "@grst" diff --git a/modules/nf-core/custom/dumpsoftwareversions/templates/dumpsoftwareversions.py b/modules/nf-core/custom/dumpsoftwareversions/templates/dumpsoftwareversions.py new file mode 100644 index 00000000..7c2abfa4 --- /dev/null +++ b/modules/nf-core/custom/dumpsoftwareversions/templates/dumpsoftwareversions.py @@ -0,0 +1,90 @@ +#!/usr/bin/env python + +import yaml +import platform +from textwrap import dedent + + +def _make_versions_html(versions): + html = [ + dedent( + """\\ + + + + + + + + + + """ + ) + ] + for process, tmp_versions in sorted(versions.items()): + html.append("") + for i, (tool, version) in enumerate(sorted(tmp_versions.items())): + html.append( + dedent( + f"""\\ + + + + + + """ + ) + ) + html.append("") + html.append("
    Process Name Software Version
    {process if (i == 0) else ''}{tool}{version}
    ") + return "\\n".join(html) + + +versions_this_module = {} +versions_this_module["${task.process}"] = { + "python": platform.python_version(), + "yaml": yaml.__version__, +} + +with open("$versions") as f: + versions_by_process = yaml.load(f, Loader=yaml.BaseLoader) | versions_this_module + +# aggregate versions by the module name (derived from fully-qualified process name) +versions_by_module = {} +for process, process_versions in versions_by_process.items(): + module = process.split(":")[-1] + try: + if versions_by_module[module] != process_versions: + raise AssertionError( + "We assume that software versions are the same between all modules. " + "If you see this error-message it means you discovered an edge-case " + "and should open an issue in nf-core/tools. " + ) + except KeyError: + versions_by_module[module] = process_versions + +versions_by_module["Workflow"] = { + "Nextflow": "$workflow.nextflow.version", + "$workflow.manifest.name": "$workflow.manifest.version", +} + +versions_mqc = { + "id": "software_versions", + "section_name": "${workflow.manifest.name} Software Versions", + "section_href": "https://github.com/${workflow.manifest.name}", + "plot_type": "html", + "description": "are collected at run time from the software output.", + "data": _make_versions_html(versions_by_module), +} + +with open("software_versions.yml", "w") as f: + yaml.dump(versions_by_module, f, default_flow_style=False) +with open("software_versions_mqc.yml", "w") as f: + yaml.dump(versions_mqc, f, default_flow_style=False) + +with open("versions.yml", "w") as f: + yaml.dump(versions_this_module, f, default_flow_style=False) diff --git a/modules/nf-core/custom/getchromsizes/main.nf b/modules/nf-core/custom/getchromsizes/main.nf new file mode 100644 index 00000000..8e1693d4 --- /dev/null +++ b/modules/nf-core/custom/getchromsizes/main.nf @@ -0,0 +1,44 @@ +process CUSTOM_GETCHROMSIZES { + tag "$fasta" + label 'process_single' + + conda (params.enable_conda ? "bioconda::samtools=1.15.1" : null) + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/samtools:1.15.1--h1170115_0' : + 'quay.io/biocontainers/samtools:1.15.1--h1170115_0' }" + + input: + tuple val(meta), path(fasta) + + output: + tuple val(meta), path ("*.sizes"), emit: sizes + tuple val(meta), path ("*.fai") , emit: fai + tuple val(meta), path ("*.gzi") , emit: gzi, optional: true + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + """ + samtools faidx $fasta + cut -f 1,2 ${fasta}.fai > ${fasta}.sizes + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + getchromsizes: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ + + stub: + """ + touch ${fasta}.fai + touch ${fasta}.sizes + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + getchromsizes: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/custom/getchromsizes/meta.yml b/modules/nf-core/custom/getchromsizes/meta.yml new file mode 100644 index 00000000..219ca1d8 --- /dev/null +++ b/modules/nf-core/custom/getchromsizes/meta.yml @@ -0,0 +1,53 @@ +name: custom_getchromsizes +description: Generates a FASTA file of chromosome sizes and a fasta index file +keywords: + - fasta + - chromosome + - indexing +tools: + - samtools: + description: Tools for dealing with SAM, BAM and CRAM files + homepage: http://www.htslib.org/ + documentation: http://www.htslib.org/doc/samtools.html + tool_dev_url: https://github.com/samtools/samtools + doi: 10.1093/bioinformatics/btp352 + licence: ["MIT"] + +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - fasta: + type: file + description: FASTA file + pattern: "*.{fa,fasta,fna,fas}" + +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - sizes: + type: file + description: File containing chromosome lengths + pattern: "*.{sizes}" + - fai: + type: file + description: FASTA index file + pattern: "*.{fai}" + - gzi: + type: file + description: Optional gzip index file for compressed inputs + pattern: "*.gzi" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + +authors: + - "@tamara-hodgetts" + - "@chris-cheshire" + - "@muffato" diff --git a/modules/nf-core/deeptools/bamcoverage/main.nf b/modules/nf-core/deeptools/bamcoverage/main.nf new file mode 100644 index 00000000..04073ed9 --- /dev/null +++ b/modules/nf-core/deeptools/bamcoverage/main.nf @@ -0,0 +1,67 @@ +process DEEPTOOLS_BAMCOVERAGE { + tag "$meta.id" + label 'process_low' + + conda (params.enable_conda ? "bioconda::deeptools=3.5.1 bioconda::samtools=1.15.1" : null) + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/mulled-v2-eb9e7907c7a753917c1e4d7a64384c047429618a:2c687053c0252667cca265c9f4118f2c205a604c-0': + 'quay.io/biocontainers/mulled-v2-eb9e7907c7a753917c1e4d7a64384c047429618a:2c687053c0252667cca265c9f4118f2c205a604c-0' }" + + input: + tuple val(meta), path(input), path(input_index) + path(fasta) + path(fasta_fai) + + output: + tuple val(meta), path("*.bigWig") , emit: bigwig, optional: true + tuple val(meta), path("*.bedgraph") , emit: bedgraph, optional: true + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}.bigWig" + + // cram_input is currently not working with deeptools + // therefore it's required to convert cram to bam first + def is_cram = input.Extension == "cram" ? true : false + def input_out = is_cram ? input.BaseName + ".bam" : "${input}" + def fai_reference = fasta_fai ? "--fai-reference ${fasta_fai}" : "" + + if (is_cram){ + """ + samtools view -T $fasta $input $fai_reference -@ $task.cpus -o $input_out + samtools index -b $input_out -@ $task.cpus + + bamCoverage \\ + --bam $input_out \\ + $args \\ + --numberOfProcessors ${task.cpus} \\ + --outFileName ${prefix} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + deeptools: \$(bamCoverage --version | sed -e "s/bamCoverage //g") + END_VERSIONS + """ + + } + else { + """ + bamCoverage \\ + --bam $input_out \\ + $args \\ + --numberOfProcessors ${task.cpus} \\ + --outFileName ${prefix} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + deeptools: \$(bamCoverage --version | sed -e "s/bamCoverage //g") + END_VERSIONS + """ + } + +} diff --git a/modules/nf-core/deeptools/bamcoverage/meta.yml b/modules/nf-core/deeptools/bamcoverage/meta.yml new file mode 100644 index 00000000..c6566910 --- /dev/null +++ b/modules/nf-core/deeptools/bamcoverage/meta.yml @@ -0,0 +1,58 @@ +name: deeptools_bamcoverage +description: This tool takes an alignment of reads or fragments as input (BAM file) and generates a coverage track (bigWig or bedGraph) as output. +keywords: + - sort +tools: + - deeptools: + description: A set of user-friendly tools for normalization and visualzation of deep-sequencing data + homepage: https://deeptools.readthedocs.io/en/develop/content/tools/bamCoverage.html + documentation: https://deeptools.readthedocs.io/en/develop/content/tools/bamCoverage.html + tool_dev_url: https://github.com/deeptools/deepTools/ + doi: "https://doi.org/10.1093/nar/gkw257" + licence: ["GPL v3"] + +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - input: + type: file + description: BAM/CRAM file + pattern: "*.{bam,cram}" + - input_index: + type: file + description: BAM/CRAM index file + pattern: "*.{bai,crai}" + - fasta: + type: file + description: Reference file the CRAM file was created with (required with CRAM input) + pattern: "*.{fasta,fa}" + - fasta_fai: + type: file + description: Index of the reference file (optional, but recommended) + pattern: "*.{fai}" + +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - bigWig: + type: file + description: BigWig file + pattern: "*.bigWig" + - bedgraph: + type: file + description: Bedgraph file + pattern: "*.bedgraph" + +authors: + - "@FriederikeHanssen" + - "@SusiJo" diff --git a/modules/nf-core/dragmap/align/main.nf b/modules/nf-core/dragmap/align/main.nf new file mode 100644 index 00000000..f0d59f05 --- /dev/null +++ b/modules/nf-core/dragmap/align/main.nf @@ -0,0 +1,46 @@ +process DRAGMAP_ALIGN { + tag "$meta.id" + label 'process_high' + + conda (params.enable_conda ? "bioconda::dragmap=1.2.1 bioconda::samtools=1.15.1 conda-forge::pigz=2.3.4" : null) + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/mulled-v2-580d344d9d4a496cd403932da8765f9e0187774d:5ebebbc128cd624282eaa37d2c7fe01505a91a69-0': + 'quay.io/biocontainers/mulled-v2-580d344d9d4a496cd403932da8765f9e0187774d:5ebebbc128cd624282eaa37d2c7fe01505a91a69-0' }" + + input: + tuple val(meta), path(reads) + path hashmap + val sort_bam + + output: + tuple val(meta), path("*.bam"), emit: bam + tuple val(meta), path('*.log'), emit: log + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def args2 = task.ext.args2 ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def reads_command = meta.single_end ? "-1 $reads" : "-1 ${reads[0]} -2 ${reads[1]}" + def samtools_command = sort_bam ? 'sort' : 'view' + + """ + dragen-os \\ + -r $hashmap \\ + $args \\ + --num-threads $task.cpus \\ + $reads_command \\ + 2> ${prefix}.dragmap.log \\ + | samtools $samtools_command $args2 --threads $task.cpus -o ${prefix}.bam - + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + dragmap: \$(echo \$(dragen-os --version 2>&1)) + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + pigz: \$( pigz --version 2>&1 | sed 's/pigz //g' ) + END_VERSIONS + """ +} diff --git a/modules/nf-core/dragmap/align/meta.yml b/modules/nf-core/dragmap/align/meta.yml new file mode 100644 index 00000000..dcce34fb --- /dev/null +++ b/modules/nf-core/dragmap/align/meta.yml @@ -0,0 +1,42 @@ +name: dragmap_align +description: Performs fastq alignment to a reference using DRAGMAP +keywords: + - alignment + - map + - fastq + - bam + - sam +tools: + - dragmap: + description: Dragmap is the Dragen mapper/aligner Open Source Software. + homepage: https://github.com/Illumina/dragmap + documentation: https://github.com/Illumina/dragmap + tool_dev_url: https://github.com/Illumina/dragmap#basic-command-line-usage + doi: "" + licence: ["GPL v3"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - reads: + type: file + description: | + List of input FastQ files of size 1 and 2 for single-end and paired-end data, + respectively. 
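+      # Not part of the module metadata: a minimal usage sketch, assuming the hash table is
+      # built with the DRAGMAP_HASHTABLE module also added in this changeset and that
+      # ch_reads / ch_fasta are channels defined by the calling (sub)workflow:
+      #   include { DRAGMAP_HASHTABLE } from '../../modules/nf-core/dragmap/hashtable/main'
+      #   include { DRAGMAP_ALIGN     } from '../../modules/nf-core/dragmap/align/main'
+      #   DRAGMAP_HASHTABLE ( ch_fasta )
+      #   DRAGMAP_ALIGN ( ch_reads, DRAGMAP_HASHTABLE.out.hashmap, false )   // false = samtools view, no sort
+      # The relative include paths and channel names are assumptions for illustration only.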
+ - hashmap: + type: file + description: DRAGMAP hash table + pattern: "Directory containing DRAGMAP hash table *.{cmp,.bin,.txt}" +output: + - bam: + type: file + description: Output BAM file containing read alignments + pattern: "*.{bam}" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@Emiller88" diff --git a/modules/nf-core/dragmap/hashtable/main.nf b/modules/nf-core/dragmap/hashtable/main.nf new file mode 100644 index 00000000..81333dfd --- /dev/null +++ b/modules/nf-core/dragmap/hashtable/main.nf @@ -0,0 +1,36 @@ +process DRAGMAP_HASHTABLE { + tag "$fasta" + label 'process_high' + + conda (params.enable_conda ? "bioconda::dragmap=1.2.1" : null) + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/dragmap:1.2.1--hd4ca14e_0': + 'quay.io/biocontainers/dragmap:1.2.1--hd4ca14e_0' }" + + input: + path fasta + + output: + path "dragmap" , emit: hashmap + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + """ + mkdir dragmap + dragen-os \\ + --build-hash-table true \\ + --ht-reference $fasta \\ + --output-directory dragmap \\ + $args \\ + --ht-num-threads $task.cpus + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + dragmap: \$(echo \$(dragen-os --version 2>&1)) + END_VERSIONS + """ +} diff --git a/modules/nf-core/dragmap/hashtable/meta.yml b/modules/nf-core/dragmap/hashtable/meta.yml new file mode 100644 index 00000000..f86a5dbb --- /dev/null +++ b/modules/nf-core/dragmap/hashtable/meta.yml @@ -0,0 +1,30 @@ +name: dragmap_hashtable +description: Create DRAGEN hashtable for reference genome +keywords: + - index + - fasta + - genome + - reference +tools: + - dragmap: + description: Dragmap is the Dragen mapper/aligner Open Source Software. + homepage: https://github.com/Illumina/dragmap + documentation: https://github.com/Illumina/dragmap + tool_dev_url: https://github.com/Illumina/dragmap#basic-command-line-usage + doi: "" + licence: ["GPL v3"] +input: + - fasta: + type: file + description: Input genome fasta file +output: + - hashmap: + type: file + description: DRAGMAP hash table + pattern: "*.{cmp,.bin,.txt}" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@Emiller88" diff --git a/modules/nf-core/fastp/main.nf b/modules/nf-core/fastp/main.nf new file mode 100644 index 00000000..11ea4db3 --- /dev/null +++ b/modules/nf-core/fastp/main.nf @@ -0,0 +1,98 @@ +process FASTP { + tag "$meta.id" + label 'process_medium' + + conda (params.enable_conda ? 'bioconda::fastp=0.23.2' : null) + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/fastp:0.23.2--h79da9fb_0' : + 'quay.io/biocontainers/fastp:0.23.2--h79da9fb_0' }" + + input: + tuple val(meta), path(reads) + val save_trimmed_fail + val save_merged + + output: + tuple val(meta), path('*.fastp.fastq.gz') , optional:true, emit: reads + tuple val(meta), path('*.json') , emit: json + tuple val(meta), path('*.html') , emit: html + tuple val(meta), path('*.log') , emit: log + path "versions.yml" , emit: versions + tuple val(meta), path('*.fail.fastq.gz') , optional:true, emit: reads_fail + tuple val(meta), path('*.merged.fastq.gz'), optional:true, emit: reads_merged + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def fail_fastq = save_trimmed_fail && meta.single_end ? "--failed_out ${prefix}.fail.fastq.gz" : save_trimmed_fail && !meta.single_end ? "--unpaired1 ${prefix}_1.fail.fastq.gz --unpaired2 ${prefix}_2.fail.fastq.gz" : '' + // Added soft-links to original fastqs for consistent naming in MultiQC + // Use single ended for interleaved. Add --interleaved_in in config. + if ( task.ext.args?.contains('--interleaved_in') ) { + """ + [ ! -f ${prefix}.fastq.gz ] && ln -sf $reads ${prefix}.fastq.gz + + fastp \\ + --stdout \\ + --in1 ${prefix}.fastq.gz \\ + --thread $task.cpus \\ + --json ${prefix}.fastp.json \\ + --html ${prefix}.fastp.html \\ + $fail_fastq \\ + $args \\ + 2> ${prefix}.fastp.log \\ + | gzip -c > ${prefix}.fastp.fastq.gz + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + fastp: \$(fastp --version 2>&1 | sed -e "s/fastp //g") + END_VERSIONS + """ + } else if (meta.single_end) { + """ + [ ! -f ${prefix}.fastq.gz ] && ln -sf $reads ${prefix}.fastq.gz + + fastp \\ + --stdout \\ + --in1 ${prefix}.fastq.gz \\ + --out1 ${prefix}.fastp.fastq.gz \\ + --thread $task.cpus \\ + --json ${prefix}.fastp.json \\ + --html ${prefix}.fastp.html \\ + $fail_fastq \\ + $args \\ + 2> ${prefix}.fastp.log + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + fastp: \$(fastp --version 2>&1 | sed -e "s/fastp //g") + END_VERSIONS + """ + } else { + def merge_fastq = save_merged ? "-m --merged_out ${prefix}.merged.fastq.gz" : '' + """ + [ ! -f ${prefix}_1.fastq.gz ] && ln -sf ${reads[0]} ${prefix}_1.fastq.gz + [ ! -f ${prefix}_2.fastq.gz ] && ln -sf ${reads[1]} ${prefix}_2.fastq.gz + fastp \\ + --in1 ${prefix}_1.fastq.gz \\ + --in2 ${prefix}_2.fastq.gz \\ + --out1 ${prefix}_1.fastp.fastq.gz \\ + --out2 ${prefix}_2.fastp.fastq.gz \\ + --json ${prefix}.fastp.json \\ + --html ${prefix}.fastp.html \\ + $fail_fastq \\ + $merge_fastq \\ + --thread $task.cpus \\ + --detect_adapter_for_pe \\ + $args \\ + 2> ${prefix}.fastp.log + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + fastp: \$(fastp --version 2>&1 | sed -e "s/fastp //g") + END_VERSIONS + """ + } +} diff --git a/modules/nf-core/fastp/meta.yml b/modules/nf-core/fastp/meta.yml new file mode 100644 index 00000000..2368fded --- /dev/null +++ b/modules/nf-core/fastp/meta.yml @@ -0,0 +1,69 @@ +name: fastp +description: Perform adapter/quality trimming on sequencing reads +keywords: + - trimming + - quality control + - fastq +tools: + - fastp: + description: | + A tool designed to provide fast all-in-one preprocessing for FastQ files. This tool is developed in C++ with multithreading supported to afford high performance. 
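+      # Not part of the tool description: non-default fastp options are normally passed to this
+      # module via `ext.args` in the pipeline's `conf/modules.config`, for example
+      #   process {
+      #       withName: 'FASTP' {
+      #           ext.args = '--qualified_quality_phred 20 --length_required 25'
+      #       }
+      #   }
+      # The option values shown are placeholders, not pipeline defaults.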
+ documentation: https://github.com/OpenGene/fastp + doi: https://doi.org/10.1093/bioinformatics/bty560 + licence: ["MIT"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information. Use 'single_end: true' to specify single ended or interleaved FASTQs. Use 'single_end: false' for paired-end reads. + e.g. [ id:'test', single_end:false ] + - reads: + type: file + description: | + List of input FastQ files of size 1 and 2 for single-end and paired-end data, + respectively. If you wish to run interleaved paired-end data, supply as single-end data + but with `--interleaved_in` in your `modules.conf`'s `ext.args` for the module. + - save_trimmed_fail: + type: boolean + description: Specify true to save files that failed to pass trimming thresholds ending in `*.fail.fastq.gz` + - save_merged: + type: boolean + description: Specify true to save all merged reads to the a file ending in `*.merged.fastq.gz` + +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - reads: + type: file + description: The trimmed/modified/unmerged fastq reads + pattern: "*fastp.fastq.gz" + - json: + type: file + description: Results in JSON format + pattern: "*.json" + - html: + type: file + description: Results in HTML format + pattern: "*.html" + - log: + type: file + description: fastq log file + pattern: "*.log" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - reads_fail: + type: file + description: Reads the failed the preprocessing + pattern: "*fail.fastq.gz" + - reads_merged: + type: file + description: Reads that were successfully merged + pattern: "*.{merged.fastq.gz}" +authors: + - "@drpatelh" + - "@kevinmenden" diff --git a/modules/nf-core/fastqc/main.nf b/modules/nf-core/fastqc/main.nf new file mode 100644 index 00000000..05730368 --- /dev/null +++ b/modules/nf-core/fastqc/main.nf @@ -0,0 +1,59 @@ +process FASTQC { + tag "$meta.id" + label 'process_medium' + + conda (params.enable_conda ? "bioconda::fastqc=0.11.9" : null) + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/fastqc:0.11.9--0' : + 'quay.io/biocontainers/fastqc:0.11.9--0' }" + + input: + tuple val(meta), path(reads) + + output: + tuple val(meta), path("*.html"), emit: html + tuple val(meta), path("*.zip") , emit: zip + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + // Add soft-links to original FastQs for consistent naming in pipeline + def prefix = task.ext.prefix ?: "${meta.id}" + if (meta.single_end) { + """ + [ ! -f ${prefix}.fastq.gz ] && ln -s $reads ${prefix}.fastq.gz + fastqc $args --threads $task.cpus ${prefix}.fastq.gz + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + fastqc: \$( fastqc --version | sed -e "s/FastQC v//g" ) + END_VERSIONS + """ + } else { + """ + [ ! -f ${prefix}_1.fastq.gz ] && ln -s ${reads[0]} ${prefix}_1.fastq.gz + [ ! 
-f ${prefix}_2.fastq.gz ] && ln -s ${reads[1]} ${prefix}_2.fastq.gz + fastqc $args --threads $task.cpus ${prefix}_1.fastq.gz ${prefix}_2.fastq.gz + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + fastqc: \$( fastqc --version | sed -e "s/FastQC v//g" ) + END_VERSIONS + """ + } + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.html + touch ${prefix}.zip + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + fastqc: \$( fastqc --version | sed -e "s/FastQC v//g" ) + END_VERSIONS + """ +} diff --git a/modules/nf-core/fastqc/meta.yml b/modules/nf-core/fastqc/meta.yml new file mode 100644 index 00000000..4da5bb5a --- /dev/null +++ b/modules/nf-core/fastqc/meta.yml @@ -0,0 +1,52 @@ +name: fastqc +description: Run FastQC on sequenced reads +keywords: + - quality control + - qc + - adapters + - fastq +tools: + - fastqc: + description: | + FastQC gives general quality metrics about your reads. + It provides information about the quality score distribution + across your reads, the per base sequence content (%A/C/G/T). + You get information about adapter contamination and other + overrepresented sequences. + homepage: https://www.bioinformatics.babraham.ac.uk/projects/fastqc/ + documentation: https://www.bioinformatics.babraham.ac.uk/projects/fastqc/Help/ + licence: ["GPL-2.0-only"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - reads: + type: file + description: | + List of input FastQ files of size 1 and 2 for single-end and paired-end data, + respectively. +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - html: + type: file + description: FastQC report + pattern: "*_{fastqc.html}" + - zip: + type: file + description: FastQC report archive + pattern: "*_{fastqc.zip}" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@drpatelh" + - "@grst" + - "@ewels" + - "@FelixKrueger" diff --git a/modules/nf-core/gffread/main.nf b/modules/nf-core/gffread/main.nf new file mode 100644 index 00000000..7c575c97 --- /dev/null +++ b/modules/nf-core/gffread/main.nf @@ -0,0 +1,33 @@ +process GFFREAD { + tag "$gff" + label 'process_low' + + conda (params.enable_conda ? "bioconda::gffread=0.12.1" : null) + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/gffread:0.12.1--h8b12597_0' : + 'quay.io/biocontainers/gffread:0.12.1--h8b12597_0' }" + + input: + path gff + + output: + path "*.gtf" , emit: gtf + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${gff.baseName}" + """ + gffread \\ + $gff \\ + $args \\ + -o ${prefix}.gtf + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gffread: \$(gffread --version 2>&1) + END_VERSIONS + """ +} diff --git a/modules/nf-core/gffread/meta.yml b/modules/nf-core/gffread/meta.yml new file mode 100644 index 00000000..20335747 --- /dev/null +++ b/modules/nf-core/gffread/meta.yml @@ -0,0 +1,33 @@ +name: gffread +description: Validate, filter, convert and perform various other operations on GFF files +keywords: + - gff + - conversion + - validation +tools: + - gffread: + description: GFF/GTF utility providing format conversions, region filtering, FASTA sequence extraction and more. 
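+      # Not part of the tool description: a minimal usage sketch, assuming ch_gff is a channel
+      # holding the uncompressed GFF3 annotation in the calling workflow:
+      #   GFFREAD ( ch_gff )
+      #   ch_gtf = GFFREAD.out.gtf   // GTF consumed by downstream genome-preparation steps
+      # The channel names are assumptions for illustration only.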
+ homepage: http://ccb.jhu.edu/software/stringtie/gff.shtml#gffread + documentation: http://ccb.jhu.edu/software/stringtie/gff.shtml#gffread + tool_dev_url: https://github.com/gpertea/gffread + doi: 10.12688/f1000research.23297.1 + licence: ["MIT"] + +input: + - gff: + type: file + description: A reference file in either the GFF3, GFF2 or GTF format. + pattern: "*.{gff, gtf}" + +output: + - gtf: + type: file + description: GTF file resulting from the conversion of the GFF input file + pattern: "*.{gtf}" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + +authors: + - "@emiller88" diff --git a/modules/nf-core/gunzip/main.nf b/modules/nf-core/gunzip/main.nf new file mode 100644 index 00000000..fa6ba26a --- /dev/null +++ b/modules/nf-core/gunzip/main.nf @@ -0,0 +1,44 @@ +process GUNZIP { + tag "$archive" + label 'process_single' + + conda (params.enable_conda ? "conda-forge::sed=4.7" : null) + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/ubuntu:20.04' : + 'ubuntu:20.04' }" + + input: + tuple val(meta), path(archive) + + output: + tuple val(meta), path("$gunzip"), emit: gunzip + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + gunzip = archive.toString() - '.gz' + """ + gunzip \\ + -f \\ + $args \\ + $archive + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gunzip: \$(echo \$(gunzip --version 2>&1) | sed 's/^.*(gzip) //; s/ Copyright.*\$//') + END_VERSIONS + """ + + stub: + gunzip = archive.toString() - '.gz' + """ + touch $gunzip + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gunzip: \$(echo \$(gunzip --version 2>&1) | sed 's/^.*(gzip) //; s/ Copyright.*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/gunzip/meta.yml b/modules/nf-core/gunzip/meta.yml new file mode 100644 index 00000000..4d2ebc84 --- /dev/null +++ b/modules/nf-core/gunzip/meta.yml @@ -0,0 +1,34 @@ +name: gunzip +description: Compresses and decompresses files. +keywords: + - gunzip + - compression +tools: + - gunzip: + description: | + gzip is a file format and a software application used for file compression and decompression. + documentation: https://www.gnu.org/software/gzip/manual/gzip.html + licence: ["GPL-3.0-or-later"] +input: + - meta: + type: map + description: | + Optional groovy Map containing meta information + e.g. [ id:'test', single_end:false ] + - archive: + type: file + description: File to be compressed/uncompressed + pattern: "*.*" +output: + - gunzip: + type: file + description: Compressed/uncompressed file + pattern: "*.*" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@joseespinosa" + - "@drpatelh" + - "@jfy133" diff --git a/modules/nf-core/homer/findpeaks/main.nf b/modules/nf-core/homer/findpeaks/main.nf new file mode 100644 index 00000000..29a08c69 --- /dev/null +++ b/modules/nf-core/homer/findpeaks/main.nf @@ -0,0 +1,37 @@ +process HOMER_FINDPEAKS { + tag "$meta.id" + label 'process_medium' + + // WARN: Version information not provided by tool on CLI. Please update version string below when bumping container versions. + conda (params.enable_conda ? "bioconda::homer=4.11=pl526hc9558a2_3" : null) + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/homer:4.11--pl526hc9558a2_3' : + 'quay.io/biocontainers/homer:4.11--pl526hc9558a2_3' }" + + input: + tuple val(meta), path(tagDir) + + output: + tuple val(meta), path("*.peaks.txt"), emit: txt + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def VERSION = '4.11' // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions. + """ + + findPeaks \\ + $tagDir \\ + $args \\ + -o ${prefix}.peaks.txt + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + homer: $VERSION + END_VERSIONS + """ +} diff --git a/modules/nf-core/homer/findpeaks/meta.yml b/modules/nf-core/homer/findpeaks/meta.yml new file mode 100644 index 00000000..72eb8d0c --- /dev/null +++ b/modules/nf-core/homer/findpeaks/meta.yml @@ -0,0 +1,40 @@ +name: homer_findpeaks +description: Find peaks with HOMER suite +keywords: + - annotations + - peaks +tools: + - homer: + description: | + HOMER (Hypergeometric Optimization of Motif EnRichment) is a suite of tools for Motif Discovery and next-gen sequencing analysis. + homepage: "http://homer.ucsd.edu/homer/index.html" + documentation: "http://homer.ucsd.edu/homer/" + tool_dev_url: "http://homer.ucsd.edu/homer/ngs/peaks.html" + doi: 10.1016/j.molcel.2010.05.004. + licence: ["GPL-3.0-or-later"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - tagDir: + type: directory + description: "The 'Tag Directory'" + pattern: "tagDir" +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - peaks: + type: file + description: The found peaks + pattern: "*.peaks.txt" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@EMiller88" diff --git a/modules/nf-core/homer/maketagdirectory/main.nf b/modules/nf-core/homer/maketagdirectory/main.nf new file mode 100644 index 00000000..d679b0cf --- /dev/null +++ b/modules/nf-core/homer/maketagdirectory/main.nf @@ -0,0 +1,41 @@ + +process HOMER_MAKETAGDIRECTORY { + tag "$meta.id" + label 'process_medium' + + // WARN: Version information not provided by tool on CLI. Please update version string below when bumping container versions. + conda (params.enable_conda ? "bioconda::homer=4.11 bioconda::samtools=1.11 conda-forge::r-base=4.0.2 bioconda::bioconductor-deseq2=1.30.0 bioconda::bioconductor-edger=3.32.0 anaconda::perl=5.26.2" : null) + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/mulled-v2-29293b111ffe5b4c1d1e14c711264aaed6b97b4a:594338b771cacf1623bd27772b5e12825f8835f2-0' : + 'quay.io/biocontainers/mulled-v2-29293b111ffe5b4c1d1e14c711264aaed6b97b4a:594338b771cacf1623bd27772b5e12825f8835f2-0' }" + + input: + tuple val(meta), path(bam) + path fasta + + output: + tuple val(meta), path("*_tagdir") , emit: tagdir + tuple val(meta), path("*_tagdir/tagInfo.txt"), emit: taginfo + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def VERSION = '4.11' // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions. 
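+    // Not part of the command itself: the emitted tag directory is what the other HOMER modules
+    // in this changeset consume downstream, e.g. (channel wiring assumed for illustration):
+    //   HOMER_MAKETAGDIRECTORY ( ch_bam, ch_fasta )
+    //   HOMER_FINDPEAKS        ( HOMER_MAKETAGDIRECTORY.out.tagdir )
+    //   HOMER_MAKEUCSCFILE     ( HOMER_MAKETAGDIRECTORY.out.tagdir )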
+ """ + makeTagDirectory \\ + ${prefix}_tagdir \\ + -genome $fasta \\ + $args \\ + $bam + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + homer: $VERSION + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/homer/maketagdirectory/meta.yml b/modules/nf-core/homer/maketagdirectory/meta.yml new file mode 100644 index 00000000..ccd2d6a8 --- /dev/null +++ b/modules/nf-core/homer/maketagdirectory/meta.yml @@ -0,0 +1,73 @@ +name: homer_maketagdirectory +description: Create a tag directory with the HOMER suite +keywords: + - peaks + - bed + - bam + - sam +tools: + - homer: + description: | + HOMER (Hypergeometric Optimization of Motif EnRichment) is a suite of tools for Motif Discovery and next-gen sequencing analysis. + documentation: http://homer.ucsd.edu/homer/ + doi: 10.1016/j.molcel.2010.05.004. + licence: ["GPL-3.0-or-later"] + - samtools: + description: | + SAMtools is a set of utilities for interacting with and post-processing + short DNA sequence read alignments in the SAM, BAM and CRAM formats, written by Heng Li. + These files are generated as output by short read aligners like BWA. + homepage: http://www.htslib.org/ + documentation: hhttp://www.htslib.org/doc/samtools.html + doi: 10.1093/bioinformatics/btp352 + licence: ["MIT"] + - DESeq2: + description: | + Differential gene expression analysis based on the negative binomial distribution + homepage: "https://bioconductor.org/packages/DESeq2" + documentation: "https://bioconductor.org/packages/DESeq2" + tool_dev_url: "https://github.com/mikelove/DESeq2" + doi: 10.18129/B9.bioc.DESeq2 + licence: ["LGPL-3.0-or-later"] + - edgeR: + description: | + Empirical Analysis of Digital Gene Expression Data in R + homepage: "https://bioinf.wehi.edu.au/edgeR" + documentation: "https://bioconductor.org/packages/edgeR" + tool_dev_url: " https://git.bioconductor.org/packages/edgeR" + doi: 10.18129/B9.bioc.edgeR + licence: ["GPL >=2"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - bam: + type: file + description: BAM/BED/SAM file + pattern: "*.{bam,bed,sam}" + - fasta: + type: file + description: Fasta file of reference genome + pattern: "*.fasta" +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - tagdir: + type: directory + description: The "Tag Directory" + pattern: "*_tagdir" + - taginfo: + type: directory + description: The tagInfo.txt included to ensure there's proper output + pattern: "*_tagdir/tagInfo.txt" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@EMiller88" diff --git a/modules/nf-core/homer/makeucscfile/main.nf b/modules/nf-core/homer/makeucscfile/main.nf new file mode 100644 index 00000000..7ef58b3d --- /dev/null +++ b/modules/nf-core/homer/makeucscfile/main.nf @@ -0,0 +1,36 @@ +process HOMER_MAKEUCSCFILE { + tag "$meta.id" + label 'process_medium' + + // WARN: Version information not provided by tool on CLI. Please update version string below when bumping container versions. + conda (params.enable_conda ? "bioconda::homer=4.11=pl526hc9558a2_3" : null) + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/homer:4.11--pl526hc9558a2_3' : + 'quay.io/biocontainers/homer:4.11--pl526hc9558a2_3' }" + + input: + tuple val(meta), path(tagDir) + + output: + tuple val(meta), path("*.bedGraph.gz"), emit: bedGraph + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def VERSION = '4.11' // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions. + """ + makeUCSCfile \\ + $tagDir \\ + -o ${prefix}.bedGraph \\ + $args + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + homer: $VERSION + END_VERSIONS + """ +} diff --git a/modules/nf-core/homer/makeucscfile/meta.yml b/modules/nf-core/homer/makeucscfile/meta.yml new file mode 100644 index 00000000..039fd37f --- /dev/null +++ b/modules/nf-core/homer/makeucscfile/meta.yml @@ -0,0 +1,39 @@ +name: homer_makeucscfile +description: Create a UCSC bed graph with the HOMER suite +keywords: + - peaks + - bed + - bedGraph +tools: + - homer: + description: | + HOMER (Hypergeometric Optimization of Motif EnRichment) is a suite of tools for Motif Discovery and next-gen sequencing analysis. + documentation: http://homer.ucsd.edu/homer/ + doi: 10.1016/j.molcel.2010.05.004. + licence: ["GPL-3.0-or-later"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - tagDir: + type: directory + description: "The 'Tag Directory'" + pattern: "tagDir" +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - bedGraph: + type: file + description: The UCSC bed graph + pattern: "*.bedGraph.gz" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@EMiller88" diff --git a/modules/nf-core/homer/pos2bed/main.nf b/modules/nf-core/homer/pos2bed/main.nf new file mode 100644 index 00000000..f0a2ee21 --- /dev/null +++ b/modules/nf-core/homer/pos2bed/main.nf @@ -0,0 +1,33 @@ +process HOMER_POS2BED { + tag "$meta.id" + label 'process_medium' + + // WARN: Version information not provided by tool on CLI. Please update version string below when bumping container versions. + conda (params.enable_conda ? "bioconda::homer=4.11" : null) + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/homer:4.11--pl526hc9558a2_3' : + 'quay.io/biocontainers/homer:4.11--pl526hc9558a2_3' }" + + input: + tuple val(meta), path(peaks) + + output: + tuple val(meta), path("*.bed"), emit: bed + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def VERSION = '4.11' // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions. 
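// ---------------------------------------------------------------------------
// Editor's note (illustrative, not part of this module): these nf-core modules
// take their command-line options from task.ext.args and their output basenames
// from task.ext.prefix, so per-process options are supplied centrally in
// conf/modules.config (included at the bottom of nextflow.config) rather than by
// editing the module code. A hypothetical entry for the HOMER peak caller added
// earlier in this diff might look like the following; the '-style groseq' value
// and the prefix are assumed examples, not settings taken from this pipeline:
process {
    withName: 'HOMER_FINDPEAKS' {
        ext.args   = '-style groseq'         // hypothetical findPeaks options
        ext.prefix = { "${meta.id}_homer" }  // hypothetical output prefix
    }
}
// ---------------------------------------------------------------------------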
+ """ + pos2bed.pl $peaks > ${prefix}.bed + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + homer: $VERSION + END_VERSIONS + """ +} diff --git a/modules/nf-core/homer/pos2bed/meta.yml b/modules/nf-core/homer/pos2bed/meta.yml new file mode 100644 index 00000000..fb75bb1f --- /dev/null +++ b/modules/nf-core/homer/pos2bed/meta.yml @@ -0,0 +1,42 @@ +name: "homer_pos2bed" +description: Coverting from HOMER peak to BED file formats +keywords: + - peaks +tools: + - "homer": + description: | + HOMER (Hypergeometric Optimization of Motif EnRichment) is a suite of tools for Motif Discovery and next-gen sequencing analysis. + homepage: "http://homer.ucsd.edu/homer/index.html" + documentation: "http://homer.ucsd.edu/homer/" + tool_dev_url: "http://homer.ucsd.edu/homer/ngs/miscellaneous.html" + doi: 10.1016/j.molcel.2010.05.004. + licence: ["GPL-3.0-or-later"] + +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - tagDir: + type: directory + description: "The 'Tag Directory'" + pattern: "tagDir" + +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - bed: + type: file + description: BED file + pattern: "*.bed" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + +authors: + - "@Emiller88" diff --git a/modules/nf-core/multiqc/main.nf b/modules/nf-core/multiqc/main.nf new file mode 100644 index 00000000..a8159a57 --- /dev/null +++ b/modules/nf-core/multiqc/main.nf @@ -0,0 +1,53 @@ +process MULTIQC { + label 'process_single' + + conda (params.enable_conda ? 'bioconda::multiqc=1.13' : null) + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/multiqc:1.13--pyhdfd78af_0' : + 'quay.io/biocontainers/multiqc:1.13--pyhdfd78af_0' }" + + input: + path multiqc_files, stageAs: "?/*" + path(multiqc_config) + path(extra_multiqc_config) + path(multiqc_logo) + + output: + path "*multiqc_report.html", emit: report + path "*_data" , emit: data + path "*_plots" , optional:true, emit: plots + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def config = multiqc_config ? "--config $multiqc_config" : '' + def extra_config = extra_multiqc_config ? "--config $extra_multiqc_config" : '' + """ + multiqc \\ + --force \\ + $args \\ + $config \\ + $extra_config \\ + . + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + multiqc: \$( multiqc --version | sed -e "s/multiqc, version //g" ) + END_VERSIONS + """ + + stub: + """ + touch multiqc_data + touch multiqc_plots + touch multiqc_report.html + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + multiqc: \$( multiqc --version | sed -e "s/multiqc, version //g" ) + END_VERSIONS + """ +} diff --git a/modules/nf-core/multiqc/meta.yml b/modules/nf-core/multiqc/meta.yml new file mode 100644 index 00000000..ebc29b27 --- /dev/null +++ b/modules/nf-core/multiqc/meta.yml @@ -0,0 +1,55 @@ +name: MultiQC +description: Aggregate results from bioinformatics analyses across many samples into a single report +keywords: + - QC + - bioinformatics tools + - Beautiful stand-alone HTML report +tools: + - multiqc: + description: | + MultiQC searches a given directory for analysis logs and compiles a HTML report. 
+ It's a general use tool, perfect for summarising the output from numerous bioinformatics tools. + homepage: https://multiqc.info/ + documentation: https://multiqc.info/docs/ + licence: ["GPL-3.0-or-later"] + +input: + - multiqc_files: + type: file + description: | + List of reports / files recognised by MultiQC, for example the html and zip output of FastQC + - multiqc_config: + type: file + description: Optional config yml for MultiQC + pattern: "*.{yml,yaml}" + - extra_multiqc_config: + type: file + description: Second optional config yml for MultiQC. Will override common sections in multiqc_config. + pattern: "*.{yml,yaml}" + - multiqc_logo: + type: file + description: Optional logo file for MultiQC + pattern: "*.{png}" + +output: + - report: + type: file + description: MultiQC report file + pattern: "multiqc_report.html" + - data: + type: dir + description: MultiQC data dir + pattern: "multiqc_data" + - plots: + type: file + description: Plots created by MultiQC + pattern: "*_data" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@abhi18av" + - "@bunop" + - "@drpatelh" + - "@jfy133" diff --git a/modules/nf-core/pints/caller/main.nf b/modules/nf-core/pints/caller/main.nf new file mode 100644 index 00000000..c7ab2ae0 --- /dev/null +++ b/modules/nf-core/pints/caller/main.nf @@ -0,0 +1,44 @@ +process PINTS_CALLER { + tag "$meta.id" + label 'process_medium' + + conda (params.enable_conda ? "bioconda::pypints=1.1.6" : null) + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/pypints:1.1.6--pyh5e36f6f_1' : + 'quay.io/biocontainers/pypints:1.1.6--pyh5e36f6f_1' }" + + input: + tuple val(meta), path(bams) + + output: + tuple val(meta), path("*_divergent_peaks.bed") , emit: divergent_TREs + tuple val(meta), path("*_bidirectional_peaks.bed") , emit: bidirectional_TREs + tuple val(meta), path("*_unidirectional_peaks.bed"), emit: unidirectional_TREs + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + // TODO handle bigwigs + // def input_type = ("${input[0]}".endsWith(".bam")) ? "--bam-file $input" : + // ("$input".contains(".bw")) ? "--bw-pl ${input[0]} --bw-mn ${input[1]}" : + // error "Please use bam or BigWig files" + """ + pints_caller \\ + --bam-file $bams \\ + --save-to . 
\\ + --file-prefix $prefix \\ + --thread $task.cpus \\ + --dont-check-updates \\ + $args + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + python: \$(python --version | sed 's/Python //g') + pints: \$(pints_caller --version) + END_VERSIONS + """ +} diff --git a/modules/nf-core/pints/caller/meta.yml b/modules/nf-core/pints/caller/meta.yml new file mode 100644 index 00000000..647e5b89 --- /dev/null +++ b/modules/nf-core/pints/caller/meta.yml @@ -0,0 +1,59 @@ +name: "pints_caller" +description: Main caller script for peak calling +keywords: + - peak-calling + - CoPRO + - GRO-cap + - PRO-cap + - CAGE + - NETCAGE + - RAMPAGE + - csRNA-seq + - STRIPE-seq + - PRO-seq + - GRO-seq +tools: + - "pints": + description: "Peak Identifier for Nascent Transcripts Starts (PINTS)" + homepage: "https://pints.yulab.org/" + documentation: "https://github.com/hyulab/PINTS/blob/main/README.md" + tool_dev_url: "https://github.com/hyulab/PINTS" + doi: "https://doi.org/10.1038/s41587-022-01211-7" + licence: "['GPL']" + +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - bams: + type: file + description: BAM/ file + pattern: "*.{bam}" + +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - divergent_TREs: + type: file + description: Divergent TREs + pattern: "*_divergent_peaks.bed" + - bidirectional_TREs: + type: file + description: Divergent TREs and convergent TREs + pattern: "*_bidirectional_peaks.bed" + - unidirectional_TREs: + type: file + description: Unidirectional TREs, maybe lncRNAs transcribed from enhancers (e-lncRNAs) + pattern: "*_unidirectional_peaks.bed" + +authors: + - "@Emiller88" diff --git a/modules/nf-core/preseq/ccurve/main.nf b/modules/nf-core/preseq/ccurve/main.nf new file mode 100644 index 00000000..79ef66f5 --- /dev/null +++ b/modules/nf-core/preseq/ccurve/main.nf @@ -0,0 +1,40 @@ +process PRESEQ_CCURVE { + tag "$meta.id" + label 'process_single' + label 'error_ignore' + + conda (params.enable_conda ? "bioconda::preseq=3.1.2" : null) + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/preseq:3.1.2--h445547b_2': + 'quay.io/biocontainers/preseq:3.1.2--h445547b_2' }" + + input: + tuple val(meta), path(bam) + + output: + tuple val(meta), path("*.c_curve.txt"), emit: c_curve + tuple val(meta), path("*.log") , emit: log + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def paired_end = meta.single_end ? 
'' : '-pe' + """ + preseq \\ + c_curve \\ + $args \\ + $paired_end \\ + -output ${prefix}.c_curve.txt \\ + $bam + cp .command.err ${prefix}.command.log + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + preseq: \$(echo \$(preseq 2>&1) | sed 's/^.*Version: //; s/Usage:.*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/preseq/ccurve/meta.yml b/modules/nf-core/preseq/ccurve/meta.yml new file mode 100644 index 00000000..86ed6296 --- /dev/null +++ b/modules/nf-core/preseq/ccurve/meta.yml @@ -0,0 +1,48 @@ +name: preseq_ccurve +description: Software for predicting library complexity and genome coverage in high-throughput sequencing +keywords: + - preseq + - library + - complexity +tools: + - preseq: + description: Software for predicting library complexity and genome coverage in high-throughput sequencing + homepage: http://smithlabresearch.org/software/preseq/ + documentation: http://smithlabresearch.org/wp-content/uploads/manual.pdf + tool_dev_url: https://github.com/smithlabcode/preseq + doi: "" + licence: ["GPL"] + +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - bam: + type: file + description: BAM/CRAM/SAM file + pattern: "*.{bam,cram,sam}" + +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - ccurve: + type: file + description: File containing output of Preseq c curve + pattern: "*.{c_curve.txt}" + - log: + type: file + description: Log file containing stderr produced by Preseq + pattern: "*.{log}" + +authors: + - "@drpatelh" + - "@Emiller88" diff --git a/modules/nf-core/preseq/lcextrap/main.nf b/modules/nf-core/preseq/lcextrap/main.nf new file mode 100644 index 00000000..a98a922c --- /dev/null +++ b/modules/nf-core/preseq/lcextrap/main.nf @@ -0,0 +1,40 @@ +process PRESEQ_LCEXTRAP { + tag "$meta.id" + label 'process_single' + label 'error_ignore' + + conda (params.enable_conda ? "bioconda::preseq=3.1.2" : null) + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/preseq:3.1.2--h445547b_2': + 'quay.io/biocontainers/preseq:3.1.2--h445547b_2' }" + + input: + tuple val(meta), path(bam) + + output: + tuple val(meta), path("*.lc_extrap.txt"), emit: lc_extrap + tuple val(meta), path("*.log") , emit: log + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def paired_end = meta.single_end ? 
'' : '-pe' + """ + preseq \\ + lc_extrap \\ + $args \\ + $paired_end \\ + -output ${prefix}.lc_extrap.txt \\ + $bam + cp .command.err ${prefix}.command.log + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + preseq: \$(echo \$(preseq 2>&1) | sed 's/^.*Version: //; s/Usage:.*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/preseq/lcextrap/meta.yml b/modules/nf-core/preseq/lcextrap/meta.yml new file mode 100755 index 00000000..f1be05a2 --- /dev/null +++ b/modules/nf-core/preseq/lcextrap/meta.yml @@ -0,0 +1,48 @@ +name: preseq_lcextrap +description: Software for predicting library complexity and genome coverage in high-throughput sequencing +keywords: + - preseq + - library + - complexity +tools: + - preseq: + description: Software for predicting library complexity and genome coverage in high-throughput sequencing + homepage: http://smithlabresearch.org/software/preseq/ + documentation: http://smithlabresearch.org/wp-content/uploads/manual.pdf + tool_dev_url: https://github.com/smithlabcode/preseq + doi: "" + licence: ["GPL"] + +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - bam: + type: file + description: BAM/CRAM/SAM file + pattern: "*.{bam,cram,sam}" + +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - lc_extrap: + type: file + description: File containing output of Preseq lcextrap + pattern: "*.{lc_extrap.txt}" + - log: + type: file + description: Log file containing stderr produced by Preseq + pattern: "*.{log}" + +authors: + - "@drpatelh" + - "@Emiller88" diff --git a/modules/nf-core/rseqc/inferexperiment/main.nf b/modules/nf-core/rseqc/inferexperiment/main.nf new file mode 100644 index 00000000..23c1b688 --- /dev/null +++ b/modules/nf-core/rseqc/inferexperiment/main.nf @@ -0,0 +1,36 @@ +process RSEQC_INFEREXPERIMENT { + tag "$meta.id" + label 'process_medium' + + conda (params.enable_conda ? "bioconda::rseqc=3.0.1 'conda-forge::r-base>=3.5'" : null) + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/rseqc:3.0.1--py37h516909a_1' : + 'quay.io/biocontainers/rseqc:3.0.1--py37h516909a_1' }" + + input: + tuple val(meta), path(bam) + path bed + + output: + tuple val(meta), path("*.infer_experiment.txt"), emit: txt + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + """ + infer_experiment.py \\ + -i $bam \\ + -r $bed \\ + $args \\ + > ${prefix}.infer_experiment.txt + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + rseqc: \$(infer_experiment.py --version | sed -e "s/infer_experiment.py //g") + END_VERSIONS + """ +} diff --git a/modules/nf-core/rseqc/inferexperiment/meta.yml b/modules/nf-core/rseqc/inferexperiment/meta.yml new file mode 100644 index 00000000..b4162059 --- /dev/null +++ b/modules/nf-core/rseqc/inferexperiment/meta.yml @@ -0,0 +1,40 @@ +name: rseqc_inferexperiment +description: Infer strandedness from sequencing reads +keywords: + - rnaseq + - experiment +tools: + - rseqc: + description: | + RSeQC package provides a number of useful modules that can comprehensively evaluate + high throughput sequence data especially RNA-seq data. 
+ homepage: http://rseqc.sourceforge.net/ + documentation: http://rseqc.sourceforge.net/ + doi: 10.1093/bioinformatics/bts356 + licence: ["GPL-3.0-or-later"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - bam: + type: file + description: the bam file to calculate statistics of + pattern: "*.{bam}" + - bed: + type: file + description: a bed file for the reference gene model + pattern: "*.{bed}" +output: + - txt: + type: file + description: infer_experiment results report + pattern: "*.infer_experiment.txt" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@drpatelh" + - "@kevinmenden" diff --git a/modules/nf-core/rseqc/readdistribution/main.nf b/modules/nf-core/rseqc/readdistribution/main.nf new file mode 100644 index 00000000..3198c5c6 --- /dev/null +++ b/modules/nf-core/rseqc/readdistribution/main.nf @@ -0,0 +1,35 @@ +process RSEQC_READDISTRIBUTION { + tag "$meta.id" + label 'process_medium' + + conda (params.enable_conda ? "bioconda::rseqc=3.0.1 'conda-forge::r-base>=3.5'" : null) + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/rseqc:3.0.1--py37h516909a_1' : + 'quay.io/biocontainers/rseqc:3.0.1--py37h516909a_1' }" + + input: + tuple val(meta), path(bam) + path bed + + output: + tuple val(meta), path("*.read_distribution.txt"), emit: txt + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + """ + read_distribution.py \\ + -i $bam \\ + -r $bed \\ + > ${prefix}.read_distribution.txt + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + rseqc: \$(read_distribution.py --version | sed -e "s/read_distribution.py //g") + END_VERSIONS + """ +} diff --git a/modules/nf-core/rseqc/readdistribution/meta.yml b/modules/nf-core/rseqc/readdistribution/meta.yml new file mode 100644 index 00000000..94c64712 --- /dev/null +++ b/modules/nf-core/rseqc/readdistribution/meta.yml @@ -0,0 +1,41 @@ +name: rseqc_readdistribution +description: Calculate how mapped reads are distributed over genomic features +keywords: + - read distribution + - genomics + - rnaseq +tools: + - rseqc: + description: | + RSeQC package provides a number of useful modules that can comprehensively evaluate + high throughput sequence data especially RNA-seq data. + homepage: http://rseqc.sourceforge.net/ + documentation: http://rseqc.sourceforge.net/ + doi: 10.1093/bioinformatics/bts356 + licence: ["GPL-3.0-or-later"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - bam: + type: file + description: the alignment in bam format + pattern: "*.{bam}" + - bed: + type: file + description: a bed file for the reference gene model + pattern: "*.{bed}" +output: + - txt: + type: file + description: the read distribution report + pattern: "*.read_distribution.txt" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@drpatelh" + - "@kevinmenden" diff --git a/modules/nf-core/rseqc/readduplication/main.nf b/modules/nf-core/rseqc/readduplication/main.nf new file mode 100644 index 00000000..cb989a5f --- /dev/null +++ b/modules/nf-core/rseqc/readduplication/main.nf @@ -0,0 +1,37 @@ +process RSEQC_READDUPLICATION { + tag "$meta.id" + label 'process_medium' + + conda (params.enable_conda ? "bioconda::rseqc=3.0.1 'conda-forge::r-base>=3.5'" : null) + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/rseqc:3.0.1--py37h516909a_1' : + 'quay.io/biocontainers/rseqc:3.0.1--py37h516909a_1' }" + + input: + tuple val(meta), path(bam) + + output: + tuple val(meta), path("*seq.DupRate.xls"), emit: seq_xls + tuple val(meta), path("*pos.DupRate.xls"), emit: pos_xls + tuple val(meta), path("*.pdf") , emit: pdf + tuple val(meta), path("*.r") , emit: rscript + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + """ + read_duplication.py \\ + -i $bam \\ + -o $prefix \\ + $args + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + rseqc: \$(read_duplication.py --version | sed -e "s/read_duplication.py //g") + END_VERSIONS + """ +} diff --git a/modules/nf-core/rseqc/readduplication/meta.yml b/modules/nf-core/rseqc/readduplication/meta.yml new file mode 100644 index 00000000..5a866643 --- /dev/null +++ b/modules/nf-core/rseqc/readduplication/meta.yml @@ -0,0 +1,52 @@ +name: rseqc_readduplication +description: Calculate read duplication rate +keywords: + - rnaseq + - duplication +tools: + - rseqc: + description: | + RSeQC package provides a number of useful modules that can comprehensively evaluate + high throughput sequence data especially RNA-seq data. + homepage: http://rseqc.sourceforge.net/ + documentation: http://rseqc.sourceforge.net/ + doi: 10.1093/bioinformatics/bts356 + licence: ["GPL-3.0-or-later"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - bam: + type: file + description: the alignment in bam format + pattern: "*.{bam}" + - bed: + type: file + description: a bed file for the reference gene model + pattern: "*.{bed}" +output: + - seq_xls: + type: file + description: Read duplication rate determined from mapping position of read + pattern: "*seq.DupRate.xls" + - pos_xls: + type: file + description: Read duplication rate determined from sequence of read + pattern: "*pos.DupRate.xls" + - pdf: + type: file + description: plot of duplication rate + pattern: "*.pdf" + - rscript: + type: file + description: script to reproduce the plot + pattern: "*.R" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@drpatelh" + - "@kevinmenden" diff --git a/modules/nf-core/samtools/faidx/main.nf b/modules/nf-core/samtools/faidx/main.nf new file mode 100644 index 00000000..ef940db2 --- /dev/null +++ b/modules/nf-core/samtools/faidx/main.nf @@ -0,0 +1,44 @@ +process SAMTOOLS_FAIDX { + tag "$fasta" + label 'process_single' + + conda (params.enable_conda ? "bioconda::samtools=1.15.1" : null) + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/samtools:1.15.1--h1170115_0' : + 'quay.io/biocontainers/samtools:1.15.1--h1170115_0' }" + + input: + tuple val(meta), path(fasta) + + output: + tuple val(meta), path ("*.fai"), emit: fai + tuple val(meta), path ("*.gzi"), emit: gzi, optional: true + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + """ + samtools \\ + faidx \\ + $args \\ + $fasta + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ + + stub: + """ + touch ${fasta}.fai + cat <<-END_VERSIONS > versions.yml + + "${task.process}": + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/samtools/faidx/meta.yml b/modules/nf-core/samtools/faidx/meta.yml new file mode 100644 index 00000000..fe2fe9a1 --- /dev/null +++ b/modules/nf-core/samtools/faidx/meta.yml @@ -0,0 +1,47 @@ +name: samtools_faidx +description: Index FASTA file +keywords: + - index + - fasta +tools: + - samtools: + description: | + SAMtools is a set of utilities for interacting with and post-processing + short DNA sequence read alignments in the SAM, BAM and CRAM formats, written by Heng Li. + These files are generated as output by short read aligners like BWA. + homepage: http://www.htslib.org/ + documentation: http://www.htslib.org/doc/samtools.html + doi: 10.1093/bioinformatics/btp352 + licence: ["MIT"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - fasta: + type: file + description: FASTA file + pattern: "*.{fa,fasta}" +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - fai: + type: file + description: FASTA index file + pattern: "*.{fai}" + - gzi: + type: file + description: Optional gzip index file for compressed inputs + pattern: "*.gzi" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@drpatelh" + - "@ewels" + - "@phue" diff --git a/modules/nf-core/samtools/flagstat/main.nf b/modules/nf-core/samtools/flagstat/main.nf new file mode 100644 index 00000000..c3152aca --- /dev/null +++ b/modules/nf-core/samtools/flagstat/main.nf @@ -0,0 +1,35 @@ +process SAMTOOLS_FLAGSTAT { + tag "$meta.id" + label 'process_single' + + conda (params.enable_conda ? "bioconda::samtools=1.15.1" : null) + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/samtools:1.15.1--h1170115_0' : + 'quay.io/biocontainers/samtools:1.15.1--h1170115_0' }" + + input: + tuple val(meta), path(bam), path(bai) + + output: + tuple val(meta), path("*.flagstat"), emit: flagstat + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + """ + samtools \\ + flagstat \\ + --threads ${task.cpus} \\ + $bam \\ + > ${prefix}.flagstat + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/samtools/flagstat/meta.yml b/modules/nf-core/samtools/flagstat/meta.yml new file mode 100644 index 00000000..95269063 --- /dev/null +++ b/modules/nf-core/samtools/flagstat/meta.yml @@ -0,0 +1,49 @@ +name: samtools_flagstat +description: Counts the number of alignments in a BAM/CRAM/SAM file for each FLAG type +keywords: + - stats + - mapping + - counts + - bam + - sam + - cram +tools: + - samtools: + description: | + SAMtools is a set of utilities for interacting with and post-processing + short DNA sequence read alignments in the SAM, BAM and CRAM formats, written by Heng Li. + These files are generated as output by short read aligners like BWA. + homepage: http://www.htslib.org/ + documentation: hhttp://www.htslib.org/doc/samtools.html + doi: 10.1093/bioinformatics/btp352 + licence: ["MIT"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - bam: + type: file + description: BAM/CRAM/SAM file + pattern: "*.{bam,cram,sam}" + - bai: + type: file + description: Index for BAM/CRAM/SAM file + pattern: "*.{bai,crai,sai}" +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - flagstat: + type: file + description: File containing samtools flagstat output + pattern: "*.{flagstat}" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@drpatelh" diff --git a/modules/nf-core/samtools/idxstats/main.nf b/modules/nf-core/samtools/idxstats/main.nf new file mode 100644 index 00000000..87618e5f --- /dev/null +++ b/modules/nf-core/samtools/idxstats/main.nf @@ -0,0 +1,36 @@ +process SAMTOOLS_IDXSTATS { + tag "$meta.id" + label 'process_low' + + conda (params.enable_conda ? "bioconda::samtools=1.15.1" : null) + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/samtools:1.15.1--h1170115_0' : + 'quay.io/biocontainers/samtools:1.15.1--h1170115_0' }" + + input: + tuple val(meta), path(bam), path(bai) + + output: + tuple val(meta), path("*.idxstats"), emit: idxstats + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + + """ + samtools \\ + idxstats \\ + --threads ${task.cpus-1} \\ + $bam \\ + > ${prefix}.idxstats + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/samtools/idxstats/meta.yml b/modules/nf-core/samtools/idxstats/meta.yml new file mode 100644 index 00000000..3710ab88 --- /dev/null +++ b/modules/nf-core/samtools/idxstats/meta.yml @@ -0,0 +1,50 @@ +name: samtools_idxstats +description: Reports alignment summary statistics for a BAM/CRAM/SAM file +keywords: + - stats + - mapping + - counts + - chromosome + - bam + - sam + - cram +tools: + - samtools: + description: | + SAMtools is a set of utilities for interacting with and post-processing + short DNA sequence read alignments in the SAM, BAM and CRAM formats, written by Heng Li. + These files are generated as output by short read aligners like BWA. + homepage: http://www.htslib.org/ + documentation: hhttp://www.htslib.org/doc/samtools.html + doi: 10.1093/bioinformatics/btp352 + licence: ["MIT"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - bam: + type: file + description: BAM/CRAM/SAM file + pattern: "*.{bam,cram,sam}" + - bai: + type: file + description: Index for BAM/CRAM/SAM file + pattern: "*.{bai,crai,sai}" +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - idxstats: + type: file + description: File containing samtools idxstats output + pattern: "*.{idxstats}" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@drpatelh" diff --git a/modules/nf-core/samtools/index/main.nf b/modules/nf-core/samtools/index/main.nf new file mode 100644 index 00000000..e04e63e8 --- /dev/null +++ b/modules/nf-core/samtools/index/main.nf @@ -0,0 +1,48 @@ +process SAMTOOLS_INDEX { + tag "$meta.id" + label 'process_low' + + conda (params.enable_conda ? "bioconda::samtools=1.15.1" : null) + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/samtools:1.15.1--h1170115_0' : + 'quay.io/biocontainers/samtools:1.15.1--h1170115_0' }" + + input: + tuple val(meta), path(input) + + output: + tuple val(meta), path("*.bai") , optional:true, emit: bai + tuple val(meta), path("*.csi") , optional:true, emit: csi + tuple val(meta), path("*.crai"), optional:true, emit: crai + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + """ + samtools \\ + index \\ + -@ ${task.cpus-1} \\ + $args \\ + $input + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ + + stub: + """ + touch ${input}.bai + touch ${input}.crai + touch ${input}.csi + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/samtools/index/meta.yml b/modules/nf-core/samtools/index/meta.yml new file mode 100644 index 00000000..e5cadbc2 --- /dev/null +++ b/modules/nf-core/samtools/index/meta.yml @@ -0,0 +1,53 @@ +name: samtools_index +description: Index SAM/BAM/CRAM file +keywords: + - index + - bam + - sam + - cram +tools: + - samtools: + description: | + SAMtools is a set of utilities for interacting with and post-processing + short DNA sequence read alignments in the SAM, BAM and CRAM formats, written by Heng Li. + These files are generated as output by short read aligners like BWA. + homepage: http://www.htslib.org/ + documentation: hhttp://www.htslib.org/doc/samtools.html + doi: 10.1093/bioinformatics/btp352 + licence: ["MIT"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - bam: + type: file + description: BAM/CRAM/SAM file + pattern: "*.{bam,cram,sam}" +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - bai: + type: file + description: BAM/CRAM/SAM index file + pattern: "*.{bai,crai,sai}" + - crai: + type: file + description: BAM/CRAM/SAM index file + pattern: "*.{bai,crai,sai}" + - csi: + type: file + description: CSI index file + pattern: "*.{csi}" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@drpatelh" + - "@ewels" + - "@maxulysse" diff --git a/modules/nf-core/samtools/sort/main.nf b/modules/nf-core/samtools/sort/main.nf new file mode 100644 index 00000000..ab7f1cca --- /dev/null +++ b/modules/nf-core/samtools/sort/main.nf @@ -0,0 +1,43 @@ +process SAMTOOLS_SORT { + tag "$meta.id" + label 'process_medium' + + conda (params.enable_conda ? "bioconda::samtools=1.15.1" : null) + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/samtools:1.15.1--h1170115_0' : + 'quay.io/biocontainers/samtools:1.15.1--h1170115_0' }" + + input: + tuple val(meta), path(bam) + + output: + tuple val(meta), path("*.bam"), emit: bam + tuple val(meta), path("*.csi"), emit: csi, optional: true + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + if ("$bam" == "${prefix}.bam") error "Input and output names are the same, use \"task.ext.prefix\" to disambiguate!" 
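// ---------------------------------------------------------------------------
// Editor's note (illustrative sketch, not part of this diff): the samtools
// modules added here are conventionally chained sort -> index -> flagstat (and
// similarly stats/idxstats), matching their declared inputs. Workflow and
// channel names are hypothetical and the include paths are only indicative.
include { SAMTOOLS_SORT     } from './modules/nf-core/samtools/sort/main'
include { SAMTOOLS_INDEX    } from './modules/nf-core/samtools/index/main'
include { SAMTOOLS_FLAGSTAT } from './modules/nf-core/samtools/flagstat/main'

workflow BAM_SORT_STATS_SKETCH {
    take:
    ch_bam // channel: [ val(meta), path(bam) ]

    main:
    SAMTOOLS_SORT ( ch_bam )
    SAMTOOLS_INDEX ( SAMTOOLS_SORT.out.bam )
    // join on meta to build the [ meta, bam, bai ] tuple expected by flagstat
    SAMTOOLS_FLAGSTAT ( SAMTOOLS_SORT.out.bam.join( SAMTOOLS_INDEX.out.bai ) )

    emit:
    bam      = SAMTOOLS_SORT.out.bam
    bai      = SAMTOOLS_INDEX.out.bai
    flagstat = SAMTOOLS_FLAGSTAT.out.flagstat
    versions = SAMTOOLS_SORT.out.versions
}
// ---------------------------------------------------------------------------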
+ """ + samtools sort $args -@ $task.cpus -o ${prefix}.bam -T $prefix $bam + cat <<-END_VERSIONS > versions.yml + "${task.process}": + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.bam + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/samtools/sort/meta.yml b/modules/nf-core/samtools/sort/meta.yml new file mode 100644 index 00000000..09289751 --- /dev/null +++ b/modules/nf-core/samtools/sort/meta.yml @@ -0,0 +1,48 @@ +name: samtools_sort +description: Sort SAM/BAM/CRAM file +keywords: + - sort + - bam + - sam + - cram +tools: + - samtools: + description: | + SAMtools is a set of utilities for interacting with and post-processing + short DNA sequence read alignments in the SAM, BAM and CRAM formats, written by Heng Li. + These files are generated as output by short read aligners like BWA. + homepage: http://www.htslib.org/ + documentation: hhttp://www.htslib.org/doc/samtools.html + doi: 10.1093/bioinformatics/btp352 + licence: ["MIT"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - bam: + type: file + description: BAM/CRAM/SAM file + pattern: "*.{bam,cram,sam}" +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - bam: + type: file + description: Sorted BAM/CRAM/SAM file + pattern: "*.{bam,cram,sam}" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - csi: + type: file + description: BAM index file (optional) + pattern: "*.csi" +authors: + - "@drpatelh" + - "@ewels" diff --git a/modules/nf-core/samtools/stats/main.nf b/modules/nf-core/samtools/stats/main.nf new file mode 100644 index 00000000..9b0c3867 --- /dev/null +++ b/modules/nf-core/samtools/stats/main.nf @@ -0,0 +1,49 @@ +process SAMTOOLS_STATS { + tag "$meta.id" + label 'process_single' + + conda (params.enable_conda ? "bioconda::samtools=1.15.1" : null) + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/samtools:1.15.1--h1170115_0' : + 'quay.io/biocontainers/samtools:1.15.1--h1170115_0' }" + + input: + tuple val(meta), path(input), path(input_index) + path fasta + + output: + tuple val(meta), path("*.stats"), emit: stats + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def reference = fasta ? 
"--reference ${fasta}" : "" + """ + samtools \\ + stats \\ + --threads ${task.cpus} \\ + ${reference} \\ + ${input} \\ + > ${prefix}.stats + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.stats + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/samtools/stats/meta.yml b/modules/nf-core/samtools/stats/meta.yml new file mode 100644 index 00000000..cac50b1c --- /dev/null +++ b/modules/nf-core/samtools/stats/meta.yml @@ -0,0 +1,53 @@ +name: samtools_stats +description: Produces comprehensive statistics from SAM/BAM/CRAM file +keywords: + - statistics + - counts + - bam + - sam + - cram +tools: + - samtools: + description: | + SAMtools is a set of utilities for interacting with and post-processing + short DNA sequence read alignments in the SAM, BAM and CRAM formats, written by Heng Li. + These files are generated as output by short read aligners like BWA. + homepage: http://www.htslib.org/ + documentation: hhttp://www.htslib.org/doc/samtools.html + doi: 10.1093/bioinformatics/btp352 + licence: ["MIT"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - input: + type: file + description: BAM/CRAM file from alignment + pattern: "*.{bam,cram}" + - input_index: + type: file + description: BAI/CRAI file from alignment + pattern: "*.{bai,crai}" + - fasta: + type: optional file + description: Reference file the CRAM was created with + pattern: "*.{fasta,fa}" +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - stats: + type: file + description: File containing samtools stats output + pattern: "*.{stats}" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@drpatelh" + - "@FriederikeHanssen" diff --git a/modules/nf-core/subread/featurecounts/main.nf b/modules/nf-core/subread/featurecounts/main.nf new file mode 100644 index 00000000..18e2a92b --- /dev/null +++ b/modules/nf-core/subread/featurecounts/main.nf @@ -0,0 +1,47 @@ +process SUBREAD_FEATURECOUNTS { + tag "$meta.id" + label 'process_medium' + + conda (params.enable_conda ? "bioconda::subread=2.0.1" : null) + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/subread:2.0.1--hed695b0_0' : + 'quay.io/biocontainers/subread:2.0.1--hed695b0_0' }" + + input: + tuple val(meta), path(bams), path(annotation) + + output: + tuple val(meta), path("*featureCounts.txt") , emit: counts + tuple val(meta), path("*featureCounts.txt.summary"), emit: summary + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def paired_end = meta.single_end ? 
'' : '-p' + + def strandedness = 0 + if (meta.strandedness == 'forward') { + strandedness = 1 + } else if (meta.strandedness == 'reverse') { + strandedness = 2 + } + """ + featureCounts \\ + $args \\ + $paired_end \\ + -T $task.cpus \\ + -a $annotation \\ + -s $strandedness \\ + -o ${prefix}.featureCounts.txt \\ + ${bams.join(' ')} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + subread: \$( echo \$(featureCounts -v 2>&1) | sed -e "s/featureCounts v//g") + END_VERSIONS + """ +} diff --git a/modules/nf-core/subread/featurecounts/meta.yml b/modules/nf-core/subread/featurecounts/meta.yml new file mode 100644 index 00000000..cf02f1ea --- /dev/null +++ b/modules/nf-core/subread/featurecounts/meta.yml @@ -0,0 +1,52 @@ +name: subread_featurecounts +description: Count reads that map to genomic features +keywords: + - counts + - fasta + - genome + - reference + +tools: + - featurecounts: + description: featureCounts is a highly efficient general-purpose read summarization program that counts mapped reads for genomic features such as genes, exons, promoter, gene bodies, genomic bins and chromosomal locations. It can be used to count both RNA-seq and genomic DNA-seq reads. + homepage: http://bioinf.wehi.edu.au/featureCounts/ + documentation: http://bioinf.wehi.edu.au/subread-package/SubreadUsersGuide.pdf + doi: "10.1093/bioinformatics/btt656" + licence: ["GPL v3"] + +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - bam: + type: file + description: BAM/SAM file containing read alignments + pattern: "*.{bam}" + - annotation: + type: file + description: Genomic features annotation in GTF or SAF + pattern: "*.{gtf,saf}" + +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - counts: + type: file + description: Counts of reads mapping to features + pattern: "*featureCounts.txt" + - summary: + type: file + description: Summary log file + pattern: "*.featureCounts.txt.summary" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + +authors: + - "@ntoda03" diff --git a/modules/nf-core/umitools/dedup/main.nf b/modules/nf-core/umitools/dedup/main.nf new file mode 100644 index 00000000..48559d81 --- /dev/null +++ b/modules/nf-core/umitools/dedup/main.nf @@ -0,0 +1,45 @@ +process UMITOOLS_DEDUP { + tag "$meta.id" + label "process_medium" + + conda (params.enable_conda ? "bioconda::umi_tools=1.1.2" : null) + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/umi_tools:1.1.2--py38h4a8c8d9_0' : + 'quay.io/biocontainers/umi_tools:1.1.2--py38h4a8c8d9_0' }" + + input: + tuple val(meta), path(bam), path(bai) + val get_output_stats + + output: + tuple val(meta), path("*.bam") , emit: bam + tuple val(meta), path("*edit_distance.tsv"), optional:true, emit: tsv_edit_distance + tuple val(meta), path("*per_umi.tsv") , optional:true, emit: tsv_per_umi + tuple val(meta), path("*per_position.tsv") , optional:true, emit: tsv_umi_per_position + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def paired = meta.single_end ? "" : "--paired" + def stats = get_output_stats ? 
"--output-stats $prefix" : "" + + if (!(args ==~ /.*--random-seed.*/)) {args += " --random-seed=100"} + """ + PYTHONHASHSEED=0 umi_tools \\ + dedup \\ + -I $bam \\ + -S ${prefix}.bam \\ + $stats \\ + $paired \\ + $args + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + umitools: \$(umi_tools --version 2>&1 | sed 's/^.*UMI-tools version://; s/ *\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/umitools/dedup/meta.yml b/modules/nf-core/umitools/dedup/meta.yml new file mode 100644 index 00000000..56888e5a --- /dev/null +++ b/modules/nf-core/umitools/dedup/meta.yml @@ -0,0 +1,63 @@ +name: umitools_dedup +description: Deduplicate reads based on the mapping co-ordinate and the UMI attached to the read. +keywords: + - umitools + - deduplication +tools: + - umi_tools: + description: > + UMI-tools contains tools for dealing with Unique Molecular Identifiers (UMIs)/Random Molecular Tags (RMTs) + and single cell RNA-Seq cell barcodes + documentation: https://umi-tools.readthedocs.io/en/latest/ + license: ["MIT"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - bam: + type: file + description: | + BAM file containing reads to be deduplicated via UMIs. + pattern: "*.{bam}" + - bai: + type: file + description: | + BAM index files corresponding to the input BAM file. + pattern: "*.{bai}" + - get_output_stats: + type: boolean + description: | + Whether or not to generate output stats. +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - bam: + type: file + description: BAM file with deduplicated UMIs. + pattern: "*.{bam}" + - tsv_edit_distance: + type: file + description: Reports the (binned) average edit distance between the UMIs at each position. + pattern: "*edit_distance.tsv" + - tsv_per_umi: + type: file + description: UMI-level summary statistics. + pattern: "*per_umi.tsv" + - tsv_umi_per_position: + type: file + description: Tabulates the counts for unique combinations of UMI and position. + pattern: "*per_position.tsv" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + +authors: + - "@drpatelh" + - "@grst" + - "@klkeys" diff --git a/modules/nf-core/untar/main.nf b/modules/nf-core/untar/main.nf new file mode 100644 index 00000000..71eea7b2 --- /dev/null +++ b/modules/nf-core/untar/main.nf @@ -0,0 +1,64 @@ +process UNTAR { + tag "$archive" + label 'process_single' + + conda (params.enable_conda ? "conda-forge::sed=4.7" : null) + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/ubuntu:20.04' : + 'ubuntu:20.04' }" + + input: + tuple val(meta), path(archive) + + output: + tuple val(meta), path("$untar"), emit: untar + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def args2 = task.ext.args2 ?: '' + untar = archive.toString() - '.tar.gz' + + """ + mkdir output + + ## Ensures --strip-components only applied when top level of tar contents is a directory + ## If just files or multiple directories, place all in output + if [[ \$(tar -tzf ${archive} | grep -o -P "^.*?\\/" | uniq | wc -l) -eq 1 ]]; then + tar \\ + -C output --strip-components 1 \\ + -xzvf \\ + $args \\ + $archive \\ + $args2 + else + tar \\ + -C output \\ + -xzvf \\ + $args \\ + $archive \\ + $args2 + fi + + mv output ${untar} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + untar: \$(echo \$(tar --version 2>&1) | sed 's/^.*(GNU tar) //; s/ Copyright.*\$//') + END_VERSIONS + """ + + stub: + untar = archive.toString() - '.tar.gz' + """ + touch $untar + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + untar: \$(echo \$(tar --version 2>&1) | sed 's/^.*(GNU tar) //; s/ Copyright.*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/untar/meta.yml b/modules/nf-core/untar/meta.yml new file mode 100644 index 00000000..ea7a3f38 --- /dev/null +++ b/modules/nf-core/untar/meta.yml @@ -0,0 +1,40 @@ +name: untar +description: Extract files. +keywords: + - untar + - uncompress +tools: + - untar: + description: | + Extract tar.gz files. + documentation: https://www.gnu.org/software/tar/manual/ + licence: ["GPL-3.0-or-later"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - archive: + type: file + description: File to be untar + pattern: "*.{tar}.{gz}" +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - untar: + type: directory + description: Directory containing contents of archive + pattern: "*/" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@joseespinosa" + - "@drpatelh" + - "@matthdsm" + - "@jfy133" diff --git a/nextflow.config b/nextflow.config index c253a402..6f2ebc2b 100644 --- a/nextflow.config +++ b/nextflow.config @@ -1,150 +1,253 @@ /* - * ------------------------------------------------- - * nf-core/nascent Nextflow config file - * ------------------------------------------------- - * Default config options for all environments. - */ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + nf-core/nascent Nextflow config file +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Default config options for all compute environments +---------------------------------------------------------------------------------------- +*/ // Global default params, used in configs params { - - // Run arguments - workdir = false - clusterOptions = false - flip = false - saveAllfq = false - savefq = false - saveTrim = false - skipMultiQC = false - threadfqdump = false - - // File listing the number of nucleotides per chromosome for the reference genome used. - // Will be generated the first time each genome is used to process datasets. - chrom_sizes = 0 - - // Path to the Hisat2 index directory. 
If not provided, hese indices will be generated - // the first time this pipeline is executed. - hisat2_indices = 0 - - // Path to the RefSeq genome annotation file. Optional, but useful to collect stats via RseQC. - genome_refseq = 0 - - // Path to SRR files obtained from the Gene Expression Omnibus (GEO) platform. This is an - // alternative to providing fastq files if re-analizing existing public datasets. - sras = 0 - - reads = "data/*{R1,R2}*.fastq" - singleEnd = true - outdir = './results' - - // Boilerplate options - name = false - multiqc_config = "$baseDir/assets/multiqc_config.yaml" - email = false - maxMultiqcEmailFileSize = 25.MB - plaintext_email = false - monochrome_logs = false - help = false - igenomes_base = "./iGenomes" - tracedir = "${params.outdir}/pipeline_info" - awsqueue = false - awsregion = 'eu-west-1' - igenomesIgnore = false - custom_config_version = 'master' - custom_config_base = "https://raw.githubusercontent.com/nf-core/configs/${params.custom_config_version}" - hostnames = false - config_profile_description = false - config_profile_contact = false - config_profile_url = false -} -// Container slug. Stable releases should specify release tag! -// Developmental code should specify :dev -process.container = 'nfcore/nascent:1.0' + // Input options + input = null + + // References + genome = null + igenomes_base = 's3://ngi-igenomes/igenomes' + igenomes_ignore = false + save_reference = false + + // UMI handling + with_umi = false + umitools_dedup_stats = false + + // Trimming + skip_trimming = false + + // Alignment + aligner = 'bwa' + skip_alignment = false + + // Transcript identification method + assay_type = null + skip_grohmm = false + skip_tuning = false + tuning_file = null + + filter_bed = null + no_overlap = true + + // MultiQC options + multiqc_config = null + multiqc_title = null + multiqc_logo = null + max_multiqc_email_size = '25.MB' + multiqc_methods_description = null + + // Boilerplate options + outdir = null + tracedir = "${params.outdir}/pipeline_info" + publish_dir_mode = 'copy' + email = null + email_on_fail = null + plaintext_email = false + monochrome_logs = false + hook_url = null + help = false + validate_params = true + show_hidden_params = false + schema_ignore_params = 'genomes' + enable_conda = false + + + // Config options + custom_config_version = 'master' + custom_config_base = "https://raw.githubusercontent.com/nf-core/configs/${params.custom_config_version}" + config_profile_description = null + config_profile_contact = null + config_profile_url = null + config_profile_name = null + + + // Max resource options + // Defaults only, expecting to be overwritten + max_memory = '128.GB' + max_cpus = 16 + max_time = '240.h' + +} // Load base.config by default for all pipelines includeConfig 'conf/base.config' // Load nf-core custom profiles from different Institutions try { - includeConfig "${params.custom_config_base}/nfcore_custom.config" + includeConfig "${params.custom_config_base}/nfcore_custom.config" } catch (Exception e) { - System.err.println("WARNING: Could not load nf-core/config profiles: ${params.custom_config_base}/nfcore_custom.config") + System.err.println("WARNING: Could not load nf-core/config profiles: ${params.custom_config_base}/nfcore_custom.config") } +// Load nf-core/nascent custom profiles from different institutions. +// Warning: Uncomment only if a pipeline-specific instititutional config already exists on nf-core/configs! 
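// ---------------------------------------------------------------------------
// Editor's note (illustrative, not part of this config): with the parameter
// defaults declared above and the profiles defined further down in this file,
// a minimal invocation against the bundled test data could look like:
//
//   nextflow run nf-core/nascent -profile test,docker --outdir ./results
//
// For real data only --input and --outdir are required (see nextflow_schema.json);
// the test profile typically supplies its own small samplesheet via conf/test.config.
// ---------------------------------------------------------------------------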
+// try { +// includeConfig "${params.custom_config_base}/pipeline/nascent.config" +// } catch (Exception e) { +// System.err.println("WARNING: Could not load nf-core/config/nascent profiles: ${params.custom_config_base}/pipeline/nascent.config") +// } + + profiles { - awsbatch { includeConfig 'conf/awsbatch.config' } - conda { process.conda = "$baseDir/environment.yml" } - debug { process.beforeScript = 'echo $HOSTNAME' } - docker { docker.enabled = true } - singularity { singularity.enabled = true } - test { includeConfig 'conf/test.config' } + debug { process.beforeScript = 'echo $HOSTNAME' } + conda { + params.enable_conda = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + mamba { + params.enable_conda = true + conda.useMamba = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + docker { + docker.enabled = true + docker.userEmulation = true + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + singularity { + singularity.enabled = true + singularity.autoMounts = true + docker.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + podman { + podman.enabled = true + docker.enabled = false + singularity.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + shifter { + shifter.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + charliecloud.enabled = false + } + charliecloud { + charliecloud.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + } + gitpod { + executor.name = 'local' + executor.cpus = 16 + executor.memory = 60.GB + } + test { includeConfig 'conf/test.config' } + test_grocap { includeConfig 'conf/test_grocap.config' } + test_copro { includeConfig 'conf/test_copro.config' } + test_full { includeConfig 'conf/test_full.config' } } + // Load igenomes.config if required -if(!params.igenomesIgnore){ - includeConfig 'conf/igenomes.config' +if (!params.igenomes_ignore) { + includeConfig 'conf/igenomes.config' +} else { + params.genomes = [:] +} + + +// Export these variables to prevent local Python/R libraries from conflicting with those in the container +// The JULIA depot path has been adjusted to a fixed path `/usr/local/share/julia` that needs to be used for packages in the container. +// See https://apeltzer.github.io/post/03-julia-lang-nextflow/ for details on that. Once we have a common agreement on where to keep Julia packages, this is adjustable. 
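// ---------------------------------------------------------------------------
// Editor's note (illustrative, not part of this config): params.max_memory,
// params.max_cpus and params.max_time declared above act as caps that the
// check_max() helper defined at the end of this file is intended to apply to
// per-process resource requests (via conf/base.config). A site can lower them
// with a small config passed as `-c custom.config`, for example (values below
// are hypothetical):
params {
    max_memory = '64.GB'
    max_cpus   = 8
    max_time   = '48.h'
}
// ---------------------------------------------------------------------------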
+ +env { + PYTHONNOUSERSITE = 1 + R_PROFILE_USER = "/.Rprofile" + R_ENVIRON_USER = "/.Renviron" + JULIA_DEPOT_PATH = "/usr/local/share/julia" } // Capture exit codes from upstream processes when piping process.shell = ['/bin/bash', '-euo', 'pipefail'] +def trace_timestamp = new java.util.Date().format( 'yyyy-MM-dd_HH-mm-ss') timeline { - enabled = true - file = "${params.tracedir}/execution_timeline.html" + enabled = true + file = "${params.tracedir}/execution_timeline_${trace_timestamp}.html" } report { - enabled = true - file = "${params.tracedir}/execution_report.html" + enabled = true + file = "${params.tracedir}/execution_report_${trace_timestamp}.html" } trace { - enabled = true - file = "${params.tracedir}/execution_trace.txt" + enabled = true + file = "${params.tracedir}/execution_trace_${trace_timestamp}.txt" } dag { - enabled = true - file = "${params.tracedir}/pipeline_dag.svg" + enabled = true + file = "${params.tracedir}/pipeline_dag_${trace_timestamp}.html" } manifest { - name = 'nf-core/nascent' - author = 'Ignacio Tripodi, Margaret Gruca' - homePage = 'https://github.com/nf-core/nascent' - description = 'Nascent Transcription Processing Pipeline' - mainScript = 'main.nf' - nextflowVersion = '>=0.32.0' - version = '1.0' + name = 'nf-core/nascent' + author = 'Edmund Miller, Ignacio Tripodi, Margaret Gruca' + homePage = 'https://github.com/nf-core/nascent' + description = 'Global Run-On sequencing analysis pipeline' + mainScript = 'main.nf' + nextflowVersion = '!>=21.10.6' + version = '2.0.0' + doi = '' } +// Load modules.config for DSL2 module specific options +includeConfig 'conf/modules.config' + // Function to ensure that resource requirements don't go beyond // a maximum limit def check_max(obj, type) { - if(type == 'memory'){ - try { - if(obj.compareTo(params.max_memory as nextflow.util.MemoryUnit) == 1) - return params.max_memory as nextflow.util.MemoryUnit - else - return obj - } catch (all) { - println " ### ERROR ### Max memory '${params.max_memory}' is not valid! Using default value: $obj" - return obj - } - } else if(type == 'time'){ - try { - if(obj.compareTo(params.max_time as nextflow.util.Duration) == 1) - return params.max_time as nextflow.util.Duration - else - return obj - } catch (all) { - println " ### ERROR ### Max time '${params.max_time}' is not valid! Using default value: $obj" - return obj - } - } else if(type == 'cpus'){ - try { - return Math.min( obj, params.max_cpus as int ) - } catch (all) { - println " ### ERROR ### Max cpus '${params.max_cpus}' is not valid! Using default value: $obj" - return obj + if (type == 'memory') { + try { + if (obj.compareTo(params.max_memory as nextflow.util.MemoryUnit) == 1) + return params.max_memory as nextflow.util.MemoryUnit + else + return obj + } catch (all) { + println " ### ERROR ### Max memory '${params.max_memory}' is not valid! Using default value: $obj" + return obj + } + } else if (type == 'time') { + try { + if (obj.compareTo(params.max_time as nextflow.util.Duration) == 1) + return params.max_time as nextflow.util.Duration + else + return obj + } catch (all) { + println " ### ERROR ### Max time '${params.max_time}' is not valid! Using default value: $obj" + return obj + } + } else if (type == 'cpus') { + try { + return Math.min( obj, params.max_cpus as int ) + } catch (all) { + println " ### ERROR ### Max cpus '${params.max_cpus}' is not valid! 
Using default value: $obj" + return obj + } } - } } diff --git a/nextflow_schema.json b/nextflow_schema.json new file mode 100644 index 00000000..6cd74350 --- /dev/null +++ b/nextflow_schema.json @@ -0,0 +1,455 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema", + "$id": "https://raw.githubusercontent.com/nf-core/nascent/master/nextflow_schema.json", + "title": "nf-core/nascent pipeline parameters", + "description": "Global Run-On sequencing analysis pipeline", + "type": "object", + "definitions": { + "input_output_options": { + "title": "Input/output options", + "type": "object", + "fa_icon": "fas fa-terminal", + "description": "Define where the pipeline should find input data and save output data.", + "required": ["input", "outdir"], + "properties": { + "input": { + "type": "string", + "format": "file-path", + "mimetype": "text/csv", + "pattern": "^\\S+\\.csv$", + "schema": "assets/schema_input.json", + "description": "Path to comma-separated file containing information about the samples in the experiment.", + "help_text": "You will need to create a design file with information about the samples in your experiment before running the pipeline. Use this parameter to specify its location. It has to be a comma-separated file with 3 columns, and a header row. See [usage docs](https://nf-co.re/nascent/usage#samplesheet-input).", + "fa_icon": "fas fa-file-csv" + }, + "outdir": { + "type": "string", + "format": "directory-path", + "description": "The output directory where the results will be saved. You have to use absolute paths to storage on Cloud infrastructure.", + "fa_icon": "fas fa-folder-open" + }, + "email": { + "type": "string", + "description": "Email address for completion summary.", + "fa_icon": "fas fa-envelope", + "help_text": "Set this parameter to your e-mail address to get a summary e-mail with details of the run sent to you when the workflow exits. If set in your user config file (`~/.nextflow/config`) then you don't need to specify this on the command line for every run.", + "pattern": "^([a-zA-Z0-9_\\-\\.]+)@([a-zA-Z0-9_\\-\\.]+)\\.([a-zA-Z]{2,5})$" + }, + "multiqc_title": { + "type": "string", + "description": "MultiQC report title. Printed as page header, used for filename if not otherwise specified.", + "fa_icon": "fas fa-file-signature" + } + } + }, + "alignment_options": { + "title": "Alignment Options", + "type": "object", + "description": "", + "default": "", + "properties": { + "aligner": { + "type": "string", + "default": "bwa", + "fa_icon": "fas fa-puzzle-piece", + "enum": ["bwa", "bwamem2", "dragmap"], + "description": "Specify aligner to be used to map reads to reference genome." + }, + "skip_alignment": { + "type": "boolean", + "fa_icon": "fas fa-fast-forward", + "description": "Skip all of the alignment-based processes within the pipeline." + }, + "skip_trimming": { + "type": "boolean", + "description": "Skip the adapter trimming step.", + "help_text": "Use this if your input FastQ files have already been trimmed outside of the workflow or if you're very confident that there is no adapter contamination in your data.", + "fa_icon": "fas fa-fast-forward" + } + } + }, + "umi_options": { + "title": "UMI options", + "type": "object", + "description": "Options for processing reads with unique molecular identifiers", + "default": "", + "fa_icon": "fas fa-barcode", + "properties": { + "with_umi": { + "type": "boolean", + "fa_icon": "fas fa-barcode", + "description": "Enable UMI-based read deduplication." 
+ }, + "umitools_dedup_stats": { + "type": "boolean", + "fa_icon": "fas fa-barcode", + "help_text": "It can be quite time consuming generating these output stats - see [#827](https://github.com/nf-core/rnaseq/issues/827).", + "description": "Generate output stats when running \"umi_tools dedup\"." + } + } + }, + "transcript_identification_options": { + "title": "Transcript Identification Options", + "type": "object", + "description": "Type of experiment to use for Transcript Identification(NT or TSS)", + "properties": { + "assay_type": { + "type": "string", + "fa_icon": "fas fa-vial", + "enum": [ + "CoPRO", + "GROcap", + "PROcap", + "CAGE", + "NETCAGE", + "RAMPAGE", + "csRNAseq", + "STRIPEseq", + "PROseq", + "GROseq", + "R_5", + "R_3", + "R1_5", + "R1_3", + "R2_5", + "R2_3" + ] + }, + "skip_tuning": { + "type": "boolean", + "description": "Skip grohmm tuning step as it can take a long time.", + "fa_icon": "fas fa-wrench", + "hidden": true + }, + "skip_grohmm": { + "type": "boolean", + "fa_icon": "fas fa-eye-slash" + }, + "tuning_file": { + "type": "string", + "format": "file-path", + "mimetype": "text/csv", + "pattern": "^\\S+\\.csv$", + "hidden": true, + "fa_icon": "fas fa-file-csv", + "description": "File of parameters to test for groHMM tuning" + }, + "filter_bed": { + "type": "string", + "fa_icon": "fas fa-filter", + "pattern": "^\\S+\\.bed(\\.gz)?$" + }, + "no_overlap": { + "type": "boolean", + "default": true, + "fa_icon": "far fa-chart-bar", + "description": "Only report those entries in A that have no overlap in B" + } + }, + "required": ["assay_type"], + "fa_icon": "fas fa-microscope" + }, + "reference_genome_options": { + "title": "Reference genome options", + "type": "object", + "fa_icon": "fas fa-dna", + "description": "Reference genome related files and options required for the workflow.", + "properties": { + "genome": { + "type": "string", + "description": "Name of iGenomes reference.", + "fa_icon": "fas fa-book", + "help_text": "If using a reference genome configured in the pipeline using iGenomes, use this parameter to give the ID for the reference. This is then used to build the full paths for all required reference genome files e.g. `--genome GRCh38`. \n\nSee the [nf-core website docs](https://nf-co.re/usage/reference_genomes) for more details." + }, + "fasta": { + "type": "string", + "format": "file-path", + "mimetype": "text/plain", + "pattern": "^\\S+\\.fn?a(sta)?(\\.gz)?$", + "description": "Path to FASTA genome file.", + "help_text": "This parameter is *mandatory* if `--genome` is not specified. If you don't have a BWA index available this will be generated for you automatically. Combine with `--save_reference` to save BWA index for future runs.", + "fa_icon": "far fa-file-code" + }, + "gtf": { + "type": "string", + "format": "file-path", + "mimetype": "text/plain", + "pattern": "^\\S+\\.gtf(\\.gz)?$", + "description": "Path to GTF annotation file.", + "fa_icon": "fas fa-code-branch", + "help_text": "This parameter is *mandatory* if `--genome` is not specified." + }, + "gff": { + "type": "string", + "format": "file-path", + "mimetype": "text/plain", + "pattern": "^\\S+\\.gff(\\.gz)?$", + "fa_icon": "fas fa-code-branch", + "description": "Path to GFF3 annotation file.", + "help_text": "This parameter must be specified if `--genome` or `--gtf` are not specified." 
+ }, + "gene_bed": { + "type": "string", + "format": "file-path", + "mimetype": "text/plain", + "pattern": "^\\S+\\.bed(\\.gz)?$", + "fa_icon": "fas fa-procedures", + "description": "Path to BED file containing gene intervals. This will be created from the GTF file if not specified." + }, + "bwa_index": { + "type": "string", + "description": "Path to BWA mem indices.", + "fa_icon": "fas fa-copy", + "help_text": "> **NB** If none provided, will be generated automatically from the FASTA reference." + }, + "bwamem2_index": { + "type": "string", + "description": "Path to bwa-mem2 mem indices.", + "fa_icon": "fas fa-copy", + "help_text": "> **NB** If none provided, will be generated automatically from the FASTA reference.", + "hidden": true + }, + "dragmap": { + "type": "string", + "fa_icon": "fas fa-copy", + "description": "Path to dragmap indices.", + "help_text": "If you use AWS iGenomes, this has already been set for you appropriately.\n\nIf you wish to recompute indices available on igenomes, set `--dragmap false`.\n\n> **NB** If none provided, will be generated automatically from the FASTA reference, if `--aligner dragmap` is specified. Combine with `--save_reference` to save for future runs.", + "hidden": true + }, + "save_reference": { + "type": "boolean", + "description": "If generated by the pipeline save the BWA index in the results directory.", + "help_text": "If an alignment index is generated by the pipeline use this parameter to save it to your results folder. These can then be used for future pipeline runs, reducing processing times.", + "fa_icon": "fas fa-save" + }, + "igenomes_base": { + "type": "string", + "format": "directory-path", + "description": "Directory / URL base for iGenomes references.", + "default": "s3://ngi-igenomes/igenomes", + "fa_icon": "fas fa-cloud-download-alt", + "hidden": true + }, + "igenomes_ignore": { + "type": "boolean", + "description": "Do not load the iGenomes reference config.", + "fa_icon": "fas fa-ban", + "hidden": true, + "help_text": "Do not load `igenomes.config` when running the pipeline. You may choose this option if you observe clashes between custom parameters and those supplied in `igenomes.config`." + } + } + }, + "institutional_config_options": { + "title": "Institutional config options", + "type": "object", + "fa_icon": "fas fa-university", + "description": "Parameters used to describe centralised config profiles. These should not be edited.", + "help_text": "The centralised nf-core configuration profiles use a handful of pipeline parameters to describe themselves. This information is then printed to the Nextflow log when you run a pipeline. You should not need to change these values when you run a pipeline.", + "properties": { + "custom_config_version": { + "type": "string", + "description": "Git commit id for Institutional configs.", + "default": "master", + "hidden": true, + "fa_icon": "fas fa-users-cog" + }, + "custom_config_base": { + "type": "string", + "description": "Base directory for Institutional configs.", + "default": "https://raw.githubusercontent.com/nf-core/configs/master", + "hidden": true, + "help_text": "If you're running offline, Nextflow will not be able to fetch the institutional config files from the internet. If you don't need them, then this is not a problem. 
If you do need them, you should download the files from the repo and tell Nextflow where to find them with this parameter.", + "fa_icon": "fas fa-users-cog" + }, + "config_profile_name": { + "type": "string", + "description": "Institutional config name.", + "hidden": true, + "fa_icon": "fas fa-users-cog" + }, + "config_profile_description": { + "type": "string", + "description": "Institutional config description.", + "hidden": true, + "fa_icon": "fas fa-users-cog" + }, + "config_profile_contact": { + "type": "string", + "description": "Institutional config contact information.", + "hidden": true, + "fa_icon": "fas fa-users-cog" + }, + "config_profile_url": { + "type": "string", + "description": "Institutional config URL link.", + "hidden": true, + "fa_icon": "fas fa-users-cog" + } + } + }, + "max_job_request_options": { + "title": "Max job request options", + "type": "object", + "fa_icon": "fab fa-acquisitions-incorporated", + "description": "Set the top limit for requested resources for any single job.", + "help_text": "If you are running on a smaller system, a pipeline step requesting more resources than are available may cause the Nextflow to stop the run with an error. These options allow you to cap the maximum resources requested by any single job so that the pipeline will run on your system.\n\nNote that you can not _increase_ the resources requested by any job using these options. For that you will need your own configuration file. See [the nf-core website](https://nf-co.re/usage/configuration) for details.", + "properties": { + "max_cpus": { + "type": "integer", + "description": "Maximum number of CPUs that can be requested for any single job.", + "default": 16, + "fa_icon": "fas fa-microchip", + "hidden": true, + "help_text": "Use to set an upper-limit for the CPU requirement for each process. Should be an integer e.g. `--max_cpus 1`" + }, + "max_memory": { + "type": "string", + "description": "Maximum amount of memory that can be requested for any single job.", + "default": "128.GB", + "fa_icon": "fas fa-memory", + "pattern": "^\\d+(\\.\\d+)?\\.?\\s*(K|M|G|T)?B$", + "hidden": true, + "help_text": "Use to set an upper-limit for the memory requirement for each process. Should be a string in the format integer-unit e.g. `--max_memory '8.GB'`" + }, + "max_time": { + "type": "string", + "description": "Maximum amount of time that can be requested for any single job.", + "default": "240.h", + "fa_icon": "far fa-clock", + "pattern": "^(\\d+\\.?\\s*(s|m|h|day)\\s*)+$", + "hidden": true, + "help_text": "Use to set an upper-limit for the time requirement for each process. Should be a string in the format integer-unit e.g. 
`--max_time '2.h'`" + } + } + }, + "generic_options": { + "title": "Generic options", + "type": "object", + "fa_icon": "fas fa-file-import", + "description": "Less common options for the pipeline, typically set in a config file.", + "help_text": "These options are common to all nf-core pipelines and allow you to customise some of the core preferences for how the pipeline runs.\n\nTypically these options would be set in a Nextflow config file loaded for all pipeline runs, such as `~/.nextflow/config`.", + "properties": { + "help": { + "type": "boolean", + "description": "Display help text.", + "fa_icon": "fas fa-question-circle", + "hidden": true + }, + "publish_dir_mode": { + "type": "string", + "default": "copy", + "description": "Method used to save pipeline results to output directory.", + "help_text": "The Nextflow `publishDir` option specifies which intermediate files should be saved to the output directory. This option tells the pipeline what method should be used to move these files. See [Nextflow docs](https://www.nextflow.io/docs/latest/process.html#publishdir) for details.", + "fa_icon": "fas fa-copy", + "enum": ["symlink", "rellink", "link", "copy", "copyNoFollow", "move"], + "hidden": true + }, + "email_on_fail": { + "type": "string", + "description": "Email address for completion summary, only when pipeline fails.", + "fa_icon": "fas fa-exclamation-triangle", + "pattern": "^([a-zA-Z0-9_\\-\\.]+)@([a-zA-Z0-9_\\-\\.]+)\\.([a-zA-Z]{2,5})$", + "help_text": "An email address to send a summary email to when the pipeline is completed - ONLY sent if the pipeline does not exit successfully.", + "hidden": true + }, + "plaintext_email": { + "type": "boolean", + "description": "Send plain-text email instead of HTML.", + "fa_icon": "fas fa-remove-format", + "hidden": true + }, + "max_multiqc_email_size": { + "type": "string", + "description": "File size limit when attaching MultiQC reports to summary emails.", + "pattern": "^\\d+(\\.\\d+)?\\.?\\s*(K|M|G|T)?B$", + "default": "25.MB", + "fa_icon": "fas fa-file-upload", + "hidden": true + }, + "monochrome_logs": { + "type": "boolean", + "description": "Do not use coloured log outputs.", + "fa_icon": "fas fa-palette", + "hidden": true + }, + "hook_url": { + "type": "string", + "description": "Incoming hook URL for messaging service", + "fa_icon": "fas fa-people-group", + "help_text": "Incoming hook URL for messaging service. Currently, only MS Teams is supported.", + "hidden": true + }, + "multiqc_config": { + "type": "string", + "description": "Custom config file to supply to MultiQC.", + "fa_icon": "fas fa-cog", + "hidden": true + }, + "multiqc_logo": { + "type": "string", + "description": "Custom logo file to supply to MultiQC. 
File name must also be set in the MultiQC config file", + "fa_icon": "fas fa-image", + "hidden": true + }, + "multiqc_methods_description": { + "type": "string", + "description": "Custom MultiQC yaml file containing HTML including a methods description.", + "fa_icon": "fas fa-cog" + }, + "tracedir": { + "type": "string", + "description": "Directory to keep pipeline Nextflow logs and reports.", + "default": "${params.outdir}/pipeline_info", + "fa_icon": "fas fa-cogs", + "hidden": true + }, + "validate_params": { + "type": "boolean", + "description": "Boolean whether to validate parameters against the schema at runtime", + "default": true, + "fa_icon": "fas fa-check-square", + "hidden": true + }, + "show_hidden_params": { + "type": "boolean", + "fa_icon": "far fa-eye-slash", + "description": "Show all params when using `--help`", + "hidden": true, + "help_text": "By default, parameters set as _hidden_ in the schema are not shown on the command line when a user runs with `--help`. Specifying this option will tell the pipeline to show all parameters." + }, + "enable_conda": { + "type": "boolean", + "description": "Run this workflow with Conda. You can also use '-profile conda' instead of providing this parameter.", + "hidden": true, + "fa_icon": "fas fa-bacon" + } + } + } + }, + "allOf": [ + { + "$ref": "#/definitions/input_output_options" + }, + { + "$ref": "#/definitions/alignment_options" + }, + { + "$ref": "#/definitions/umi_options" + }, + { + "$ref": "#/definitions/transcript_identification_options" + }, + { + "$ref": "#/definitions/reference_genome_options" + }, + { + "$ref": "#/definitions/institutional_config_options" + }, + { + "$ref": "#/definitions/max_job_request_options" + }, + { + "$ref": "#/definitions/generic_options" + } + ] +} diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 00000000..0d62beb6 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,10 @@ +# Config file for Python. Mostly used to configure linting of bin/check_samplesheet.py with Black. +# Should be kept the same as nf-core/tools to avoid fighting with template synchronisation. 
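+# Both tools pick this file up automatically from the repository root; locally the same
+# checks can be run as, for example:
+#   black --check bin/ && isort --check-only bin/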
+[tool.black] +line-length = 120 +target_version = ["py37", "py38", "py39", "py310"] + +[tool.isort] +profile = "black" +known_first_party = ["nf_core"] +multi_line_output = 3 diff --git a/subworkflows/local/align_bwa/main.nf b/subworkflows/local/align_bwa/main.nf new file mode 100644 index 00000000..4e6f2c66 --- /dev/null +++ b/subworkflows/local/align_bwa/main.nf @@ -0,0 +1,39 @@ +// +// Alignment with BWA MEM +// + +include { BWA_MEM } from '../../../modules/nf-core/bwa/mem/main' +include { BAM_SORT_STATS_SAMTOOLS } from '../../nf-core/bam_sort_stats_samtools/main' + +workflow ALIGN_BWA { + take: + reads // channel: [ val(meta), [ reads ] ] + index // file: /path/to/bwa/index/ + + main: + + ch_versions = Channel.empty() + + // + // Map reads with BWA MEM + // + BWA_MEM ( reads, index, false ) + ch_versions = ch_versions.mix(BWA_MEM.out.versions.first()) + + // + // Sort, index BAM file and run samtools stats, flagstat and idxstats + // + BAM_SORT_STATS_SAMTOOLS ( BWA_MEM.out.bam, [] ) + ch_versions = ch_versions.mix(BAM_SORT_STATS_SAMTOOLS.out.versions) + + emit: + orig_bam = BWA_MEM.out.bam // channel: [ val(meta), bam ] + + bam = BAM_SORT_STATS_SAMTOOLS.out.bam // channel: [ val(meta), [ bam ] ] + bai = BAM_SORT_STATS_SAMTOOLS.out.bai // channel: [ val(meta), [ bai ] ] + stats = BAM_SORT_STATS_SAMTOOLS.out.stats // channel: [ val(meta), [ stats ] ] + flagstat = BAM_SORT_STATS_SAMTOOLS.out.flagstat // channel: [ val(meta), [ flagstat ] ] + idxstats = BAM_SORT_STATS_SAMTOOLS.out.idxstats // channel: [ val(meta), [ idxstats ] ] + + versions = ch_versions // channel: [ versions.yml ] +} diff --git a/subworkflows/local/align_bwamem2/main.nf b/subworkflows/local/align_bwamem2/main.nf new file mode 100644 index 00000000..f6babfa6 --- /dev/null +++ b/subworkflows/local/align_bwamem2/main.nf @@ -0,0 +1,39 @@ +// +// Alignment with BWAMEM2 +// + +include { BWAMEM2_MEM } from '../../../modules/nf-core/bwamem2/mem/main' +include { BAM_SORT_STATS_SAMTOOLS } from '../../nf-core/bam_sort_stats_samtools/main' + +workflow ALIGN_BWAMEM2 { + take: + reads // channel: [ val(meta), [ reads ] ] + index // file: /path/to/bwa/index/ + + main: + + ch_versions = Channel.empty() + + // + // Map reads with BWAMEM2 + // + BWAMEM2_MEM ( reads, index, false ) + ch_versions = ch_versions.mix(BWAMEM2_MEM.out.versions.first()) + + // + // Sort, index BAM file and run samtools stats, flagstat and idxstats + // + BAM_SORT_STATS_SAMTOOLS ( BWAMEM2_MEM.out.bam, [] ) + ch_versions = ch_versions.mix(BAM_SORT_STATS_SAMTOOLS.out.versions) + + emit: + orig_bam = BWAMEM2_MEM.out.bam // channel: [ val(meta), bam ] + + bam = BAM_SORT_STATS_SAMTOOLS.out.bam // channel: [ val(meta), [ bam ] ] + bai = BAM_SORT_STATS_SAMTOOLS.out.bai // channel: [ val(meta), [ bai ] ] + stats = BAM_SORT_STATS_SAMTOOLS.out.stats // channel: [ val(meta), [ stats ] ] + flagstat = BAM_SORT_STATS_SAMTOOLS.out.flagstat // channel: [ val(meta), [ flagstat ] ] + idxstats = BAM_SORT_STATS_SAMTOOLS.out.idxstats // channel: [ val(meta), [ idxstats ] ] + + versions = ch_versions // channel: [ versions.yml ] +} diff --git a/subworkflows/local/align_dragmap/main.nf b/subworkflows/local/align_dragmap/main.nf new file mode 100644 index 00000000..4cfac1dc --- /dev/null +++ b/subworkflows/local/align_dragmap/main.nf @@ -0,0 +1,39 @@ +// +// Alignment with dragmap +// + +include { DRAGMAP_ALIGN } from '../../../modules/nf-core/dragmap/align/main' +include { BAM_SORT_STATS_SAMTOOLS } from '../../nf-core/bam_sort_stats_samtools/main' + +workflow ALIGN_DRAGMAP { + take: + 
reads // channel: [ val(meta), [ reads ] ] + index // file: /path/to/bwa/index/ + + main: + + ch_versions = Channel.empty() + + // + // Map reads with BWA MEM + // + DRAGMAP_ALIGN ( reads, index, false ) + ch_versions = ch_versions.mix(DRAGMAP_ALIGN.out.versions.first()) + + // + // Sort, index BAM file and run samtools stats, flagstat and idxstats + // + BAM_SORT_STATS_SAMTOOLS ( DRAGMAP_ALIGN.out.bam, [] ) + ch_versions = ch_versions.mix(BAM_SORT_STATS_SAMTOOLS.out.versions) + + emit: + orig_bam = DRAGMAP_ALIGN.out.bam // channel: [ val(meta), bam ] + + bam = BAM_SORT_STATS_SAMTOOLS.out.bam // channel: [ val(meta), [ bam ] ] + bai = BAM_SORT_STATS_SAMTOOLS.out.bai // channel: [ val(meta), [ bai ] ] + stats = BAM_SORT_STATS_SAMTOOLS.out.stats // channel: [ val(meta), [ stats ] ] + flagstat = BAM_SORT_STATS_SAMTOOLS.out.flagstat // channel: [ val(meta), [ flagstat ] ] + idxstats = BAM_SORT_STATS_SAMTOOLS.out.idxstats // channel: [ val(meta), [ idxstats ] ] + + versions = ch_versions // channel: [ versions.yml ] +} diff --git a/subworkflows/local/coverage_graphs.nf b/subworkflows/local/coverage_graphs.nf new file mode 100644 index 00000000..a64fdc29 --- /dev/null +++ b/subworkflows/local/coverage_graphs.nf @@ -0,0 +1,67 @@ +/* + * Create bigWig and bedGraph files + */ + +include { + BEDTOOLS_GENOMECOV as BEDTOOLS_GENOMECOV_PLUS + BEDTOOLS_GENOMECOV as BEDTOOLS_GENOMECOV_MINUS } from '../../modules/nf-core/bedtools/genomecov/main' + +include { + DEEPTOOLS_BAMCOVERAGE as DEEPTOOLS_BAMCOVERAGE_PLUS + DEEPTOOLS_BAMCOVERAGE as DEEPTOOLS_BAMCOVERAGE_MINUS } from '../../modules/nf-core/deeptools/bamcoverage/main' + +workflow COVERAGE_GRAPHS { + take: + bam + bai + sizes + fasta + fai + + main: + + ch_versions = Channel.empty() + + ch_genomecov_bam = bam.combine(Channel.from(1)) + + BEDTOOLS_GENOMECOV_PLUS ( + ch_genomecov_bam, + [], + 'bedGraph' + ) + ch_versions = ch_versions.mix(BEDTOOLS_GENOMECOV_PLUS.out.versions.first()) + + BEDTOOLS_GENOMECOV_MINUS ( + ch_genomecov_bam, + [], + 'bedGraph' + ) + ch_versions = ch_versions.mix(BEDTOOLS_GENOMECOV_MINUS.out.versions.first()) + + + bam.join(bai, by: [0], remainder: true).set { ch_bam_bai } + + DEEPTOOLS_BAMCOVERAGE_PLUS ( + ch_bam_bai, + fasta, + fai + ) + ch_versions = ch_versions.mix(DEEPTOOLS_BAMCOVERAGE_PLUS.out.versions.first()) + + DEEPTOOLS_BAMCOVERAGE_MINUS ( + ch_bam_bai, + fasta, + fai + ) + ch_versions = ch_versions.mix(DEEPTOOLS_BAMCOVERAGE_MINUS.out.versions.first()) + + ch_plus_minus = DEEPTOOLS_BAMCOVERAGE_PLUS.out.bigwig.join(DEEPTOOLS_BAMCOVERAGE_MINUS.out.bigwig) + + emit: + plus_bedGraph = BEDTOOLS_GENOMECOV_PLUS.out.genomecov + minus_bedGraph = BEDTOOLS_GENOMECOV_MINUS.out.genomecov + + plus_minus = ch_plus_minus + + versions = ch_versions +} diff --git a/subworkflows/local/grohmm.nf b/subworkflows/local/grohmm.nf new file mode 100644 index 00000000..4db719e5 --- /dev/null +++ b/subworkflows/local/grohmm.nf @@ -0,0 +1,46 @@ +/* + * Run parametertuning optionally, otherwise just run transcript calling + */ + +include { GROHMM_TRANSCRIPTCALLING } from '../../modules/local/grohmm/transcriptcalling/main.nf' +include { GROHMM_PARAMETERTUNING } from '../../modules/local/grohmm/parametertuning/main.nf' + +/* + * Note meta refers to all merged files + */ +workflow GROHMM { + take: + bams + gtf + tuning_file + + main: + + ch_versions = Channel.empty() + + ch_tuning = [] + + if(!params.skip_tuning) { + GROHMM_PARAMETERTUNING ( + bams, + gtf, + tuning_file + ) + ch_tuning = GROHMM_PARAMETERTUNING.out.tuning + ch_versions = 
ch_versions.mix(GROHMM_PARAMETERTUNING.out.versions.first()) + } + + GROHMM_TRANSCRIPTCALLING ( + bams, + gtf, + ch_tuning + ) + ch_versions = ch_versions.mix(GROHMM_TRANSCRIPTCALLING.out.versions.first()) + + emit: + transcripts = GROHMM_TRANSCRIPTCALLING.out.transcripts + bed = GROHMM_TRANSCRIPTCALLING.out.transcripts_bed + td_plot = GROHMM_TRANSCRIPTCALLING.out.td_plot + + versions = ch_versions +} diff --git a/subworkflows/local/input_check.nf b/subworkflows/local/input_check.nf new file mode 100644 index 00000000..0aecf87f --- /dev/null +++ b/subworkflows/local/input_check.nf @@ -0,0 +1,44 @@ +// +// Check input samplesheet and get read channels +// + +include { SAMPLESHEET_CHECK } from '../../modules/local/samplesheet_check' + +workflow INPUT_CHECK { + take: + samplesheet // file: /path/to/samplesheet.csv + + main: + SAMPLESHEET_CHECK ( samplesheet ) + .csv + .splitCsv ( header:true, sep:',' ) + .map { create_fastq_channel(it) } + .set { reads } + + emit: + reads // channel: [ val(meta), [ reads ] ] + versions = SAMPLESHEET_CHECK.out.versions // channel: [ versions.yml ] +} + +// Function to get list of [ meta, [ fastq_1, fastq_2 ] ] +def create_fastq_channel(LinkedHashMap row) { + // create meta map + def meta = [:] + meta.id = row.sample + meta.single_end = row.single_end.toBoolean() + + // add path(s) of the fastq file(s) to the meta map + def fastq_meta = [] + if (!file(row.fastq_1).exists()) { + exit 1, "ERROR: Please check input samplesheet -> Read 1 FastQ file does not exist!\n${row.fastq_1}" + } + if (meta.single_end) { + fastq_meta = [ meta, [ file(row.fastq_1) ] ] + } else { + if (!file(row.fastq_2).exists()) { + exit 1, "ERROR: Please check input samplesheet -> Read 2 FastQ file does not exist!\n${row.fastq_2}" + } + fastq_meta = [ meta, [ file(row.fastq_1), file(row.fastq_2) ] ] + } + return fastq_meta +} diff --git a/subworkflows/local/prepare_genome.nf b/subworkflows/local/prepare_genome.nf new file mode 100644 index 00000000..26475c61 --- /dev/null +++ b/subworkflows/local/prepare_genome.nf @@ -0,0 +1,139 @@ +// +// Uncompress and prepare reference genome files +// + +include { GTF2BED } from '../../modules/local/gtf2bed' +include { GTF_GENE_FILTER } from '../../modules/local/gtf_gene_filter' + +include { + GUNZIP as GUNZIP_FASTA + GUNZIP as GUNZIP_GTF + GUNZIP as GUNZIP_GFF + GUNZIP as GUNZIP_GENE_BED } from '../../modules/nf-core/gunzip/main' +include { UNTAR as UNTAR_BWA_INDEX + UNTAR as UNTAR_DRAGMAP } from '../../modules/nf-core/untar/main' +include { SAMTOOLS_FAIDX } from '../../modules/nf-core/samtools/faidx/main' +include { GFFREAD } from '../../modules/nf-core/gffread/main' +include { BWA_INDEX } from '../../modules/nf-core/bwa/index/main' +include { BWAMEM2_INDEX } from '../../modules/nf-core/bwamem2/index/main' +include { DRAGMAP_HASHTABLE } from '../../modules/nf-core/dragmap/hashtable/main' +include { CUSTOM_GETCHROMSIZES } from '../../modules/nf-core/custom/getchromsizes/main' + +workflow PREPARE_GENOME { + take: + prepare_tool_indices + + main: + + ch_versions = Channel.empty() + + // + // Uncompress genome fasta file if required + // + if (params.fasta.endsWith('.gz')) { + ch_fasta = GUNZIP_FASTA ( [ [:], params.fasta ] ).gunzip.map { it[1] } + ch_versions = ch_versions.mix(GUNZIP_FASTA.out.versions) + } else { + ch_fasta = file(params.fasta) + } + + // Create Fai file + ch_fai = SAMTOOLS_FAIDX( [ [:], ch_fasta ] ).fai.map { it[1] } + ch_versions = ch_versions.mix(SAMTOOLS_FAIDX.out.versions) + + // + // Uncompress GTF annotation file or create from 
GFF3 if required
+    //
+    if (params.gtf) {
+        if (params.gtf.endsWith('.gz')) {
+            ch_gtf      = GUNZIP_GTF ( [ [:], params.gtf ] ).gunzip.map { it[1] }
+            ch_versions = ch_versions.mix(GUNZIP_GTF.out.versions)
+        } else {
+            ch_gtf = file(params.gtf)
+        }
+    } else if (params.gff) {
+        if (params.gff.endsWith('.gz')) {
+            ch_gff      = GUNZIP_GFF ( [ [:], params.gff ] ).gunzip.map { it[1] }
+            ch_versions = ch_versions.mix(GUNZIP_GFF.out.versions)
+        } else {
+            ch_gff = file(params.gff)
+        }
+        ch_gtf      = GFFREAD ( ch_gff ).gtf
+        ch_versions = ch_versions.mix(GFFREAD.out.versions)
+    }
+
+    //
+    // Uncompress gene BED annotation file or create from GTF if required
+    //
+    if (params.gene_bed) {
+        if (params.gene_bed.endsWith('.gz')) {
+            ch_gene_bed = GUNZIP_GENE_BED ( [ [:], params.gene_bed ] ).gunzip.map { it[1] }
+            ch_versions = ch_versions.mix(GUNZIP_GENE_BED.out.versions)
+        } else {
+            ch_gene_bed = file(params.gene_bed)
+        }
+    } else {
+        ch_gene_bed = GTF2BED ( ch_gtf ).bed
+        ch_versions = ch_versions.mix(GTF2BED.out.versions)
+    }
+
+    //
+    // Create chromosome sizes file
+    //
+    ch_chrom_sizes = CUSTOM_GETCHROMSIZES ( [ [:], ch_fasta ] ).sizes
+    ch_versions    = ch_versions.mix(CUSTOM_GETCHROMSIZES.out.versions)
+
+    //
+    // Uncompress BWA index or generate from scratch if required
+    //
+    ch_bwa_index = Channel.empty()
+    ch_dragmap   = Channel.empty()
+    if ('bwa' in prepare_tool_indices) {
+        if (params.bwa_index) {
+            if (params.bwa_index.endsWith('.tar.gz')) {
+                ch_bwa_index = UNTAR_BWA_INDEX ( params.bwa_index ).untar
+                ch_versions  = ch_versions.mix(UNTAR_BWA_INDEX.out.versions)
+            } else {
+                ch_bwa_index = file(params.bwa_index)
+            }
+        } else {
+            ch_bwa_index = BWA_INDEX ( ch_fasta ).index
+            ch_versions  = ch_versions.mix(BWA_INDEX.out.versions)
+        }
+    } else if ('bwamem2' in prepare_tool_indices) {
+        if (params.bwamem2_index) {
+            if (params.bwamem2_index.endsWith('.tar.gz') || params.bwamem2_index.endsWith('.tgz')) {
+                ch_bwa_index = UNTAR_BWA_INDEX ( [ [:], params.bwamem2_index ] ).untar
+                ch_versions  = ch_versions.mix(UNTAR_BWA_INDEX.out.versions)
+            } else {
+                ch_bwa_index = file(params.bwamem2_index)
+            }
+        } else {
+            ch_bwa_index = BWAMEM2_INDEX ( [ [:], ch_fasta ] ).index
+            ch_versions  = ch_versions.mix(BWAMEM2_INDEX.out.versions)
+        }
+    } else if ('dragmap' in prepare_tool_indices) {
+        if (params.dragmap) {
+            if (params.dragmap.endsWith('.tar.gz')) {
+                ch_dragmap  = UNTAR_DRAGMAP ( params.dragmap ).untar
+                ch_versions = ch_versions.mix(UNTAR_DRAGMAP.out.versions)
+            } else {
+                ch_dragmap = file(params.dragmap)
+            }
+        } else {
+            ch_dragmap  = DRAGMAP_HASHTABLE( ch_fasta ).hashmap
+            ch_versions = ch_versions.mix(DRAGMAP_HASHTABLE.out.versions)
+        }
+    }
+
+    emit:
+    fasta       = ch_fasta
+    fai         = ch_fai
+    gtf         = ch_gtf
+    gene_bed    = ch_gene_bed
+    chrom_sizes = ch_chrom_sizes
+    bwa_index   = ch_bwa_index
+    dragmap     = ch_dragmap
+
+    versions    = ch_versions.ifEmpty(null)
+}
diff --git a/subworkflows/local/quality_control.nf b/subworkflows/local/quality_control.nf
new file mode 100644
index 00000000..dd438bcc
--- /dev/null
+++ b/subworkflows/local/quality_control.nf
@@ -0,0 +1,56 @@
+include { PRESEQ_CCURVE } from '../../modules/nf-core/preseq/ccurve/main'
+include { PRESEQ_LCEXTRAP } from '../../modules/nf-core/preseq/lcextrap/main'
+include { RSEQC_READDISTRIBUTION } from '../../modules/nf-core/rseqc/readdistribution/main'
+include { RSEQC_READDUPLICATION } from '../../modules/nf-core/rseqc/readduplication/main'
+include { RSEQC_INFEREXPERIMENT } from '../../modules/nf-core/rseqc/inferexperiment/main'
+include { BBMAP_PILEUP
} from '../../modules/nf-core/bbmap/pileup/main' + +workflow QUALITY_CONTROL { + take: + bam + bed + + main: + + ch_versions = Channel.empty() + + PRESEQ_CCURVE ( bam ) + ch_versions = ch_versions.mix(PRESEQ_CCURVE.out.versions.first()) + + PRESEQ_LCEXTRAP ( bam ) + ch_versions = ch_versions.mix(PRESEQ_LCEXTRAP.out.versions.first()) + + RSEQC_READDISTRIBUTION ( + bam, + bed + ) + ch_versions = ch_versions.mix(RSEQC_READDISTRIBUTION.out.versions.first()) + + RSEQC_READDUPLICATION ( bam ) + ch_versions = ch_versions.mix(RSEQC_READDUPLICATION.out.versions.first()) + + RSEQC_INFEREXPERIMENT ( + bam, + bed + ) + ch_versions = ch_versions.mix(RSEQC_INFEREXPERIMENT.out.versions.first()) + + BBMAP_PILEUP ( bam ) + ch_versions = ch_versions.mix(BBMAP_PILEUP.out.versions.first()) + + emit: + preseq_ccurve = PRESEQ_CCURVE.out.c_curve + preseq_lcextrap = PRESEQ_LCEXTRAP.out.lc_extrap + + readdistribution_txt = RSEQC_READDISTRIBUTION.out.txt + readduplication_seq_xls = RSEQC_READDUPLICATION.out.seq_xls + readduplication_pos_xls = RSEQC_READDUPLICATION.out.pos_xls + readduplication_pdf = RSEQC_READDUPLICATION.out.pdf + readduplication_rscript = RSEQC_READDUPLICATION.out.rscript + inferexperiment_txt = RSEQC_INFEREXPERIMENT.out.txt + + pileup_stats = BBMAP_PILEUP.out.covstats + pileup_hist = BBMAP_PILEUP.out.hist + + versions = ch_versions +} diff --git a/subworkflows/local/transcript_identification.nf b/subworkflows/local/transcript_identification.nf new file mode 100644 index 00000000..d7c5d422 --- /dev/null +++ b/subworkflows/local/transcript_identification.nf @@ -0,0 +1,82 @@ +/* + * Calls Transcripts and Transcript Start Sites and various cleaning steps + */ + +include { GROHMM } from './grohmm' + +include { PINTS_CALLER } from '../../modules/nf-core/pints/caller/main' +include { CAT_CAT } from '../../modules/nf-core/cat/cat/main' +include { BEDTOOLS_MERGE } from '../../modules/nf-core/bedtools/merge/main' +include { BEDTOOLS_SORT } from '../../modules/nf-core/bedtools/sort/main' +include { BEDTOOLS_INTERSECT as BEDTOOLS_INTERSECT_FILTER } from '../../modules/nf-core/bedtools/intersect/main' + +include { HOMER_GROSEQ } from '../nf-core/homer/groseq/main' + +workflow TRANSCRIPT_INDENTIFICATION { + take: + group_bams + gtf + fasta + + main: + + ch_versions = Channel.empty() + ch_identification_bed = Channel.empty() + + ch_tuning_file = params.tuning_file ? file(params.tuning_file, checkIfExists: true) : file("${projectDir}/assets/tuningparamstotest.csv") + grohmm_td_plot = Channel.empty() + if(!params.skip_grohmm && params.assay_type == "GROseq") { + GROHMM ( group_bams, gtf, ch_tuning_file ) + ch_identification_bed = ch_identification_bed.mix(GROHMM.out.bed) + grohmm_td_plot = GROHMM.out.td_plot + ch_versions = ch_versions.mix(GROHMM.out.versions.first()) + } + + + if(params.assay_type == "GROseq") { + HOMER_GROSEQ ( group_bams, fasta ) + ch_identification_bed = ch_identification_bed.mix(HOMER_GROSEQ.out.bed) + homer_peaks = HOMER_GROSEQ.out.peaks + homer_tagdir = HOMER_GROSEQ.out.tagdir + ch_versions = ch_versions.mix(HOMER_GROSEQ.out.versions.first()) + } + + + // TODO https://github.com/hyulab/PINTS/issues/15 + PINTS_CALLER ( group_bams ) + ch_versions = ch_versions.mix(PINTS_CALLER.out.versions.first()) + // HACK Not sure if this is as good as reporting all of them, but it should + // reduce the overall noise. 
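+    // The next three steps are conceptually equivalent to the following shell pipeline
+    // (illustrative only, file names are placeholders):
+    //   cat <sample>_*_bidirectional_peaks.bed | bedtools sort | bedtools merge > <sample>_TREs.bed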
+    CAT_CAT ( PINTS_CALLER.out.bidirectional_TREs )
+    ch_versions = ch_versions.mix(CAT_CAT.out.versions.first())
+    BEDTOOLS_SORT ( CAT_CAT.out.file_out, "bed" )
+    ch_versions = ch_versions.mix(BEDTOOLS_SORT.out.versions.first())
+    BEDTOOLS_MERGE ( BEDTOOLS_SORT.out.sorted )
+    ch_identification_bed = ch_identification_bed.mix(BEDTOOLS_MERGE.out.bed)
+    ch_versions = ch_versions.mix(BEDTOOLS_MERGE.out.versions.first())
+
+    if(params.filter_bed) {
+        ch_filter_bed = Channel.from(params.filter_bed)
+        BEDTOOLS_INTERSECT_FILTER ( ch_identification_bed.combine(ch_filter_bed), "bed" )
+        ch_versions = ch_versions.mix(BEDTOOLS_INTERSECT_FILTER.out.versions.first())
+    }
+
+    // Use the filtered bed files only if filtering was performed,
+    // otherwise keep the unfiltered identification results
+    if(params.filter_bed) {
+        ch_identification_bed = BEDTOOLS_INTERSECT_FILTER.out.intersect
+    }
+
+    ch_identification_bed
+        // Drop any empty bed files
+        .filter { meta, bed -> bed.size() > 0 }
+        .set { ch_identification_bed_clean }
+
+    emit:
+    grohmm_td_plot
+    homer_peaks
+    homer_tagdir
+
+    transcript_beds = ch_identification_bed_clean
+
+    versions = ch_versions
+}
diff --git a/subworkflows/nf-core/bam_dedup_stats_samtools_umitools/main.nf b/subworkflows/nf-core/bam_dedup_stats_samtools_umitools/main.nf
new file mode 100644
index 00000000..9d4294f1
--- /dev/null
+++ b/subworkflows/nf-core/bam_dedup_stats_samtools_umitools/main.nf
@@ -0,0 +1,56 @@
+//
+// UMI-tools dedup, index BAM file and run samtools stats, flagstat and idxstats
+//
+
+include { UMITOOLS_DEDUP } from '../../../modules/nf-core/umitools/dedup/main'
+include { SAMTOOLS_INDEX } from '../../../modules/nf-core/samtools/index/main'
+include { BAM_STATS_SAMTOOLS } from '../bam_stats_samtools/main'
+
+workflow BAM_DEDUP_STATS_SAMTOOLS_UMITOOLS {
+    take:
+    bam_bai         // channel: [ val(meta), [ bam ], [ bai/csi ] ]
+    get_dedup_stats // boolean: true/false
+
+    main:
+
+    ch_versions = Channel.empty()
+
+    //
+    // UMI-tools dedup
+    //
+    UMITOOLS_DEDUP ( bam_bai, get_dedup_stats )
+    ch_versions = ch_versions.mix(UMITOOLS_DEDUP.out.versions.first())
+
+    //
+    // Index BAM file and run samtools stats, flagstat and idxstats
+    //
+    SAMTOOLS_INDEX ( UMITOOLS_DEDUP.out.bam )
+    ch_versions = ch_versions.mix(SAMTOOLS_INDEX.out.versions.first())
+
+    UMITOOLS_DEDUP.out.bam
+        .join(SAMTOOLS_INDEX.out.bai, by: [0], remainder: true)
+        .join(SAMTOOLS_INDEX.out.csi, by: [0], remainder: true)
+        .map {
+            meta, bam, bai, csi ->
+                if (bai) {
+                    [ meta, bam, bai ]
+                } else {
+                    [ meta, bam, csi ]
+                }
+        }
+        .set { ch_bam_bai }
+
+    BAM_STATS_SAMTOOLS ( ch_bam_bai, [] )
+    ch_versions = ch_versions.mix(BAM_STATS_SAMTOOLS.out.versions)
+
+    emit:
+    bam      = UMITOOLS_DEDUP.out.bam          // channel: [ val(meta), [ bam ] ]
+
+    bai      = SAMTOOLS_INDEX.out.bai          // channel: [ val(meta), [ bai ] ]
+    csi      = SAMTOOLS_INDEX.out.csi          // channel: [ val(meta), [ csi ] ]
+    stats    = BAM_STATS_SAMTOOLS.out.stats    // channel: [ val(meta), [ stats ] ]
+    flagstat = BAM_STATS_SAMTOOLS.out.flagstat // channel: [ val(meta), [ flagstat ] ]
+    idxstats = BAM_STATS_SAMTOOLS.out.idxstats // channel: [ val(meta), [ idxstats ] ]
+
+    versions = ch_versions                     // channel: [ versions.yml ]
+}
diff --git a/subworkflows/nf-core/bam_dedup_stats_samtools_umitools/meta.yml b/subworkflows/nf-core/bam_dedup_stats_samtools_umitools/meta.yml
new file mode 100644
index 00000000..a3b29479
--- /dev/null
+++ b/subworkflows/nf-core/bam_dedup_stats_samtools_umitools/meta.yml
@@ -0,0 +1,56 @@
+name: "bam_dedup_stats_samtools_umitools"
+description: UMI-tools dedup, index BAM file and run samtools stats, flagstat and idxstats
+keywords: + - umi + - dedup + - index + - bam + - sam + - cram +modules: + - umitools/dedup + - samtools/index + - samtools/stats + - samtools/idxstats + - samtools/flagstat +input: + - bam_bai: + type: file + description: BAM/CRAM/SAM file + pattern: "*.{bam,cram,sam}" + - get_dedup_stats: + type: boolean + description: | + Generate output stats when running "umi_tools dedup" +output: + - bam: + type: file + description: Umi deduplicated BAM/CRAM/SAM file + pattern: "*.{bam,cram,sam}" + - bai: + type: file + description: Umi deduplicated BAM/CRAM/SAM samtools index + pattern: "*.{bai,crai,sai}" + - csi: + type: file + description: CSI samtools index + pattern: "*.csi" + - stats: + type: file + description: File containing samtools stats output + pattern: "*.{stats}" + - flagstat: + type: file + description: File containing samtools flagstat output + pattern: "*.{flagstat}" + - idxstats: + type: file + description: File containing samtools idxstats output + pattern: "*.{idxstats}" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@drpatelh" + - "@KamilMaliszArdigen" diff --git a/subworkflows/nf-core/bam_sort_stats_samtools/main.nf b/subworkflows/nf-core/bam_sort_stats_samtools/main.nf new file mode 100644 index 00000000..617871fe --- /dev/null +++ b/subworkflows/nf-core/bam_sort_stats_samtools/main.nf @@ -0,0 +1,50 @@ +// +// Sort, index BAM file and run samtools stats, flagstat and idxstats +// + +include { SAMTOOLS_SORT } from '../../../modules/nf-core/samtools/sort/main' +include { SAMTOOLS_INDEX } from '../../../modules/nf-core/samtools/index/main' +include { BAM_STATS_SAMTOOLS } from '../bam_stats_samtools/main' + +workflow BAM_SORT_STATS_SAMTOOLS { + take: + ch_bam // channel: [ val(meta), [ bam ] ] + ch_fasta // channel: [ fasta ] + + main: + + ch_versions = Channel.empty() + + SAMTOOLS_SORT ( ch_bam ) + ch_versions = ch_versions.mix(SAMTOOLS_SORT.out.versions.first()) + + SAMTOOLS_INDEX ( SAMTOOLS_SORT.out.bam ) + ch_versions = ch_versions.mix(SAMTOOLS_INDEX.out.versions.first()) + + SAMTOOLS_SORT.out.bam + .join(SAMTOOLS_INDEX.out.bai, by: [0], remainder: true) + .join(SAMTOOLS_INDEX.out.csi, by: [0], remainder: true) + .map { + meta, bam, bai, csi -> + if (bai) { + [ meta, bam, bai ] + } else { + [ meta, bam, csi ] + } + } + .set { ch_bam_bai } + + BAM_STATS_SAMTOOLS ( ch_bam_bai, ch_fasta ) + ch_versions = ch_versions.mix(BAM_STATS_SAMTOOLS.out.versions) + + emit: + bam = SAMTOOLS_SORT.out.bam // channel: [ val(meta), [ bam ] ] + bai = SAMTOOLS_INDEX.out.bai // channel: [ val(meta), [ bai ] ] + csi = SAMTOOLS_INDEX.out.csi // channel: [ val(meta), [ csi ] ] + + stats = BAM_STATS_SAMTOOLS.out.stats // channel: [ val(meta), [ stats ] ] + flagstat = BAM_STATS_SAMTOOLS.out.flagstat // channel: [ val(meta), [ flagstat ] ] + idxstats = BAM_STATS_SAMTOOLS.out.idxstats // channel: [ val(meta), [ idxstats ] ] + + versions = ch_versions // channel: [ versions.yml ] +} diff --git a/subworkflows/nf-core/bam_sort_stats_samtools/meta.yml b/subworkflows/nf-core/bam_sort_stats_samtools/meta.yml new file mode 100644 index 00000000..131065be --- /dev/null +++ b/subworkflows/nf-core/bam_sort_stats_samtools/meta.yml @@ -0,0 +1,65 @@ +name: bam_sort_stats_samtools +description: Sort SAM/BAM/CRAM file +keywords: + - sort + - bam + - sam + - cram +modules: + - samtools/sort + - samtools/index + - samtools/stats + - samtools/idxstats + - samtools/flagstat +input: + - meta: + type: map + description: | + Groovy Map containing sample 
information + e.g. [ id:'test', single_end:false ] + - bam: + type: file + description: BAM/CRAM/SAM file + pattern: "*.{bam,cram,sam}" + - fasta: + type: file + description: Reference genome fasta file + pattern: "*.{fasta,fa}" +# TODO Update when we decide on a standard for subworkflow docs +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - bam: + type: file + description: Sorted BAM/CRAM/SAM file + pattern: "*.{bam,cram,sam}" + - bai: + type: file + description: BAM/CRAM/SAM index file + pattern: "*.{bai,crai,sai}" + - crai: + type: file + description: BAM/CRAM/SAM index file + pattern: "*.{bai,crai,sai}" + - stats: + type: file + description: File containing samtools stats output + pattern: "*.{stats}" + - flagstat: + type: file + description: File containing samtools flagstat output + pattern: "*.{flagstat}" + - idxstats: + type: file + description: File containing samtools idxstats output + pattern: "*.{idxstats}" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@drpatelh" + - "@ewels" diff --git a/subworkflows/nf-core/bam_stats_samtools/main.nf b/subworkflows/nf-core/bam_stats_samtools/main.nf new file mode 100644 index 00000000..f4fab25b --- /dev/null +++ b/subworkflows/nf-core/bam_stats_samtools/main.nf @@ -0,0 +1,32 @@ +// +// Run SAMtools stats, flagstat and idxstats +// + +include { SAMTOOLS_STATS } from '../../../modules/nf-core/samtools/stats/main' +include { SAMTOOLS_IDXSTATS } from '../../../modules/nf-core/samtools/idxstats/main' +include { SAMTOOLS_FLAGSTAT } from '../../../modules/nf-core/samtools/flagstat/main' + +workflow BAM_STATS_SAMTOOLS { + take: + bam_bai // channel: [ val(meta), [ bam/cram ], [bai/csi] ] + fasta // channel: [ fasta ] + + main: + ch_versions = Channel.empty() + + SAMTOOLS_STATS ( bam_bai, fasta ) + ch_versions = ch_versions.mix(SAMTOOLS_STATS.out.versions.first()) + + SAMTOOLS_FLAGSTAT ( bam_bai ) + ch_versions = ch_versions.mix(SAMTOOLS_FLAGSTAT.out.versions.first()) + + SAMTOOLS_IDXSTATS ( bam_bai ) + ch_versions = ch_versions.mix(SAMTOOLS_IDXSTATS.out.versions.first()) + + emit: + stats = SAMTOOLS_STATS.out.stats // channel: [ val(meta), [ stats ] ] + flagstat = SAMTOOLS_FLAGSTAT.out.flagstat // channel: [ val(meta), [ flagstat ] ] + idxstats = SAMTOOLS_IDXSTATS.out.idxstats // channel: [ val(meta), [ idxstats ] ] + + versions = ch_versions // channel: [ versions.yml ] +} diff --git a/subworkflows/nf-core/bam_stats_samtools/meta.yml b/subworkflows/nf-core/bam_stats_samtools/meta.yml new file mode 100644 index 00000000..e87822df --- /dev/null +++ b/subworkflows/nf-core/bam_stats_samtools/meta.yml @@ -0,0 +1,55 @@ +name: bam_stats_samtools +description: Produces comprehensive statistics from SAM/BAM/CRAM file +keywords: + - statistics + - counts + - bam + - sam + - cram +modules: + - samtools/stats + - samtools/idxstats + - samtools/flagstat +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - bam: + type: file + description: BAM/CRAM/SAM file + pattern: "*.{bam,cram,sam}" + - bai: + type: file + description: Index for BAM/CRAM/SAM file + pattern: "*.{bai,crai,sai}" + - fasta: + type: file + description: Reference genome fasta file + pattern: "*.{fasta,fa}" +# TODO Update when we decide on a standard for subworkflow docs +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ]
+  - stats:
+      type: file
+      description: File containing samtools stats output
+      pattern: "*.{stats}"
+  - flagstat:
+      type: file
+      description: File containing samtools flagstat output
+      pattern: "*.{flagstat}"
+  - idxstats:
+      type: file
+      description: File containing samtools idxstats output
+      pattern: "*.{idxstats}"
+  - versions:
+      type: file
+      description: File containing software versions
+      pattern: "versions.yml"
+authors:
+  - "@drpatelh"
diff --git a/subworkflows/nf-core/homer/groseq/main.nf b/subworkflows/nf-core/homer/groseq/main.nf
new file mode 100644
index 00000000..ad6f0bf1
--- /dev/null
+++ b/subworkflows/nf-core/homer/groseq/main.nf
@@ -0,0 +1,50 @@
+/*
+ * Identify transcripts with HOMER
+ */
+
+include { HOMER_MAKETAGDIRECTORY } from '../../../../modules/nf-core/homer/maketagdirectory/main'
+include { HOMER_MAKEUCSCFILE } from '../../../../modules/nf-core/homer/makeucscfile/main'
+include { HOMER_FINDPEAKS } from '../../../../modules/nf-core/homer/findpeaks/main'
+include { HOMER_POS2BED } from '../../../../modules/nf-core/homer/pos2bed/main'
+
+workflow HOMER_GROSEQ {
+    take:
+    bam   // channel: [ val(meta), [ reads ] ]
+    fasta // file: /path/to/genome.fasta
+
+    main:
+
+    ch_versions = Channel.empty()
+
+    /*
+     * Create a Tag Directory From The GRO-Seq experiment
+     */
+    HOMER_MAKETAGDIRECTORY ( bam, fasta )
+    ch_versions = ch_versions.mix(HOMER_MAKETAGDIRECTORY.out.versions.first())
+
+    /*
+     * Creating UCSC Visualization Files
+     */
+    HOMER_MAKEUCSCFILE ( HOMER_MAKETAGDIRECTORY.out.tagdir )
+    ch_versions = ch_versions.mix(HOMER_MAKEUCSCFILE.out.versions.first())
+
+    /*
+     * Find transcripts directly from GRO-Seq
+     */
+    HOMER_FINDPEAKS ( HOMER_MAKETAGDIRECTORY.out.tagdir )
+    ch_versions = ch_versions.mix(HOMER_FINDPEAKS.out.versions.first())
+
+    /*
+     * Convert peak file to bed file
+     */
+    HOMER_POS2BED ( HOMER_FINDPEAKS.out.txt )
+    ch_versions = ch_versions.mix(HOMER_POS2BED.out.versions.first())
+
+    emit:
+    tagdir    = HOMER_MAKETAGDIRECTORY.out.tagdir // channel: [ val(meta), [ tagdir ] ]
+    bed_graph = HOMER_MAKEUCSCFILE.out.bedGraph   // channel: [ val(meta), [ tag_dir/*ucsc.bedGraph.gz ] ]
+    peaks     = HOMER_FINDPEAKS.out.txt           // channel: [ val(meta), [ *peaks.txt ] ]
+    bed       = HOMER_POS2BED.out.bed             // channel: [ val(meta), [ *.bed ] ]
+
+    versions = ch_versions                        // channel: [ versions.yml ]
+}
diff --git a/subworkflows/nf-core/homer/groseq/meta.yml b/subworkflows/nf-core/homer/groseq/meta.yml
new file mode 100644
index 00000000..4bd36a88
--- /dev/null
+++ b/subworkflows/nf-core/homer/groseq/meta.yml
@@ -0,0 +1,48 @@
+name: homer_groseq
+description: Identify transcripts directly from GRO-seq (and similar nascent transcription) data with HOMER by creating a tag directory, generating UCSC visualisation files, finding peaks/transcripts and converting them to a BED file.
+keywords:
+  - homer
+  - groseq
+  - nascent
+modules:
+  - homer/maketagdirectory
+  - homer/makeucscfile
+  - homer/findpeaks
+  - homer/pos2bed
+input:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g.
[ id:'test' ] + - input: + type: list + description: list of BAM files, also able to take SAM and BED as input + pattern: "[ *.{bam/sam/bed} ]" + - fasta: + type: file + description: The reference fasta file + pattern: "*.fasta" +output: + - tagdir: + type: directory + description: The "Tag Directory" + pattern: "*_tagdir" + - bedGraph: + type: file + description: The UCSC bed graph + pattern: "*.bedGraph.gz" + - peaks: + type: file + description: The found peaks + pattern: "*.peaks.txt" + - bed: + type: file + description: A BED file of the found peaks + pattern: "*.bed" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@Emiller88" diff --git a/tests/config/nextflow.config b/tests/config/nextflow.config new file mode 100644 index 00000000..ab506980 --- /dev/null +++ b/tests/config/nextflow.config @@ -0,0 +1,35 @@ +params { + outdir = "output/" + publish_dir_mode = "copy" + enable_conda = false + singularity_pull_docker_container = false + max_cpus = 2 + max_memory = 6.GB + max_time = 6.h +} + +process { + cpus = 2 + memory = 6.GB + time = 48.h +} + +if ("$PROFILE" == "singularity") { + singularity.enabled = true + singularity.autoMounts = true +} else if ("$PROFILE" == "conda") { + params.enable_conda = true +} else { + docker.enabled = true + docker.runOptions = '-u \$(id -u):\$(id -g)' +} + +// Load test_data.config containing paths to test data +includeConfig 'test_data.config' + +// Load modules.config for default module params +includeConfig '../../conf/modules.config' + +manifest { + nextflowVersion = '!>=21.10.3' +} diff --git a/tests/config/pytest_software.yml b/tests/config/pytest_software.yml new file mode 100644 index 00000000..189adecc --- /dev/null +++ b/tests/config/pytest_software.yml @@ -0,0 +1,5 @@ +# FIXME Tests don't work because of bin/ scripts +grohmm: + - modules/local/grohmm/** + - subworkflows/local/grohmm.nf + - tests/modules/local/grohmm/** diff --git a/tests/config/test_data.config b/tests/config/test_data.config new file mode 100644 index 00000000..d154d161 --- /dev/null +++ b/tests/config/test_data.config @@ -0,0 +1,19 @@ +def test_data_dir = "${launchDir}/tests/data/" +def nf_core_modules_data = "https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/" +def nf_core_nascent_data = "https://raw.githubusercontent.com/nf-core/test-datasets/nascent" + +try { + includeConfig "https://raw.githubusercontent.com/nf-core/modules/master/tests/config/test_data.config" +} catch (Exception e) { + System.err.println("WARNING: Could not load nf-core/modules test data config") +} + +params { + test_data { + 'grohmm' { + s0mR1 = "https://raw.githubusercontent.com/Kraus-Lab/groHMM/master/inst/extdata/S0mR1.bam" + s40mR1 = "https://raw.githubusercontent.com/Kraus-Lab/groHMM/master/inst/extdata/S40mR1.bam" + tune_csv = "${nf_core_nascent_data}/misc/tune.csv" + } + } +} diff --git a/tests/modules/local/grohmm/main.nf b/tests/modules/local/grohmm/main.nf new file mode 100644 index 00000000..1d62210e --- /dev/null +++ b/tests/modules/local/grohmm/main.nf @@ -0,0 +1,37 @@ +#!/usr/bin/env nextflow + +nextflow.enable.dsl = 2 + +include { GROHMM_MAKEUCSCFILE } from '../../../../modules/local/grohmm/makeucscfile/main.nf' +include { GROHMM_TRANSCRIPTCALLING } from '../../../../modules/local/grohmm/transcriptcalling/main.nf' +include { GROHMM_PARAMETERTUNING } from '../../../../modules/local/grohmm/parametertuning/main.nf' + +workflow test_grohmm_makeucscfile { + def input = [] + input = [[ id: 'test' ], + [ 
file(params.test_data['grohmm']['s0mR1'], checkIfExists: true), + file(params.test_data['grohmm']['s40mR1'], checkIfExists: true) ]] + GROHMM_MAKEUCSCFILE ( input ) +} + +workflow test_grohmm_transcriptcalling { + def input = [] + input = [[ id: 'test' ], + [ file(params.test_data['grohmm']['s0mR1'], checkIfExists: true), + file(params.test_data['grohmm']['s40mR1'], checkIfExists: true) ]] + gtf = file(params.test_data['homo_sapiens']['genome']['genome_gtf'], checkIfExists: true) + + GROHMM_TRANSCRIPTCALLING ( + input, + gtf, + [] + ) +} + +workflow test_grohmm_parametertuning { + def input = [] + input = [[ id: 'test' ], + [ file(params.test_data['grohmm']['s0mR1'], checkIfExists: true), + file(params.test_data['grohmm']['s40mR1'], checkIfExists: true) ]] + GROHMM_PARAMETERTUNING ( input ) +} diff --git a/tests/modules/local/grohmm/nextflow.config b/tests/modules/local/grohmm/nextflow.config new file mode 100644 index 00000000..9ae75d18 --- /dev/null +++ b/tests/modules/local/grohmm/nextflow.config @@ -0,0 +1,12 @@ +params.transcript_identification = 'grohmm' + +process { + + publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" } + + withName: GROHMM_TRANSCRIPTCALLING { + ext.when = { params.transcript_identification == "grohmm" } + } + +} + diff --git a/tests/modules/local/grohmm/test.yml b/tests/modules/local/grohmm/test.yml new file mode 100644 index 00000000..2b41ffde --- /dev/null +++ b/tests/modules/local/grohmm/test.yml @@ -0,0 +1,30 @@ +- name: Run grohmm makeucscfile + command: nextflow run tests/modules/local/grohmm/ -entry test_grohmm_makeucscfile -c tests/config/nextflow.config -c tests/modules/local/grohmm/nextflow.config --transcript_identification grohmm + # FIXME Tests don't work because of bin/ scripts + # https://github.com/nextflow-io/nextflow/issues/1868 + # tags: + # - grohmm + # TODO Add Files + # files: + # - path: output/test_grohmm/test.transcripts.txt + # - path: output/test_grohmm/test.eval.txt + # - path: output/test_grohmm/test.fwd.wig + # - path: output/test_grohmm/test.rev.wig + # - path: output/test_grohmm/test.fwd.normalized.wig + # - path: output/test_grohmm/test.rev.normalized.wig + +- name: Run grohmm transcriptcalling + command: nextflow run tests/modules/local/grohmm/ -entry test_grohmm_transcriptcalling -c tests/config/nextflow.config -c tests/modules/local/grohmm/nextflow.config --transcript_identification grohmm + # FIXME Tests don't work because of bin/ scripts + # https://github.com/nextflow-io/nextflow/issues/1868 + # tags: + # - grohmm + # TODO Add Files + +- name: Run grohmm parametertuning + command: nextflow run tests/modules/local/grohmm/ -entry test_grohmm_parametertuning -c tests/modules/local/grohmm/nextflow.config -c tests/config/nextflow.config --transcript_identification grohmm + # FIXME Tests don't work because of bin/ scripts + # https://github.com/nextflow-io/nextflow/issues/1868 + # tags: + # - grohmm + # TODO Add Files diff --git a/tests/samplesheets/copro.csv b/tests/samplesheets/copro.csv new file mode 100644 index 00000000..896fd74d --- /dev/null +++ b/tests/samplesheets/copro.csv @@ -0,0 +1,3 @@ +sample,fastq_1,fastq_2 +caco2_REP1,https://www.encodeproject.org/files/ENCFF594DTA/@@download/ENCFF594DTA.fastq.gz,https://www.encodeproject.org/files/ENCFF669OMY/@@download/ENCFF669OMY.fastq.gz +caco2_REP2,https://www.encodeproject.org/files/ENCFF039OZP/@@download/ENCFF039OZP.fastq.gz,https://www.encodeproject.org/files/ENCFF900WVT/@@download/ENCFF900WVT.fastq.gz diff --git 
a/tests/samplesheets/grocap.csv b/tests/samplesheets/grocap.csv new file mode 100644 index 00000000..abd993d5 --- /dev/null +++ b/tests/samplesheets/grocap.csv @@ -0,0 +1,2 @@ +sample,fastq_1,fastq_2 +GROcap_REP1,https://www.encodeproject.org/files/ENCFF028THC/@@download/ENCFF028THC.fastq.gz, diff --git a/tests/samplesheets/region.bed b/tests/samplesheets/region.bed new file mode 100644 index 00000000..500757b8 --- /dev/null +++ b/tests/samplesheets/region.bed @@ -0,0 +1,3 @@ +chr21 29018950 29019462 +chr21 33542180 33543325 +chr21 36297960 36298439 diff --git a/tests/subworkflows/local/grohmm/main.nf b/tests/subworkflows/local/grohmm/main.nf new file mode 100644 index 00000000..b6843d88 --- /dev/null +++ b/tests/subworkflows/local/grohmm/main.nf @@ -0,0 +1,17 @@ +#!/usr/bin/env nextflow + +nextflow.enable.dsl = 2 + +include { GROHMM } from '../../../../subworkflows/local/grohmm' + +workflow test_grohmm { + def input = [[ id: 'test' ], + [ file(params.test_data['grohmm']['s0mR1'], checkIfExists: true), + file(params.test_data['grohmm']['s40mR1'], checkIfExists: true) ]] + gtf = file(params.test_data['homo_sapiens']['genome']['genome_gtf'], checkIfExists: true) + + GROHMM ( + input, + gtf + ) +} diff --git a/tests/subworkflows/local/grohmm/test.yml b/tests/subworkflows/local/grohmm/test.yml new file mode 100644 index 00000000..1a451cfb --- /dev/null +++ b/tests/subworkflows/local/grohmm/test.yml @@ -0,0 +1,14 @@ +- name: subworkflow grohmm + command: nextflow run ./tests/subworkflows/local/grohmm/ -entry test_grohmm -c tests/config/nextflow.config -c tests/subworkflows/local/grohmm/nextflow.config --transcript_identification grohmm + # FIXME Tests don't work because of bin/ scripts + # tags: + # - grohmm + # TODO md5 sum these + files: + - path: output/grohmm/test.transcripts.txt + - path: output/grohmm/test.transcripts.bed + - path: output/grohmm/test.normalized.wig + - path: output/grohmm/test.collapsed.wig + - path: output/grohmm/test.wig + - path: output/grohmm/test.minus.wig + - path: output/grohmm/test.plus.wig diff --git a/tests/subworkflows/local/grohmm/tuningparams_small.csv b/tests/subworkflows/local/grohmm/tuningparams_small.csv new file mode 100644 index 00000000..047b4186 --- /dev/null +++ b/tests/subworkflows/local/grohmm/tuningparams_small.csv @@ -0,0 +1,3 @@ +"LtProbB","UTS" +-100,5 +-200,5 diff --git a/tests/test_default.yml b/tests/test_default.yml new file mode 100644 index 00000000..66c8ed91 --- /dev/null +++ b/tests/test_default.yml @@ -0,0 +1,10 @@ +- name: Run test for everything + command: nextflow run main.nf -profile docker,test + files: + - path: output/multiqc_report.html + - path: output/test_grohmm/test.transcripts.txt + - path: output/test_grohmm/test.eval.txt + - path: output/test_grohmm/test.fwd.wig + - path: output/test_grohmm/test.rev.wig + - path: output/test_grohmm/test.fwd.normalized.wig + - path: output/test_grohmm/test.rev.normalized.wig diff --git a/workflows/nascent.nf b/workflows/nascent.nf new file mode 100644 index 00000000..6912d1ba --- /dev/null +++ b/workflows/nascent.nf @@ -0,0 +1,314 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + VALIDATE INPUTS +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ + +def valid_params = [ + aligners : ['bwa', 'bwamem2', 'dragmap'] +] + +def summary_params = NfcoreSchema.paramsSummaryMap(workflow, params) + +// Validate input parameters +WorkflowNascent.initialise(params, log) + +// Check input path parameters to see 
if they exist +def checkPathParamList = [ + params.input, + params.multiqc_config, + params.fasta, + params.gtf, + params.gff, + params.gene_bed, + params.bwa_index, + params.bwamem2_index, + params.dragmap +] +for (param in checkPathParamList) { if (param) { file(param, checkIfExists: true) } } + +// Check mandatory parameters +if (params.input) { ch_input = file(params.input) } else { exit 1, 'Input samplesheet not specified!' } + +// Check alignment parameters +def prepareToolIndices = [] +if (!params.skip_alignment) { prepareToolIndices << params.aligner } + +if (params.filter_bed) { + ch_filter_bed = file(params.filter_bed, checkIfExists: true) + // if (ch_ribo_db.isEmpty()) {exit 1, "File provided with --ribo_database_manifest is empty: ${ch_ribo_db.getName()}!"} +} + +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + CONFIG FILES +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ + +ch_multiqc_config = Channel.fromPath("$projectDir/assets/multiqc_config.yml", checkIfExists: true) +ch_multiqc_custom_config = params.multiqc_config ? Channel.fromPath( params.multiqc_config, checkIfExists: true ) : Channel.empty() +ch_multiqc_logo = params.multiqc_logo ? Channel.fromPath( params.multiqc_logo, checkIfExists: true ) : Channel.empty() +ch_multiqc_custom_methods_description = params.multiqc_methods_description ? file(params.multiqc_methods_description, checkIfExists: true) : file("$projectDir/assets/methods_description_template.yml", checkIfExists: true) + +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + IMPORT LOCAL MODULES/SUBWORKFLOWS +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ + +include { BED2SAF } from '../modules/local/bed2saf' + +include { INPUT_CHECK } from '../subworkflows/local/input_check' +include { PREPARE_GENOME } from '../subworkflows/local/prepare_genome' +include { ALIGN_BWA } from '../subworkflows/local/align_bwa/main' +include { ALIGN_BWAMEM2 } from '../subworkflows/local/align_bwamem2/main' +include { ALIGN_DRAGMAP } from '../subworkflows/local/align_dragmap/main' +include { QUALITY_CONTROL } from '../subworkflows/local/quality_control.nf' +include { COVERAGE_GRAPHS } from '../subworkflows/local/coverage_graphs.nf' +include { TRANSCRIPT_INDENTIFICATION } from '../subworkflows/local/transcript_identification.nf' + +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + IMPORT NF-CORE MODULES/SUBWORKFLOWS +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ + +include { FASTQC } from '../modules/nf-core/fastqc/main' +include { FASTP } from '../modules/nf-core/fastp/main' +include { CAT_FASTQ } from '../modules/nf-core/cat/fastq/main' +include { + SUBREAD_FEATURECOUNTS as SUBREAD_FEATURECOUNTS_GENE + SUBREAD_FEATURECOUNTS as SUBREAD_FEATURECOUNTS_PREDICTED } from '../modules/nf-core/subread/featurecounts/main' +include { MULTIQC } from '../modules/nf-core/multiqc/main' +include { CUSTOM_DUMPSOFTWAREVERSIONS } from '../modules/nf-core/custom/dumpsoftwareversions/main' + + +// +// SUBWORKFLOW: Consisting entirely of nf-core/modules +// +include { BAM_DEDUP_STATS_SAMTOOLS_UMITOOLS } from '../subworkflows/nf-core/bam_dedup_stats_samtools_umitools/main' + +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + RUN MAIN WORKFLOW 
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ + +// Info required for completion email and summary +def multiqc_report = [] + +workflow NASCENT { + + ch_versions = Channel.empty() + ch_nascent_logo = Channel.fromPath("$projectDir/docs/images/nf-core-nascent_logo_light.png") + + // + // SUBWORKFLOW: Uncompress and prepare reference genome files + // + PREPARE_GENOME ( + prepareToolIndices + ) + ch_versions = ch_versions.mix(PREPARE_GENOME.out.versions.first()) + + // + // SUBWORKFLOW: Read in samplesheet, validate and stage input files + // + INPUT_CHECK ( + ch_input + ) + ch_versions = ch_versions.mix(INPUT_CHECK.out.versions) + + // + // MODULE: Run FastQC + // + FASTQC ( + INPUT_CHECK.out.reads + ) + ch_versions = ch_versions.mix(FASTQC.out.versions.first()) + + ch_reads = Channel.empty() + if(!params.skip_trimming) { + FASTP ( INPUT_CHECK.out.reads, [], [] ) + ch_reads = FASTP.out.reads + ch_versions = ch_versions.mix(FASTP.out.versions.first()) + } else { + ch_reads = INPUT_CHECK.out.reads + } + + // + // SUBWORKFLOW: Alignment with BWA + // + ch_genome_bam = Channel.empty() + ch_genome_bai = Channel.empty() + ch_samtools_stats = Channel.empty() + ch_samtools_flagstat = Channel.empty() + ch_samtools_idxstats = Channel.empty() + ch_star_multiqc = Channel.empty() + ch_aligner_pca_multiqc = Channel.empty() + ch_aligner_clustering_multiqc = Channel.empty() + if (!params.skip_alignment && params.aligner == 'bwa') { + ALIGN_BWA( + ch_reads, + PREPARE_GENOME.out.bwa_index, + ) + ch_genome_bam = ALIGN_BWA.out.bam + ch_genome_bai = ALIGN_BWA.out.bai + ch_samtools_stats = ALIGN_BWA.out.stats + ch_samtools_flagstat = ALIGN_BWA.out.flagstat + ch_samtools_idxstats = ALIGN_BWA.out.idxstats + + ch_versions = ch_versions.mix(ALIGN_BWA.out.versions.first()) + } else if (!params.skip_alignment && params.aligner == 'bwamem2') { + ALIGN_BWAMEM2( + ch_reads, + PREPARE_GENOME.out.bwa_index, + ) + ch_genome_bam = ALIGN_BWAMEM2.out.bam + ch_genome_bai = ALIGN_BWAMEM2.out.bai + ch_samtools_stats = ALIGN_BWAMEM2.out.stats + ch_samtools_flagstat = ALIGN_BWAMEM2.out.flagstat + ch_samtools_idxstats = ALIGN_BWAMEM2.out.idxstats + + ch_versions = ch_versions.mix(ALIGN_BWAMEM2.out.versions) + } else if (!params.skip_alignment && params.aligner == 'dragmap') { + ALIGN_DRAGMAP( + ch_reads, + PREPARE_GENOME.out.dragmap + ) + ch_genome_bam = ALIGN_DRAGMAP.out.bam + ch_genome_bai = ALIGN_DRAGMAP.out.bai + ch_samtools_stats = ALIGN_DRAGMAP.out.stats + ch_samtools_flagstat = ALIGN_DRAGMAP.out.flagstat + ch_samtools_idxstats = ALIGN_DRAGMAP.out.idxstats + + ch_versions = ch_versions.mix(ALIGN_DRAGMAP.out.versions) + } + + if(params.with_umi) { + BAM_DEDUP_STATS_SAMTOOLS_UMITOOLS ( + ch_genome_bam.join(ch_genome_bai, by: [0]), + params.umitools_dedup_stats + ) + ch_genome_bam = BAM_DEDUP_STATS_SAMTOOLS_UMITOOLS.out.bam + ch_genome_bai = BAM_DEDUP_STATS_SAMTOOLS_UMITOOLS.out.bai + ch_samtools_stats = BAM_DEDUP_STATS_SAMTOOLS_UMITOOLS.out.stats + ch_samtools_flagstat = BAM_DEDUP_STATS_SAMTOOLS_UMITOOLS.out.flagstat + ch_samtools_idxstats = BAM_DEDUP_STATS_SAMTOOLS_UMITOOLS.out.idxstats + + ch_versions = ch_versions.mix(BAM_DEDUP_STATS_SAMTOOLS_UMITOOLS.out.versions) + } + + QUALITY_CONTROL ( + ch_genome_bam, + PREPARE_GENOME.out.gene_bed + ) + ch_versions = ch_versions.mix(QUALITY_CONTROL.out.versions) + + COVERAGE_GRAPHS ( + ch_genome_bam, + ch_genome_bai, + PREPARE_GENOME.out.chrom_sizes, + PREPARE_GENOME.out.fasta, + PREPARE_GENOME.out.fai + ) + ch_versions = 
ch_versions.mix(COVERAGE_GRAPHS.out.versions)
+
+    //
+    // SUBWORKFLOW: Transcript identification
+    //
+    ch_genome_bam.map {
+        meta, bam ->
+            fmeta = meta.findAll { it.key != 'read_group' }
+            fmeta.id = fmeta.id.split('_')[0..-3].join('_')
+            [ fmeta, bam ] }
+        .groupTuple(by: [0])
+        .map { it -> [ it[0], it[1].flatten() ] }
+        .set { ch_sort_bam }
+
+    TRANSCRIPT_INDENTIFICATION (
+        ch_sort_bam,
+        PREPARE_GENOME.out.gtf,
+        PREPARE_GENOME.out.fasta
+    )
+    ch_grohmm_multiqc = TRANSCRIPT_INDENTIFICATION.out.grohmm_td_plot.collect()
+    ch_homer_multiqc = TRANSCRIPT_INDENTIFICATION.out.homer_peaks
+    ch_homer_multiqc = ch_homer_multiqc.mix(TRANSCRIPT_INDENTIFICATION.out.homer_tagdir)
+    ch_versions = ch_versions.mix(TRANSCRIPT_INDENTIFICATION.out.versions)
+
+    SUBREAD_FEATURECOUNTS_PREDICTED (
+        ch_sort_bam.combine(
+            BED2SAF (
+                TRANSCRIPT_INDENTIFICATION.out.transcript_beds
+            ).saf.map { it[1] }
+        )
+    )
+    ch_versions = ch_versions.mix(SUBREAD_FEATURECOUNTS_PREDICTED.out.versions.first())
+
+    SUBREAD_FEATURECOUNTS_GENE (
+        ch_sort_bam.combine(PREPARE_GENOME.out.gtf)
+    )
+    ch_versions = ch_versions.mix(SUBREAD_FEATURECOUNTS_GENE.out.versions.first())
+
+    CUSTOM_DUMPSOFTWAREVERSIONS (
+        ch_versions.unique{ it.text }.collectFile(name: 'collated_versions.yml')
+    )
+
+    //
+    // MODULE: MultiQC
+    //
+    workflow_summary = WorkflowNascent.paramsSummaryMultiqc(workflow, summary_params)
+    ch_workflow_summary = Channel.value(workflow_summary)
+
+    methods_description = WorkflowNascent.methodsDescriptionText(workflow, ch_multiqc_custom_methods_description)
+    ch_methods_description = Channel.value(methods_description)
+
+    ch_multiqc_files = Channel.empty()
+    ch_multiqc_files = ch_multiqc_files.mix(ch_workflow_summary.collectFile(name: 'workflow_summary_mqc.yaml'))
+    ch_multiqc_files = ch_multiqc_files.mix(ch_methods_description.collectFile(name: 'methods_description_mqc.yaml'))
+    ch_multiqc_files = ch_multiqc_files.mix(CUSTOM_DUMPSOFTWAREVERSIONS.out.mqc_yml.collect())
+    ch_multiqc_files = ch_multiqc_files.mix(FASTQC.out.zip.collect{it[1]}.ifEmpty([]))
+    ch_multiqc_files = ch_multiqc_files.mix(ch_samtools_stats.collect{it[1]}.ifEmpty([]))
+    ch_multiqc_files = ch_multiqc_files.mix(ch_samtools_flagstat.collect{it[1]}.ifEmpty([]))
+    ch_multiqc_files = ch_multiqc_files.mix(ch_samtools_idxstats.collect{it[1]}.ifEmpty([]))
+    ch_multiqc_files = ch_multiqc_files.mix(QUALITY_CONTROL.out.preseq_ccurve.collect{it[1]}.ifEmpty([]))
+    ch_multiqc_files = ch_multiqc_files.mix(QUALITY_CONTROL.out.preseq_lcextrap.collect{it[1]}.ifEmpty([]))
+    ch_multiqc_files = ch_multiqc_files.mix(QUALITY_CONTROL.out.readdistribution_txt.collect{it[1]}.ifEmpty([]))
+    ch_multiqc_files = ch_multiqc_files.mix(QUALITY_CONTROL.out.readduplication_seq_xls.collect{it[1]}.ifEmpty([]))
+    ch_multiqc_files = ch_multiqc_files.mix(QUALITY_CONTROL.out.readduplication_pos_xls.collect{it[1]}.ifEmpty([]))
+    ch_multiqc_files = ch_multiqc_files.mix(QUALITY_CONTROL.out.inferexperiment_txt.collect{it[1]}.ifEmpty([]))
+    ch_multiqc_files = ch_multiqc_files.mix(ch_grohmm_multiqc.collect{it[1]}.ifEmpty([]))
+    ch_multiqc_files = ch_multiqc_files.mix(ch_homer_multiqc.collect{it[1]}.ifEmpty([]))
+    ch_multiqc_files = ch_multiqc_files.mix(SUBREAD_FEATURECOUNTS_PREDICTED.out.summary.collect{it[1]}.ifEmpty([]))
+    ch_multiqc_files = ch_multiqc_files.mix(SUBREAD_FEATURECOUNTS_GENE.out.summary.collect{it[1]}.ifEmpty([]))
+
+    MULTIQC (
+        ch_multiqc_files.collect(),
+        ch_multiqc_config.collect().ifEmpty([]),
+        ch_multiqc_custom_config.collect().ifEmpty([]),
+
ch_multiqc_logo.collect().ifEmpty([]) + ) + multiqc_report = MULTIQC.out.report.toList() + ch_versions = ch_versions.mix(MULTIQC.out.versions) +} + +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + COMPLETION EMAIL AND SUMMARY +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ + +workflow.onComplete { + if (params.email || params.email_on_fail) { + NfcoreTemplate.email(workflow, params, summary_params, projectDir, log, multiqc_report) + } + NfcoreTemplate.summary(workflow, params, log) + if (params.hook_url) { + NfcoreTemplate.adaptivecard(workflow, params, summary_params, projectDir, log) + } +} + +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + THE END +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/
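
Note on the replicate-grouping step in workflows/nascent.nf above: the map/groupTuple chain collapses replicate suffixes out of the sample meta ID so that all BAMs belonging to one sample reach TRANSCRIPT_INDENTIFICATION together. The following is a minimal standalone sketch of that pattern, assuming the usual nf-core '<sample>_REP<n>_T<n>' ID convention; the 'caco2' IDs and .bam strings are illustrative placeholders, not pipeline test data.

#!/usr/bin/env nextflow

nextflow.enable.dsl = 2

workflow {
    // Hypothetical per-replicate BAM channel keyed by a meta map, standing in for ch_genome_bam
    Channel.of(
        [ [ id: 'caco2_REP1_T1' ], 'caco2_REP1_T1.bam' ],
        [ [ id: 'caco2_REP2_T1' ], 'caco2_REP2_T1.bam' ]
    )
    .map { meta, bam ->
        def fmeta = meta.findAll { it.key != 'read_group' } // drop read-group info before grouping
        fmeta.id = fmeta.id.split('_')[0..-3].join('_')     // 'caco2_REP1_T1' -> 'caco2'
        [ fmeta, bam ]
    }
    .groupTuple(by: [0])                                    // one emission per sample
    .map { meta, bams -> [ meta, bams.flatten() ] }
    .view()   // [[id:caco2], [caco2_REP1_T1.bam, caco2_REP2_T1.bam]]
}

Grouping before transcript identification means the groHMM and HOMER steps see every replicate of a sample in a single invocation rather than once per FASTQ pair.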