diff --git a/.github/workflows/test_workflow.yml b/.github/workflows/test_workflow.yml new file mode 100644 index 0000000..3bb06df --- /dev/null +++ b/.github/workflows/test_workflow.yml @@ -0,0 +1,44 @@ +name: Test Nextflow Workflow + +on: + push: + branches: [ master ] + pull_request: + branches: [ master ] + +jobs: + build: + + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + - name: Set up Python 3.9 + uses: actions/setup-python@v4 + with: + python-version: 3.9 + # - name: Install dependencies + # run: | + # # Installing Nextflow + # curl -s https://get.nextflow.io | bash + # mv nextflow /usr/local/bin + + # # Installing Conda + # wget https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh + # chmod +x Miniconda3-latest-Linux-x86_64.sh + # bash ./Miniconda3-latest-Linux-x86_64.sh -b -p $HOME/miniconda + # export PATH="$HOME/miniconda/bin:$PATH" + # conda init bash + # conda config --set always_yes yes --set changeps1 no + + # # Install mamba + # conda install -c conda-forge mamba + - name: Set up Conda + uses: conda-incubator/setup-miniconda@v2 + with: + activate-environment: nextflow + environment-file: test/test_environment.yml + auto-activate-base: true + - name: Test Workflow + shell: bash -el {0} + run: | + conda activate nextflow && make run \ No newline at end of file diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..ebc3ff2 --- /dev/null +++ b/.gitignore @@ -0,0 +1,3 @@ +*.ipynb_checkpoints* +*.env +*credentials \ No newline at end of file diff --git a/.gitmodules b/.gitmodules new file mode 100644 index 0000000..4ac8ee2 --- /dev/null +++ b/.gitmodules @@ -0,0 +1,3 @@ +[submodule "GNPS2_DeploymentTooling"] + path = GNPS2_DeploymentTooling + url = https://github.com/Wang-Bioinformatics-Lab/GNPS2_DeploymentTooling.git diff --git a/GNPS2_DeploymentTooling b/GNPS2_DeploymentTooling new file mode 160000 index 0000000..a92a048 --- /dev/null +++ b/GNPS2_DeploymentTooling @@ -0,0 +1 @@ +Subproject commit 
a92a0488cd8032ad1274b58c01d6aed47fc7c7c6 diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..0df889f --- /dev/null +++ b/Makefile @@ -0,0 +1,8 @@ +run: + nextflow run ./nf_workflow.nf -resume -c nextflow.config + +run_hpcc: + nextflow run ./nf_workflow.nf -resume -c nextflow_hpcc.config + +run_docker: + nextflow run ./nf_workflow.nf -resume -with-docker \ No newline at end of file diff --git a/README.md b/README.md new file mode 100644 index 0000000..b592e12 --- /dev/null +++ b/README.md @@ -0,0 +1,85 @@ +# Nextflow Template + +To test the workflow, simply run + +``` +make run +``` + +To learn Nextflow, check out this documentation: + +https://www.nextflow.io/docs/latest/index.html + +## Installation + +You will need to have conda, mamba, and nextflow installed to run things locally. + +## GNPS2 Workflow Input information + +Check the definition for the workflow input and display parameters: +https://wang-bioinformatics-lab.github.io/GNPS2_Documentation/workflowdev/ + + +## Deployment to GNPS2 + +In order to deploy, we have a set of deployment tools that will enable deployment to the various GNPS2 systems. To run the deployment, you will need the following setup steps completed: + +1. Deployment submodules checked out +1. Conda environment and dependencies installed +1. SSH configuration updated + +### Checking out the deployment submodules + +Use the following commands from the deploy_gnps2 folder. + +You might need to check out the submodule; do this by running + +``` +git submodule init +git submodule update +``` + +You will also need to specify the username you have been given on the server, i.e. the one your public key is associated with. 
If you do not want to enter this every time you do a deployment, you can create a Makefile.credentials file in the deploy_gnps2 folder with the following contents: + +``` +USERNAME= +``` + +### Deployment Dependencies + +You will need to install the dependencies in GNPS2_DeploymentTooling/requirements.txt on your own local machine. + +You can find the deployment tooling [here](https://github.com/Wang-Bioinformatics-Lab/GNPS2_DeploymentTooling). + +One way to do this is to use conda to create an environment (activate it before running pip), for example: + +``` +conda create -n deploy python=3.8 +pip install -r GNPS2_DeploymentTooling/requirements.txt +``` + +### SSH Configuration + +Also update your SSH config file to include the following SSH target: + +``` +Host ucr-gnps2-dev + Hostname ucr-lemon.duckdns.org +``` + +### Deploying to Dev Server + +To deploy to development, use the following command. If you don't have your SSH public key installed on the server, you will not be able to deploy. + +``` +make deploy-dev +``` + +### Deploying to Production Server + +To deploy to production, use the following command. If you don't have your SSH public key installed on the server, you will not be able to deploy. 
import sys
import argparse
import pandas as pd


def main(argv=None):
    """Write a fixed two-column, one-row table to a tab-separated file.

    Command-line arguments:
        input_filename: Accepted so the script matches how the workflow
            invokes it; the file is not read.
        output_filename: Path of the TSV file to create.

    Args:
        argv: Optional list of argument strings (without the program name).
            When ``None``, argparse reads ``sys.argv[1:]``, so calling
            ``main()`` with no arguments behaves as before.
    """
    parser = argparse.ArgumentParser(description='Test write out a file.')
    parser.add_argument('input_filename')
    parser.add_argument('output_filename')

    args = parser.parse_args(argv)

    # Build the frame from one-element lists. Assigning a scalar to a column
    # of an empty DataFrame broadcasts over zero rows, which left the output
    # with a header line and no data.
    df = pd.DataFrame({"OUTPUT": ["1"], "OUTPUT2": ["2"]})

    # saving file
    df.to_csv(args.output_filename, sep="\t", index=False)


if __name__ == "__main__":
    main()
+../GNPS2_DeploymentTooling/fabric.yml \ No newline at end of file diff --git a/jupyter/.dockerignore b/jupyter/.dockerignore new file mode 100644 index 0000000..6320cd2 --- /dev/null +++ b/jupyter/.dockerignore @@ -0,0 +1 @@ +data \ No newline at end of file diff --git a/jupyter/Dockerfile b/jupyter/Dockerfile new file mode 100644 index 0000000..9be562a --- /dev/null +++ b/jupyter/Dockerfile @@ -0,0 +1,14 @@ +from jupyter/datascience-notebook:lab-3.4.4 + +COPY requirements.txt / +RUN pip install -r /requirements.txt +RUN pip install jupyterlab-git + +# Nextflow +RUN conda install -y mamba -c conda-forge +RUN mamba install -y -c bioconda nextflow + +# Installing system dependencies +USER root +RUN apt-get update && apt-get install build-essential -y +USER jovyan \ No newline at end of file diff --git a/jupyter/Dockerfile.coder b/jupyter/Dockerfile.coder new file mode 100644 index 0000000..ba36454 --- /dev/null +++ b/jupyter/Dockerfile.coder @@ -0,0 +1,17 @@ +from linuxserver/code-server:4.7.0 + +RUN apt-get update && apt-get install -y \ + git-core +RUN apt-get install build-essential wget -y + +ENV CONDA_DIR /opt/conda +RUN wget https://repo.anaconda.com/miniconda/Miniconda3-py39_4.12.0-Linux-x86_64.sh -O ~/miniconda.sh && \ + /bin/bash ~/miniconda.sh -b -p $CONDA_DIR +ENV PATH=$CONDA_DIR/bin:$PATH + +COPY requirements.txt / +RUN pip install -r /requirements.txt + +# Nextflow +RUN conda install -y mamba -c conda-forge +RUN mamba install -y -c bioconda nextflow \ No newline at end of file diff --git a/jupyter/Makefile b/jupyter/Makefile new file mode 100644 index 0000000..26e4d8b --- /dev/null +++ b/jupyter/Makefile @@ -0,0 +1,22 @@ + +jupyter-compose: + docker-compose build + docker-compose -f docker-compose.yml --compatibility up + +jupyter-compose-background: + docker-compose build + docker-compose -f docker-compose.yml --compatibility up -d + +jupyter-stop: + docker-compose -f docker-compose.yml down + +coder-compose: + docker-compose -f docker-compose-coder.yml 
build + docker-compose -f docker-compose-coder.yml --compatibility up + +coder-compose-background: + docker-compose -f docker-compose-coder.yml build + docker-compose -f docker-compose-coder.yml --compatibility up -d + +coder-stop: + docker-compose -f docker-compose-coder.yml down \ No newline at end of file diff --git a/jupyter/README b/jupyter/README new file mode 100644 index 0000000..cf6731a --- /dev/null +++ b/jupyter/README @@ -0,0 +1,30 @@ +## Wang Bioinformatics Lab Nextflow In Browser Dev Template + +Here we are able to launch a notebook inside of docker with all the dependencies you would want. + +### Customizing Docker for you + +1. Update dependencies - change the requirements.txt +1. Update your password - create an .env file with LOGINPASSWORD="YOUR_PASSWORD" or else it will not be available +1. Update the port for the server - change port 9000 to something around 9000, but not 9000 in docker-compose.yml or docker-compose-coder.yml +1. Change the name of the container - change wanglab-jupyter in docker-compose.yml or docker-compose-coder.yml + +### Launching the Jupyter Notebook + +make jupyter-compose + +### Launching Coder + +make coder-compose + +### Layout + +All your code for notebooks will go in src. + +All the data you'll want to work with will go into data. + +Additional documentation will go into docs. + +### Best Practices + +Commit and save changes often and push to github. \ No newline at end of file diff --git a/jupyter/docker-compose-coder.yml b/jupyter/docker-compose-coder.yml new file mode 100644 index 0000000..017299d --- /dev/null +++ b/jupyter/docker-compose-coder.yml @@ -0,0 +1,23 @@ +version: '3' +services: + wanglab-coder-workflowtemplate: + build: + context: . 
+ dockerfile: Dockerfile.coder + container_name: wanglab-coder-workflowtemplate + volumes: + - ..:/config/workspace:rw + ports: + - "8443:8443" + restart: unless-stopped + deploy: + resources: + limits: + memory: 16000M + environment: + - PUID=1000 + - PGID=1000 + - TZ=US/Pacific + - PASSWORD=${LOGINPASSWORD:-PASSWORD} + labels: + - traefik.http.routers.backend.rule=Host(`test.wanglab.science`) \ No newline at end of file diff --git a/jupyter/docker-compose.yml b/jupyter/docker-compose.yml new file mode 100644 index 0000000..bfbfcea --- /dev/null +++ b/jupyter/docker-compose.yml @@ -0,0 +1,20 @@ + +version: '3' +services: + wanglab-jupyter-workflowtemplate: + build: + context: . + dockerfile: Dockerfile + container_name: wanglab-jupyter-workflowtemplate + volumes: + - ../:/home/jovyan/work:rw + ports: + - "9000:8888" + restart: unless-stopped + command: jupyter lab --ip='*' --port=8888 --no-browser --NotebookApp.token='${LOGINPASSWORD:-PASSWORD}' --allow-root + deploy: + resources: + limits: + memory: 16000M + labels: + - traefik.http.routers.backend.rule=Host(`test.wanglab.science`) \ No newline at end of file diff --git a/jupyter/requirements.txt b/jupyter/requirements.txt new file mode 100644 index 0000000..1411a4a --- /dev/null +++ b/jupyter/requirements.txt @@ -0,0 +1 @@ +pandas \ No newline at end of file diff --git a/nextflow.config b/nextflow.config new file mode 100644 index 0000000..8986149 --- /dev/null +++ b/nextflow.config @@ -0,0 +1,2 @@ +conda.enabled = true +conda.useMamba = true diff --git a/nextflow_slurm.config b/nextflow_slurm.config new file mode 100644 index 0000000..5a555d2 --- /dev/null +++ b/nextflow_slurm.config @@ -0,0 +1,6 @@ +conda.useMamba = true +executor{ + name='slurm' + queueSize=32 //The maximum number of tasks to queue at one time + jobName = { "-$task.name".replaceAll("\\s", "_") } +} diff --git a/nf_workflow.nf b/nf_workflow.nf new file mode 100644 index 0000000..3ca33c2 --- /dev/null +++ b/nf_workflow.nf @@ -0,0 +1,31 @@ 
+#!/usr/bin/env nextflow +nextflow.enable.dsl=2 + +params.input_spectra = "README.md" + +TOOL_FOLDER = "$baseDir/bin" + +process processDataPython { + publishDir "./nf_output", mode: 'copy' + + conda "$TOOL_FOLDER/conda_env.yml" + + input: + file input + + output: + file 'python_output.tsv' + + """ + python $TOOL_FOLDER/python_script.py $input python_output.tsv + """ +} + + +workflow { + data_ch = Channel.fromPath(params.input_spectra) + + // Outputting Python + processDataPython(data_ch) + +} diff --git a/requirements.txt b/requirements.txt new file mode 120000 index 0000000..3928e7a --- /dev/null +++ b/requirements.txt @@ -0,0 +1 @@ +jupyter/requirements.txt \ No newline at end of file diff --git a/test/test_environment.yml b/test/test_environment.yml new file mode 100644 index 0000000..9272f5b --- /dev/null +++ b/test/test_environment.yml @@ -0,0 +1,7 @@ +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - nextflow + - mamba \ No newline at end of file diff --git a/workflowdisplay.yaml b/workflowdisplay.yaml new file mode 100644 index 0000000..84db10f --- /dev/null +++ b/workflowdisplay.yaml @@ -0,0 +1,19 @@ +name: template_nextflow_workflow +displayname: STUFF +Views: +- name: Summary List + displayname: Summary List + viewname: summarylist + displaytype: datatable + parameters: + filename: output/summary.tsv + columns: + - title: "View LCMS" + data: filename + - title: "filename" + data: filename + columnDefs: '[ {"targets": 0,"data": null,"render": function ( data, type, row, meta ) { + return ` + View LCMS - mzspec:GNPS2-${task}:${row["filename"]} + `;}}]' + diff --git a/workflowinput.yaml b/workflowinput.yaml new file mode 100644 index 0000000..2ad4d3b --- /dev/null +++ b/workflowinput.yaml @@ -0,0 +1,21 @@ +workflowname: template_nextflow_workflow +workflowdescription: template_nextflow_workflow +workflowlongdescription: This is a template nextflow workflow for GNPS2 +workflowversion: "0.1" +workflowfile: nf_workflow.nf 
+workflowautohide: false +adminonly: false +#This maps the parameters from an input form to those that will appear in nextflow +parameterlist: + - displayname: File Selection + paramtype: section + + - displayname: Input Data Folder + paramtype: fileselector + nf_paramname: input_spectra + formplaceholder: Enter the path to data + formvalue: "" + targettaskfolder: input_spectra + optional: false + selectsinglefile: false + folderunroll: true