forked from nf-core/deepmodeloptim
-
Notifications
You must be signed in to change notification settings - Fork 0
/
nextflow.config
152 lines (137 loc) · 6.51 KB
/
nextflow.config
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
params {
// Inputs
csv = null // the input file containing all input data
model = null // the model file in python, the model that will be tested by this pipeline
exp_conf = null // the json config file that specifies all the parameters relative to the data manipulation
tune_conf = null // the config file with all the hyperparameter directives (choices) and all ray tune specs
// Optional inputs
initial_weights = null // the initial weights of the model. These files can be used to start the training instead of random initialization. One can provide several files, each of them will be used for a different run.
// Output options
outdir = "./results/" // the outdir has to be the one the user specifies + the unique name of the run + the time, so that multiple runs will not overlap
publish_dir_mode = "copy"
// Computational resources
max_cpus = 12 // this flag and the following are for regulating resources; profiles can overwrite these.
max_gpus = 1 // requesting the gpus for the tuning steps.
max_memory = 32.GB
max_time = "72.h"
// Error options
max_retries = 0
err_start = 'finish'
// Optional flags
check_model = true // flag to tell whether or not to check if the model can be tuned and trained. It does one call of the batch function (predicting) of the model, importing and using everything needed for that. By default such a check is run.
check_model_num_samples = null // optional flag to do a more extensive check during check_model. This will override the user-given num_sample value for the tune run. This gives the user control on how extensive the check should be.
shuffle = true // flag to tell whether or not to shuffle the data and run a train on it. This sanity check always runs by default. (If the way we think about shuffle changes, it may be better to remove this flag and make it a parameter of the user-given json for noise and split.)
debug_mode = false // flag used to switch to debug mode for the pipeline.
// General
singularity_cache_dir = "singularity_cache"
help = false
validate_params = true // tells whether or not to validate input values using nf-schema.
// Config options
config_profile_name = null
config_profile_description = null
}
// Load modules.config for DSL2 module specific options
includeConfig 'conf/modules.config'
// Execution profiles: exactly one container engine is enabled per profile,
// and every other engine is explicitly disabled to avoid ambiguity.
profiles {
// Run all processes inside Docker containers.
docker {
docker.enabled = true
docker.runOptions = '-u $(id -u):$(id -g)'
conda.enabled = false
singularity.enabled = false
podman.enabled = false
shifter.enabled = false
charliecloud.enabled = false
apptainer.enabled = false
}
// Run all processes inside Apptainer containers; images are cached
// in params.singularity_cache_dir.
apptainer {
apptainer.enabled = true
apptainer.autoMounts = true
apptainer.cacheDir = "${params.singularity_cache_dir}"
conda.enabled = false
docker.enabled = false
singularity.enabled = false
podman.enabled = false
shifter.enabled = false
charliecloud.enabled = false
}
// Run all processes inside Singularity containers; images are cached
// in params.singularity_cache_dir.
singularity {
singularity.enabled = true
singularity.autoMounts = true
singularity.cacheDir = "${params.singularity_cache_dir}"
conda.enabled = false
docker.enabled = false
podman.enabled = false
shifter.enabled = false
charliecloud.enabled = false
apptainer.enabled = false
}
// Verbose debugging: dump task hashes, echo the host for each task,
// keep work directories, and validate process names in the config.
debug {
dumpHashes = true
process.beforeScript = 'echo $HOSTNAME'
process.debug = true
cleanup = false
nextflow.enable.configProcessNamesValidation = true
}
// Site- and test-specific profiles, each defined in its own config file.
crg { includeConfig "conf/crg.config" }
crg_slurm { includeConfig "conf/crg_slurm.config" }
test { includeConfig "conf/test.config" }
test_learn { includeConfig "conf/test_learn.config" }
test_stub { includeConfig "conf/test_stub.config" }
local { includeConfig "conf/local.config" }
}
// Nextflow plugins (version-pinned)
plugins {
id '[email protected]' // Validation of pipeline parameters and creation of an input channel from a sample sheet
}
// trace/report options
// this will allow the pipeline to create tracing/report files with all the steps and the time/memory/cpu they took
// A single timestamp is computed once so all four report files from the
// same run share the same suffix and never overwrite a previous run's files.
def trace_timestamp = new java.util.Date().format( 'yyyy-MM-dd_HH-mm-ss')
def trace_dir = "${params.outdir}/pipeline_info"
timeline {
enabled = true
file = "${trace_dir}/execution_timeline_${trace_timestamp}.html"
}
report {
enabled = true
file = "${trace_dir}/execution_report_${trace_timestamp}.html"
}
trace {
enabled = true
file = "${trace_dir}/execution_trace_${trace_timestamp}.txt"
}
dag {
enabled = true
file = "${trace_dir}/execution_dag_${trace_timestamp}.html"
}
// Function to ensure that resource requirements don't go beyond
// a maximum limit.
//
// obj  : the resource request of a process (MemoryUnit, Duration, or a
//        number of cpus, depending on `type`)
// type : one of 'memory', 'time' or 'cpus'
//
// Returns the smaller of the request and the matching params.max_* cap.
// If the cap in params is malformed, prints an error and returns the
// original request unchanged.
def check_max(obj, type) {
    if (type == 'memory') {
        try {
            // NOTE: the Comparable contract only guarantees compareTo()
            // returns a positive value when greater, not exactly 1,
            // so the comparison must be `> 0` rather than `== 1`.
            if (obj.compareTo(params.max_memory as nextflow.util.MemoryUnit) > 0)
                return params.max_memory as nextflow.util.MemoryUnit
            else
                return obj
        } catch (all) {
            println " ### ERROR ### Max memory '${params.max_memory}' is not valid! Using default value: $obj"
            return obj
        }
    } else if (type == 'time') {
        try {
            // Same `> 0` rationale as for memory above.
            if (obj.compareTo(params.max_time as nextflow.util.Duration) > 0)
                return params.max_time as nextflow.util.Duration
            else
                return obj
        } catch (all) {
            println " ### ERROR ### Max time '${params.max_time}' is not valid! Using default value: $obj"
            return obj
        }
    } else if (type == 'cpus') {
        try {
            return Math.min( obj, params.max_cpus as int )
        } catch (all) {
            println " ### ERROR ### Max cpus '${params.max_cpus}' is not valid! Using default value: $obj"
            return obj
        }
    }
    // Unknown resource type: previously fell through and returned null,
    // which would silently erase a process's resource request. Return the
    // request unchanged instead.
    return obj
}