scripts/jes.conf

# Updated Cromwell template for JES

webservice {
  port = 8000
  interface = 0.0.0.0
  binding-timeout = 5s
  instance.name = "cromwell-for-wdl-runner"
}

akka {

  dispatchers {
    io-dispatcher {
      type = Dispatcher
      executor = "fork-join-executor"
    }

    api-dispatcher {
      type = Dispatcher
      executor = "fork-join-executor"
    }
    engine-dispatcher {
      type = Dispatcher
      executor = "fork-join-executor"
    }

    backend-dispatcher {
      type = Dispatcher
      executor = "fork-join-executor"
    }

    service-dispatcher {
      type = Dispatcher
      executor = "fork-join-executor"
    }

  }
}

system {
  # If 'true', a SIGINT will trigger Cromwell to attempt to abort all currently running jobs before exiting
  #abort-jobs-on-terminate = false

  # If 'true' then when Cromwell starts up, it tries to restart incomplete workflows
  workflow-restart = true

  # Cromwell will cap the number of running workflows at N
  max-concurrent-workflows = 5000

  # Cromwell will launch up to N submitted workflows at a time, regardless of how many open workflow slots exist
  max-workflow-launch-count = 50

  # Number of seconds between workflow launches
  new-workflow-poll-rate = 20

  # Since the WorkflowLogCopyRouter is initialized in code, this is the number of workers
  number-of-workflow-log-copy-workers = 10

  # Default number of cache read workers
  number-of-cache-read-workers = 25
  
  io {
    # Global Throttling - This is mostly useful for GCS and can be adjusted to match
    # the quota availble on the GCS API
    number-of-requests = 100000
    per = 100 seconds
    
    # Number of times an I/O operation should be attempted before giving up and failing it.
    number-of-attempts = 5
  }
}

workflow-options {
  encrypted-fields: []

  base64-encryption-key: "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA="

  workflow-log-dir: "cromwell-workflow-logs"

  workflow-log-temporary: true
}

# Optional call-caching configuration.
call-caching {
  enabled = true
  invalidate-bad-cache-results = true
}

google {

  application-name = "cromwell"

  auths = [
    {
      name = "application-default"
      scheme = "application_default"
    },
  ]
}

docker {
  hash-lookup {
    // Set this to match your available quota against the Google Container Engine API
    gcr-api-queries-per-100-seconds = 1000
    // Time in minutes before an entry expires from the docker hashes cache and needs to be fetched again
    cache-entry-ttl = "20 minutes"
    // Maximum number of elements to be kept in the cache. If the limit is reached, old elements will be removed from the cache
    cache-size = 200
    // How should docker hashes be looked up. Possible values are "local" and "remote"
    // "local": Lookup hashes on the local docker daemon using the cli
    // "remote": Lookup hashes on docker hub and gcr
    method = "remote"
  }
}

engine {
  # This instructs the engine which filesystems are at its disposal to perform any IO operation that it might need.
  # For instance, WDL variables declared at the Workflow level will be evaluated using the filesystems declared here.
  # If you intend to be able to run workflows with this kind of declarations:
  # workflow {
  #    String str = read_string("gs://bucket/my-file.txt")
  # }
  # You will need to provide the engine with a gcs filesystem
  # Note that the default filesystem (local) is always available.
  filesystems {
    gcs {
      auth = "application-default"
    }
    local {
      enabled: true      
    }
  }
}

backend {
  default = "JES"
  providers {
    JES {
      actor-factory = "cromwell.backend.impl.jes.JesBackendLifecycleActorFactory"
      config {
        # Google project
        project = "washu-genome-inh-dis-analysis"
        root = "gs://ccdg-100-samples-trios-pilot-crams-mgi/workspace"
    
        # Set this to the lower of the two values "Queries per 100 seconds" and "Queries per 100 seconds per user" for
        # your project.
        #
        # Used to help determine maximum throughput to the Google Genomics API. Setting this value too low will
        # cause a drop in performance. Setting this value too high will cause QPS based locks from Google.
        # 1000 is the default "Queries per 100 seconds per user", 50000 is the default "Queries per 100 seconds"
        # See https://cloud.google.com/genomics/quotas for more information
        genomics-api-queries-per-100-seconds = 1000
    
        # Polling for completion backs-off gradually for slower-running jobs.
        # This is the maximum polling interval (in seconds):
        maximum-polling-interval = 600
    
        genomics {
          # A reference to an auth defined in the `google` stanza at the top.  This auth is used to create
          # Pipelines and manipulate auth JSONs.
          auth = "application-default"
    
    
          // alternative service account to use on the launched compute instance
          // NOTE: If combined with service account authorization, both that serivce account and this service account
          // must be able to read and write to the 'root' GCS path
          compute-service-account = "default"
    
          # Endpoint for APIs, no reason to change this unless directed by Google.
          endpoint-url = "https://genomics.googleapis.com/"
        }
    
        filesystems {
          gcs {
            # A reference to a potentially different auth for manipulating files via engine functions.
            auth = "application-default"
          }
        }
    
      }
    }
  }
}

services {
  KeyValue {
    class = "cromwell.services.keyvalue.impl.SqlKeyValueServiceActor"
  }
  MetadataService {
    class = "cromwell.services.metadata.impl.MetadataServiceActor"
    config {
      # Set this value to "Inf" to turn off metadata summary refresh.  The default value is currently "2 seconds".
      # metadata-summary-refresh-interval = "Inf"
      # For higher scale environments, e.g. many workflows and/or jobs, DB write performance for metadata events
      # can improved by writing to the database in batches. Increasing this value can dramatically improve overall
      # performance but will both lead to a higher memory usage as well as increase the risk that metadata events
      # might not have been persisted in the event of a Cromwell crash.
      #
      # For normal usage the default value of 1 (effectively no batching) should be fine but for larger/production
      # environments we recommend a value of at least 500. There'll be no one size fits all number here so we recommend
      # benchmarking performance and tuning the value to match your environment
      # db-batch-size = 1
      #
      # Periodically the stored metadata events will be forcibly written to the DB regardless of if the batch size
      # has been reached. This is to prevent situations where events wind up never being written to an incomplete batch
      # with no new events being generated. The default value is currently 5 seconds
      # db-flush-rate = 5 seconds
    }
  }
}

database {
  # hsql default
  profile = "slick.jdbc.HsqldbProfile$"
  db {
    driver = "org.hsqldb.jdbcDriver"
    url = "jdbc:hsqldb:mem:${uniqueSchema};shutdown=false;hsqldb.tx=mvcc"
    connectionTimeout = 3000
  }

  # mysql example
  #driver = "slick.driver.MySQLDriver$"
  #db {
  #  driver = "com.mysql.jdbc.Driver"
  #  url = "jdbc:mysql://host/cromwell?rewriteBatchedStatements=true"
  #  user = "user"
  #  password = "pass"
  #  connectionTimeout = 5000
  #}

  # For batch inserts the number of inserts to send to the DB at a time
  # insert-batch-size = 2000

  migration {
    # For databases with a very large number of symbols, selecting all the rows at once can generate a variety of
    # problems. In order to avoid any issue, the selection is paginated. This value sets how many rows should be
    # retrieved and processed at a time, before asking for the next chunk.
    read-batch-size = 100000

    # Because a symbol row can contain any arbitrary wdl value, the amount of metadata rows to insert from a single
    # symbol row can vary from 1 to several thousands (or more). To keep the size of the insert batch from growing out
    # of control we monitor its size and execute/commit when it reaches or exceeds writeBatchSize.
    write-batch-size = 100000
  }
}