forked from hall-lab/sv-pipeline
-
Notifications
You must be signed in to change notification settings - Fork 1
/
jes.conf
245 lines (204 loc) · 8.17 KB
/
jes.conf
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
# Updated Cromwell template for JES
# HTTP endpoint settings for the Cromwell web service.
webservice {
  # TCP port and address the server listens on.
  port = 8000
  interface = 0.0.0.0
  binding-timeout = 5s

  instance {
    name = "cromwell-for-wdl-runner"
  }
}
# Akka dispatcher definitions. All five dispatchers are configured identically:
# type `Dispatcher` with the "fork-join-executor" executor.
akka.dispatchers {
  io-dispatcher      { type = Dispatcher, executor = "fork-join-executor" }
  api-dispatcher     { type = Dispatcher, executor = "fork-join-executor" }
  engine-dispatcher  { type = Dispatcher, executor = "fork-join-executor" }
  backend-dispatcher { type = Dispatcher, executor = "fork-join-executor" }
  service-dispatcher { type = Dispatcher, executor = "fork-join-executor" }
}
# Engine-wide limits and throttles.
system {
  # When 'true', a SIGINT makes Cromwell attempt to abort all currently running jobs before exiting.
  #abort-jobs-on-terminate = false

  # When 'true', Cromwell tries to restart incomplete workflows at startup.
  workflow-restart = true

  # Upper bound on the number of workflows running at the same time.
  max-concurrent-workflows = 5000

  # At most this many submitted workflows are launched at a time, regardless of
  # how many workflow slots are still open.
  max-workflow-launch-count = 50

  # Seconds to wait between workflow launches.
  new-workflow-poll-rate = 20

  # Worker count for the WorkflowLogCopyRouter (the router itself is initialized in code).
  number-of-workflow-log-copy-workers = 10

  # Default number of cache read workers.
  number-of-cache-read-workers = 25

  io {
    # Global throttling: `number-of-requests` per `per` interval. Mostly useful for GCS;
    # adjust it to match the quota available on the GCS API.
    number-of-requests = 100000
    per = 100 seconds

    # How many times an I/O operation is attempted before giving up and failing it.
    number-of-attempts = 5
  }
}
# Handling of per-workflow options and workflow logs.
workflow-options {
  # No workflow-option fields are listed for encryption.
  encrypted-fields = []

  # NOTE(review): this key consists only of 'A' characters, i.e. it looks like a
  # placeholder. Replace it before adding anything to `encrypted-fields`.
  base64-encryption-key = "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA="

  workflow-log-dir = "cromwell-workflow-logs"
  workflow-log-temporary = true
}
# Call caching (optional section): caching is switched on, and bad cache
# results are invalidated.
call-caching {
  enabled = true
  invalidate-bad-cache-results = true
}
# Google settings. The auth named "application-default" declared here is the
# one the `auth = "application-default"` entries elsewhere in this file refer to.
google {
  application-name = "cromwell"

  auths = [
    {
      name = "application-default"
      scheme = "application_default"
    }
  ]
}
# Docker image hash lookup settings.
docker {
  hash-lookup {
    # Set this to match your available quota against the Google Container Engine API.
    gcr-api-queries-per-100-seconds = 1000

    # How long an entry stays in the docker hashes cache before it expires and
    # has to be fetched again.
    cache-entry-ttl = "20 minutes"

    # Maximum number of entries kept in the cache. Once the limit is reached,
    # old entries are removed.
    cache-size = 200

    # How docker hashes are looked up. Possible values:
    #   "local":  look up hashes on the local docker daemon using the cli
    #   "remote": look up hashes on docker hub and gcr
    method = "remote"
  }
}
# Filesystems the engine itself may use for any IO operation it needs to perform.
# For example, WDL variables declared at the workflow level are evaluated using
# the filesystems declared here. To run workflows containing declarations such as
#
#   workflow {
#     String str = read_string("gs://bucket/my-file.txt")
#   }
#
# the engine must be given a gcs filesystem. The default (local) filesystem is
# always available.
engine.filesystems {
  gcs.auth = "application-default"
  local.enabled = true
}
# Execution backends. JES is both the only provider defined here and the default.
backend {
  default = "JES"

  providers {
    JES {
      actor-factory = "cromwell.backend.impl.jes.JesBackendLifecycleActorFactory"

      config {
        # Google project and the GCS root path for this backend.
        project = "washu-genome-inh-dis-analysis"
        root = "gs://ccdg-100-samples-trios-pilot-crams-mgi/workspace"

        # Set this to the lower of your project's "Queries per 100 seconds" and
        # "Queries per 100 seconds per user" quotas.
        #
        # The value helps determine maximum throughput to the Google Genomics API.
        # Too low a value causes a drop in performance; too high a value causes
        # QPS based locks from Google.
        # Defaults: 1000 for "Queries per 100 seconds per user", 50000 for
        # "Queries per 100 seconds".
        # See https://cloud.google.com/genomics/quotas for more information
        genomics-api-queries-per-100-seconds = 1000

        # Polling for completion backs off gradually for slower-running jobs.
        # Maximum polling interval, in seconds:
        maximum-polling-interval = 600

        genomics {
          # Name of an auth defined in the `google` stanza. This auth is used to
          # create Pipelines and manipulate auth JSONs.
          auth = "application-default"

          # Alternative service account to use on the launched compute instance.
          # NOTE: If combined with service account authorization, both that service
          # account and this service account must be able to read and write to the
          # 'root' GCS path.
          compute-service-account = "default"

          # Endpoint for APIs; no reason to change this unless directed by Google.
          endpoint-url = "https://genomics.googleapis.com/"
        }

        filesystems {
          gcs {
            # Name of a (potentially different) auth used for manipulating files
            # via engine functions.
            auth = "application-default"
          }
        }
      }
    }
  }
}
# Service actor implementations.
services {
  KeyValue {
    class = "cromwell.services.keyvalue.impl.SqlKeyValueServiceActor"
  }

  MetadataService {
    class = "cromwell.services.metadata.impl.MetadataServiceActor"

    # Every option below is commented out, so this config object is empty.
    config {
      # Set to "Inf" to turn off metadata summary refresh. The default value is
      # currently "2 seconds".
      # metadata-summary-refresh-interval = "Inf"

      # In higher scale environments (many workflows and/or jobs), DB write
      # performance for metadata events can be improved by writing to the database
      # in batches. Increasing this value can dramatically improve overall
      # performance, at the cost of higher memory usage and a greater risk that
      # metadata events have not been persisted if Cromwell crashes.
      #
      # For normal usage the default of 1 (effectively no batching) should be fine;
      # for larger/production environments a value of at least 500 is recommended.
      # There is no one-size-fits-all number, so benchmark and tune the value for
      # your environment.
      # db-batch-size = 1
      #
      # Stored metadata events are periodically forced to the DB even if the batch
      # size has not been reached. This prevents events from sitting forever in an
      # incomplete batch when no new events are being generated. The default value
      # is currently 5 seconds.
      # db-flush-rate = 5 seconds
    }
  }
}
# Database connection settings.
database {
  # Active settings: HSQLDB (the JDBC URL below uses the in-memory `mem:` form).
  profile = "slick.jdbc.HsqldbProfile$"
  db {
    driver = "org.hsqldb.jdbcDriver"
    # NOTE(review): HOCON does not expand ${...} inside a quoted string, so the
    # literal text ${uniqueSchema} is what the application receives; presumably
    # Cromwell substitutes the placeholder itself - confirm.
    url = "jdbc:hsqldb:mem:${uniqueSchema};shutdown=false;hsqldb.tx=mvcc"
    connectionTimeout = 3000
  }

  # MySQL example. It uses the same `profile` key as the active settings above
  # (the older `driver = "slick.driver.MySQLDriver$"` spelling did not match it).
  # To switch, replace the `profile` and `db` settings above with:
  #profile = "slick.jdbc.MySQLProfile$"
  #db {
  #  driver = "com.mysql.jdbc.Driver"
  #  url = "jdbc:mysql://host/cromwell?rewriteBatchedStatements=true"
  #  user = "user"
  #  password = "pass"
  #  connectionTimeout = 5000
  #}

  # For batch inserts the number of inserts to send to the DB at a time
  # insert-batch-size = 2000

  migration {
    # For databases with a very large number of symbols, selecting all the rows at
    # once can cause a variety of problems, so the selection is paginated. This is
    # the number of rows retrieved and processed at a time before the next chunk
    # is requested.
    read-batch-size = 100000

    # A single symbol row can hold any arbitrary wdl value, so the number of
    # metadata rows produced from it can range from 1 to several thousands (or
    # more). To keep the insert batch from growing out of control, its size is
    # monitored and it is executed/committed once it reaches or exceeds this value.
    write-batch-size = 100000
  }
}