This repository has been archived by the owner on May 28, 2024. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 0
/
cap3_3.yaml
435 lines (435 loc) · 24 KB
/
cap3_3.yaml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
# Mobyle service descriptor for CAP3 (tagged !mobyle/program): identity
# fields first, then the tree of input parameters under `inputs`.
!mobyle/program
name: cap3
version: '3'
title: CAP3
description: Contig Assembly Program
authors: Huang, X. and Madan, A. (1999)
inputs: !mobyle/inputparagraph
children:
# seq: the mandatory reads file. The format expression emits a single space
# followed by the value, at argument position 1.
# EDAM terms presumably denote a FASTA-like nucleic acid sequence - verify
# against the EDAM ontology.
- !mobyle/inputprogramparameter
prompt: File of reads
format: '" " + str( value )'
simple: true
argpos: 1
mandatory: true
name: seq
command: false
type: !mobyle/formattedtype
format_terms: ['EDAM_format:2200']
data_terms: ['EDAM_data:2977']
# qual_file: optional quality file. The format expression is a two-element
# tuple indexed by a boolean: it yields "" when no value is given or the value
# is already named <seq>.qual, otherwise the shell prefix
# `ln -sf <value> <seq>.qual && `.
# argpos -10 is lower than seq's argpos 1 - presumably so the link is created
# before cap3 itself runs; confirm against the Mobyle command builder.
# NOTE(review): value and seq are interpolated into the shell fragment without
# quoting - confirm file names are sanitised upstream.
- !mobyle/inputprogramparameter {comment: "CAP3 uses the same format of a quality\
\ file as Phrap. \n The sequence file and the corresponding quality\
\ file must be arranged \n in the same order in terms of reads,\
\ where for each read, \n the same name must be used in both\
\ files and the number of bases must \n be equal to the number\
\ of quality values.", prompt: Quality value file, format: '("" , "ln
-sf %s %s.qual && " %( value , seq ) )[value is not None and value !=
seq + ".qual" ]', argpos: -10, name: qual_file, command: false}
# con_file: optional forward-reverse constraint file. When a value is given,
# the format expression yields the shell prefix `ln -sf <value> <seq>.con && `;
# otherwise it yields "".
# argpos -5 is lower than seq's argpos 1 - presumably so the link is created
# before cap3 itself runs; confirm against the Mobyle command builder.
# NOTE(review): value and seq are interpolated into the shell fragment without
# quoting - confirm file names are sanitised upstream.
- !mobyle/inputprogramparameter {comment: "Each line of the constraint file specifies\
\ one forward-reverse constraint of the form: ReadA ReadB MinDistance\
\ MaxDistance, where ReadA and ReadB are names of two reads, and MinDistance\
\ and MaxDistance are \n distances (integers) in base pairs.\
\ The constraint is satisfied if ReadA in forward orientation \n \
\ occurs in a contig before ReadB in reverse orientation, or ReadB\
\ in forward orientation occurs in \n a contig before ReadA in\
\ reverse orientation, and their distance is between MinDistance and \n\
\ MaxDistance. CAP3 works better if a lot more constraints are\
\ used.", prompt: Constraint file, format: '( "" , "ln -sf %s %s.con &&
" %( value , seq ) )[value is not None ]', argpos: -5, name: con_file,
command: false}
- !mobyle/inputprogramparagraph
comment: "<html:div xmlns:html=\"http://www.w3.org/1999/xhtml\"><html:p>CAP3\
\ computes clipping \n positions of each read using both base quality\
\ values and similarity \n information. Clipping of a poor end region\
\ of a read f is controlled by \n three parameters: quality value cutoff\
\ qualcut, clipping range crange, \n and depth of good coverage gdepth.\
\ The value for qualcut can specified \n with the \"-c\" option, the\
\ value for crange with the \"-y\" option, and the \n value for gdepth\
\ with the \"-z\" option.</html:p><html:p>If there are quality values,\
\ CAP3 computes two positions qualpos5 and \n qualpos3 of read f such\
\ that the region of read f from position qualpos5 \n to position qualpos3\
\ consists mostly of quality values greater than \n qualcut. If there\
\ are no quality values, then qualpos5 is set to 1 and \n qualpos3\
\ is set to the length of read f. The range for the left clipping \n position\
\ of read f is from 1 to qualpos5 + crange. The range for the \n right\
\ clipping position of read f is from qualpos3 - crange to the end \n\
\ of read f. The minimum depth of good coverage at the left and right\
\ \n clipping positions of read f is expected to be gdepth.</html:p><html:p>Let\
\ realdepth5 be the maximum real depth of coverage for the initial \n\
\ region of read f ending at position qualpos5 + crange. Let depth5\
\ be the \n smaller of realdepth5 and gdepth. If depth5 is 0, then\
\ left clipping \n position of read f is set to qualpos5 by CAP3. The\
\ given value for the \n parameter crange may be too small for read\
\ f. CAP3 reports at the start \n of a .info file that \"No overlap\
\ is found in the given 5' clipping range \n for read f.\" If there\
\ are overlaps beyond the given 5' clipping range \n for read f, CAP3\
\ reports a new clipping range for each overlap. One of \n the reported\
\ range values can be used as a new value for the parameter \n crange\
\ for read f.</html:p><html:p>If depth5 is greater than 0, the left clipping\
\ position of read f is the \n smallest position x such that x is less\
\ than qualpos5 + crange and the \n region of read f beginning at position\
\ x is similar to depth5 other \n reads. The right clipping position\
\ of read f is computed similarly by \n CAP3. Larger values for the\
\ parameters crange and gdepth result in more \n aggressive clipping\
\ of poor end regions. A larger value for crange \n allows CAP3 to\
\ search for the left clipping position in a larger area. A \n larger\
\ value for gdepth may cause CAP3 to clip more bases so that the \n \
\ resulting good portion of read f is similar to more reads.</html:p><html:p>The\
\ user may provide specific values for the parameters crange and \n \
\ gdepth for individual reads in a file. Each line in the file has the\
\ \n following format:</html:p><html:pre><html:font\
\ face=\"Verdana\" style=\"font-size: 9pt;\">ReadName crange5 \
\ gdepth5 crange3 gdepth3</html:font></html:pre><html:p>where\
\ ReadName is the name \n of a read, crange5 and gdepth5 are values\
\ for the 5' end, and crange3 and \n gdepth3 are for the 3' end.</html:p></html:div>"
# Paragraph identity plus its three integer clipping options.
# Shared format idiom: a two-element tuple indexed by a boolean. It yields ""
# when the value is None or equals vdef, otherwise " -<flag> <value>".
# vdef is presumably the declared default of the parameter - confirm.
prompt: Clipping of poor regions
name: clipping_poor_regions
children:
# -c: base quality cutoff for clipping (declared default 12).
- !mobyle/inputprogramparameter
comment: 'Default value: 12'
prompt: Base quality cutoff for clipping (-c)
format: ( "" , " -c "+ str( value ) )[ value is not None and value !=
vdef ]
argpos: 10
name: base_qual_cutoff_clipping
command: false
type: !mobyle/integertype {default: 12}
# -y: clipping range (declared default 250).
- !mobyle/inputprogramparameter
comment: 'Default value: 250'
prompt: Clipping range (-y)
format: ( "" , " -y "+ str( value ) )[ value is not None and value !=
vdef ]
argpos: 10
name: clipping_range
command: false
type: !mobyle/integertype {default: 250}
# -z: minimum number of good reads at the clip position (declared default 3).
- !mobyle/inputprogramparameter
comment: 'Default value: 3'
prompt: Minimum number of good reads at clip pos (-z)
format: ( "" , " -z "+ str( value ) )[ value is not None and value !=
vdef ]
argpos: 10
name: good_reads
command: false
type: !mobyle/integertype {default: 3}
# clipping_file: optional per-read clipping file. The format expression yields
# " -w <value>" whenever a value is given, and "" otherwise. No default is
# compared here, unlike the integer options.
- !mobyle/inputprogramparameter {comment: 'The user may provide specific values
for the parameters crange and gdepth for individual reads in a file. Each
line in the file has the following format: ReadName crange5 gdepth5
crange3 gdepth3, where ReadName is the name of a read, crange5 and gdepth5
are values for the 5'' end, and crange3 and gdepth3 are for the 3''
end.', prompt: File for clipping information (-w), format: '( "" ,
" -w "+ str( value ) )[ value is not None ]', argpos: 10, name: clipping_file,
command: false}
- !mobyle/inputprogramparagraph
comment: The program determines a minimum band of diagonals for an overlapping
alignment between two sequence reads. The band is expanded by a number
of bases specified by the user with option "-a".
# Paragraph identity plus its single option.
prompt: Band of diagonals
name: band_diagonals
children:
# -a: band expansion size (declared default 20). The format expression yields
# "" when the value is None or equals vdef, otherwise " -a <value>".
# vdef is presumably the declared default - confirm.
- !mobyle/inputprogramparameter
comment: 'The program determines a minimum band of diagonals for an overlapping
alignment between two sequence reads. The band is expanded by a number
of bases specified by the user with option "-a". Default value: 20'
prompt: Band expansion size (-a)
format: ( "" , " -a "+ str( value ) )[ value is not None and value !=
vdef ]
argpos: 10
name: band_expansion
command: false
type: !mobyle/integertype {default: 20}
- !mobyle/inputprogramparagraph
comment: Overlaps between reads are evaluated by many measures. The first measure
is based on base quality. If an overlap contains lots of differences at
bases of high quality, then the overlap is removed. Specifically, let b
be the base quality cutoff value and let d be the maximum difference score. The
values for the two parameters can be set using the "-b" and "-d" options. If
the overlap contains a difference at bases of quality values q1 and q2,
then the score at the difference is max(0, min(q1, q2) - b). The difference
score of an overlap is the sum of scores at each difference. For example,
an overlap contains two differences, one at bases of quality values 15
and 30 and the other at bases of quality values 40 and 50. With b = 20,
the difference score of the overlap is 0 + 20 = 20. If the difference score
of an overlap exceeds d, then the overlap is removed. With b = 20, an overlap
with 15 differences at bases of quality values 40 or higher has a difference
score of at least 300 and is removed if d = 250.
# Paragraph identity plus the two quality-difference options.
# Each format expression yields "" when the value is None or equals vdef,
# otherwise " -<flag> <value>". vdef is presumably the declared default.
prompt: Quality difference score of an overlap
name: overlap_score
children:
# -b: base quality cutoff for differences (declared default 20).
- !mobyle/inputprogramparameter
prompt: Base quality cutoff for differences (-b)
format: ( "" , " -b "+ str( value ) )[ value is not None and value !=
vdef ]
argpos: 10
name: base_qual_cutoff
command: false
type: !mobyle/integertype {default: 20}
# -d: maximum summed quality score at differences (declared default 200).
- !mobyle/inputprogramparameter
prompt: Maximum qscore sum at differences (-d)
format: ( "" , " -d "+ str( value ) )[ value is not None and value !=
vdef ]
argpos: 10
name: max_qscore
command: false
type: !mobyle/integertype {default: 200}
- !mobyle/inputprogramparagraph
comment: The second measure looks at the number of differences in an overlap. If
the number of differences in an overlap is higher than expected, then
the overlap is removed. Let an integer e be the maximum number of extra
differences. Consider an overlap between reads f and g. Let r1 be the estimated
number of sequencing errors for the region of f involved in the overlap
and let r2 be that for the region of g involved in the overlap. If the observed
number of differences in the overlap is greater than r1 + r2 + e, then
the overlap is removed. The value for the parameter e can be changed using
the "-e" option. The expected number of differences in the overlap is about
r1 + r2. Giving a smaller value to e causes more overlaps to be removed.
# Paragraph identity plus its single option.
prompt: Number of differences in an overlap
name: nb_diff_overlap
children:
# -e: clearance between numbers of differences (declared default 30). The
# format expression yields "" when the value is None or equals vdef,
# otherwise " -e <value>". vdef is presumably the declared default.
- !mobyle/inputprogramparameter
prompt: Clearance between number of diff (-e)
format: ( "" , " -e "+ str( value ) )[ value is not None and value !=
vdef ]
argpos: 10
name: clearance
command: false
type: !mobyle/integertype {default: 30}
- !mobyle/inputprogramparagraph
comment: The third measure is based on overlap similarity score. The similarity
score of an overlapping alignment is defined using base quality values. Let
m be the match score factor, let n be the mismatch score factor, and let
g be the gap penalty factor. Values for these parameters can be set with
the "-m", "-n", and "-g" options. A match at bases of quality values q1
and q2 is given a score of m * min(q1,q2). A mismatch at bases of quality
values q1 and q2 is given a score of n * min(q1,q2). A base of quality
value q1 in a gap is given a score of -g * min(q1,q2), where q2 is the
quality value of the base in the other sequence right before the gap. The
score of a gap is the sum of scores of each base in the gap minus a gap
open penalty. The similarity score of an overlapping alignment is the sum
of scores of each match, each mismatch, and each gap. With m = 2, an overlap
that consists of 25 matches at bases of quality value 10 has a score of
500. If the similarity score of an overlap is less than the overlap similarity
score cutoff s, then the overlap is removed.
# Paragraph identity plus the three scoring-factor options.
# Each format expression yields "" when the value is None or equals vdef,
# otherwise " -<flag> <value>". vdef is presumably the declared default.
prompt: Similarity score of an overlap
name: sim_score_overlap
children:
# -m: match score factor (declared default 2).
- !mobyle/inputprogramparameter
prompt: Match score factor (-m)
format: ( "" , " -m "+ str( value ) )[ value is not None and value !=
vdef ]
argpos: 10
name: match_score
command: false
type: !mobyle/integertype {default: 2}
# -n: mismatch score factor (declared default -5, a negative integer).
- !mobyle/inputprogramparameter
prompt: Mismatch score factor (-n)
format: ( "" , " -n "+ str( value ) )[ value is not None and value !=
vdef ]
argpos: 10
name: mismatch_score
command: false
type: !mobyle/integertype {default: -5}
# -g: gap penalty factor (declared default 6).
- !mobyle/inputprogramparameter
prompt: Gap penalty factor (-g)
format: ( "" , " -g "+ str( value ) )[ value is not None and value !=
vdef ]
argpos: 10
name: gap_penalty
command: false
type: !mobyle/integertype {default: 6}
- !mobyle/inputprogramparagraph
comment: The fourth requirement for an overlap is that the length in bp of
the overlap is no less than the value of the minimum overlap length cutoff
parameter. The value for this parameter can be changed with the "-o" option. The
fifth requirement for an overlap is that the percent identity of the overlap
is no less than the minimum percent identity cutoff. The value for this
parameter can be changed with the "-p" option. A value of 75 for p means
0.75 or 75%.
# Paragraph identity plus the overlap length and identity cutoffs.
# Each format expression yields "" when the value is None or equals vdef,
# otherwise " -<flag> <value>". vdef is presumably the declared default.
prompt: Length and percent identity of an overlap
name: percent_id_overlap
children:
# -o: overlap length cutoff (declared default 40).
- !mobyle/inputprogramparameter
prompt: Overlap length cutoff (-o)
format: ( "" , " -o "+ str( value ) )[ value is not None and value !=
vdef ]
argpos: 10
name: overlap_length
command: false
type: !mobyle/integertype {default: 40}
# -p: overlap percent identity cutoff (declared default 80).
- !mobyle/inputprogramparameter
prompt: Overlap percent identity cutoff (-p)
format: ( "" , " -p "+ str( value ) )[ value is not None and value !=
vdef ]
argpos: 10
name: overlap_identity
command: false
type: !mobyle/integertype {default: 80}
- !mobyle/inputprogramparagraph
comment: The program provides a parameter (-f option) for the user to reject
overlaps with a long gap. Let an integer f be the maximum length of gaps
allowed in any overlap. Then any overlap with a gap longer than f is rejected
by the program. The value for this parameter can be changed using the "-f"
option. Note that a small value for this parameter may cause the program
to remove true overlaps and to produce incorrect results. The "-f" option
may be used by the user to split reads from alternative splicing forms
into separate contigs. Geo Pertea at TIGR suggested that this option be
added to the program.
# Paragraph identity plus its single option.
prompt: Maximum length of gaps in an overlap
name: max_len_gaps_overlap
children:
# -f: maximum gap length in any overlap (declared default 20). The format
# expression yields "" when the value is None or equals vdef, otherwise
# " -f <value>". vdef is presumably the declared default.
- !mobyle/inputprogramparameter
prompt: Maximum gap length in any overlap (-f)
format: ( "" , " -f "+ str( value ) )[ value is not None and value !=
vdef ]
argpos: 10
name: max_gap_length
command: false
type: !mobyle/integertype {default: 20}
- !mobyle/inputprogramparagraph
comment: The total length of the different overhang regions in an overlap
is controlled with the -h option. An overhang region in an overlap is
a different terminal region before or after the overlap. The overhang
percent length of an overlap is 100 times the total length of the different
overhang regions in the overlap divided by the length of the overlap. Overlaps
with an overhang percent length greater than the maximum cutoff are rejected.
# Paragraph identity plus the options listed after it.
# Each format expression yields "" when the value is None or equals vdef,
# otherwise " -<flag> <value>". vdef is presumably the declared default.
prompt: Overhang percent length of an overlap
name: overhang_pcent_len_overlap
children:
# -h: maximum overhang percent length (declared default 20).
- !mobyle/inputprogramparameter
prompt: Maximum overhang percent length (-h)
format: ( "" , " -h "+ str( value ) )[ value is not None and value !=
vdef ]
argpos: 10
name: max_overhang
command: false
type: !mobyle/integertype {default: 20}
# -s: overlap similarity score cutoff (declared default 900).
# NOTE(review): the cutoff "s" is described in the help text of the
# sim_score_overlap paragraph, yet this entry follows max_overhang - confirm
# which paragraph it is meant to be nested under.
- !mobyle/inputprogramparameter
prompt: Overlap similarity score cutoff (-s)
format: ( "" , " -s "+ str( value ) )[ value is not None and value != vdef
]
argpos: 10
name: overlap_similarity
command: false
type: !mobyle/integertype {default: 900}
- !mobyle/inputprogramparagraph
comment: The "-r" option is used to let CAP3 know whether to consider reads
in reverse orientation for assembly. The default value for the option is
1, meaning that reads in reverse orientation are also considered for assembly. Specifying
zero as "-r 0" instructs CAP3 to perform assembly of reads in forward
orientation only. This option was suggested by Patrick Schnable's lab.
# Paragraph identity plus its single option.
prompt: Assembly of reads in forward orientation only
name: assembly_fwd
children:
# -r: reverse orientation switch, modelled as an integer (declared default 1).
# The format expression yields "" when the value is None or equals vdef,
# otherwise " -r <value>". vdef is presumably the declared default.
- !mobyle/inputprogramparameter
prompt: Reverse orientation value (-r)
format: ( "" , " -r "+ str( value ) )[ value is not None and value !=
vdef ]
argpos: 10
name: reverse_orientation
command: false
type: !mobyle/integertype {default: 1}
- !mobyle/inputprogramparagraph
comment: This parameter (option -t) allows the user to trade off the efficiency
of the program for its accuracy. For a read f, the program computes overlaps
between read f and other reads by considering short word matches between
read f and other reads. A word match is examined to see if it can be extended
into a long overlap. If read f has overlaps with many other reads, then
read f has many short word matches with many other reads. This parameter
gives an upper limit, for any word, on the number of word matches between
read f and other reads that are considered by the program. Using a large
value for this parameter allows the program to consider more word matches
between read f and other reads, which can find more overlaps for read
f, but slows down the program. Using a small value for this parameter has
the opposite effect. A large value may be used if the depth of coverage
is high for the data set. For example, a value of 150 is used for a data
set with a maximum depth of coverage of 30, and a value of 500 for a data
set with a maximum depth of coverage of 100. Using a very large value
may cause the program to run forever or run out of memory.
# Paragraph identity plus its single option.
prompt: Maximum number of word matches
name: max_num_word_matches
children:
# -t: maximum number of word matches (declared default 300). The format
# expression yields "" when the value is None or equals vdef, otherwise
# " -t <value>". vdef is presumably the declared default.
- !mobyle/inputprogramparameter
prompt: Maximum number of word matches (-t)
format: ( "" , " -t "+ str( value ) )[ value is not None and value !=
vdef ]
argpos: 10
name: word_matches
command: false
type: !mobyle/integertype {default: 300}
- !mobyle/inputprogramparagraph
comment: Corrections to an assembly are made using forward-reverse constraints. Let
an integer u be the minimum number of constraints for correction. Consider
an alternative overlap between two reads f and g. Assume that f is in contig
C1 and that g is in contig C2. If the number of unsatisfied constraints
that support the overlap between f and g is greater than the value of
the u parameter plus the number of satisfied constraints that support
the current joins involving f and g, then the current joins involving
f and g are disconnected and the overlap between f and g is implemented. The
value for this parameter can be changed with the "-u" option. Contigs that
are linked by forward-reverse constraints are reported. The minimum number
of constraints for reporting a link between two contigs is specified with
the "-v" option.
# Paragraph identity plus the two constraint-count options.
# Each format expression yields "" when the value is None or equals vdef,
# otherwise " -<flag> <value>". vdef is presumably the declared default.
prompt: Forward-reverse constraints
name: fwd_rev_const
children:
# -u: minimum number of constraints for correction (declared default 3).
- !mobyle/inputprogramparameter
prompt: Minimum number of constraints for correction (-u)
format: ( "" , " -u "+ str( value ) )[ value is not None and value !=
vdef ]
argpos: 10
name: min_constraints_corr
command: false
type: !mobyle/integertype {default: 3}
# -v: minimum number of constraints for linking (declared default 2).
- !mobyle/inputprogramparameter
prompt: Minimum number of constraints for linking (-v)
format: ( "" , " -v "+ str( value ) )[ value is not None and value !=
vdef ]
argpos: 10
name: min_constraints_linking
command: false
type: !mobyle/integertype {default: 2}
# Result files collected after the run. Each `filenames` value is a quoted
# expression holding a glob (or a fixed name) that selects the files to expose.
outputs: !mobyle/outputparagraph
children:
# Assembly in ACE format.
- !mobyle/outputprogramparameter {prompt: Assembly in ace format, filenames: '"*.cap.ace"',
name: ace}
# Contig sequences; typed with the same EDAM terms as the `seq` input.
- !mobyle/outputprogramparameter
prompt: Contigs
filenames: '"*.cap.contigs"'
name: contig
type: !mobyle/formattedtype
format_terms: ['EDAM_format:2200']
data_terms: ['EDAM_data:2977']
# Links between contigs.
- !mobyle/outputprogramparameter {prompt: Contig links, filenames: '"*.cap.contigs.links"',
name: contig_link}
# Quality values of the contigs.
- !mobyle/outputprogramparameter {prompt: Quality of contigs, filenames: '"*.cap.contigs.qual"',
name: contig_qual}
# Assembly report (.info file).
- !mobyle/outputprogramparameter {prompt: Assembly information, filenames: '"*.cap.info"',
name: info}
# Reads left out of every contig.
# NOTE(review): unlike `contig`, no format_terms are declared here - confirm
# whether EDAM_format:2200 should apply to singlets as well.
- !mobyle/outputprogramparameter
prompt: Singlets
filenames: '"*.cap.singlets"'
name: singlet
type: !mobyle/formattedtype
data_terms: ['EDAM_data:2977']
# Captured standard output, flagged with output_type: stdout.
- !mobyle/outputprogramparameter
prompt: Standard output
filenames: '"cap3.out"'
name: stdout
output_type: stdout
type: !mobyle/formattedtype
data_terms: ['EDAM_data:2048']
# Captured standard error.
# NOTE(review): this entry carries no output_type while the stdout entry does -
# confirm whether `output_type: stderr` is expected by the consumer.
- !mobyle/outputprogramparameter
prompt: Standard error
filenames: '"cap3.err"'
name: stderr
type: !mobyle/formattedtype
data_terms: ['EDAM_data:2048']
# Classification (EDAM operation and topic), citation, links and launch
# settings.
# NOTE(review): the reference has doi and url set to null - consider filling
# in the DOI of the Genome Research paper (likely 10.1101/gr.9.9.868; verify).
operations: ['EDAM_operation:0310']
topics: ['EDAM_topic:0196']
references:
- {doi: null, label: 'Huang, X. and Madan, A. (1999) CAP3: A DNA Sequence Assembly
Program. Genome Research, 9: 868-877.', url: null}
homepage_links: ['http://seq.cs.iastate.edu/']
# Executable name; the per-parameter format fragments are presumably appended
# to it in argpos order - confirm against the Mobyle command builder.
command: cap3
env: {}
source_links: ['http://seq.cs.iastate.edu/cap3.html']