This repository has been archived by the owner on May 28, 2024. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 0
/
dnadist.yaml
365 lines (364 loc) · 15.7 KB
/
dnadist.yaml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
# Mobyle service description for the dnadist program.
!mobyle/program
name: dnadist
title: dnadist
description: Compute distance matrix from nucleotide sequences
# Input side of the service; the list items that follow are its children.
inputs: !mobyle/inputparagraph
  children:
# Hidden command parameter: its format expression is the fixed shell
# fragment that runs dnadist with dnadist.params redirected to stdin.
- !mobyle/inputprogramparameter
  format: '"dnadist <dnadist.params"'
  argpos: 0
  name: dnadist
  command: true
  hidden: true
  type: !mobyle/stringtype {}
# Mandatory alignment input. The format expression emits a shell fragment
# that symlinks the supplied file to the fixed name "infile".
# NOTE(review): the negative argpos presumably places this fragment ahead
# of the main command - confirm the ordering rule with the framework.
- !mobyle/inputprogramparameter
  comment: The input file must contain aligned sequences in PHYLIP format
    obtained by sequence alignment programs.
  prompt: Alignment File
  format: '"ln -s " + str( infile ) + " infile && "'
  simple: true
  argpos: -10
  mandatory: true
  name: infile
  command: false
  type: !mobyle/formattedtype
    format_terms: ['EDAM_format:1955']
    data_terms: ['EDAM_data:1383']
  ctrls:
  # Reject values equal to the fixed working file names "infile"/"outfile".
  - message: the name of this data can't be "infile" or "outfile"
    test:
      value:
        '#nin': [infile, outfile]
# Paragraph header for the dnadist options; its children are the list
# items that follow.
- !mobyle/inputprogramparagraph
  prompt: Dnadist options
  name: dnadist_opt
  children:
# Distance model selector, default F84. Preconditions elsewhere in this
# file also test for the values K and JC.
# NOTE(review): no format expression is declared here - confirm how the
# chosen value is written to dnadist.params.
- !mobyle/inputprogramparameter
  prompt: Distance (D)
  argpos: 1
  paramfile: dnadist.params
  name: distance
  command: false
  type: !mobyle/stringtype
    default: F84
# Transition/transversion ratio, offered only for the F84 and K distances.
# The format expression emits "T\n<value>\n" only when the value is set
# and differs from the default (2.0).
- !mobyle/inputprogramparameter
  comment: "The T option in this program does not stand for Threshold, but\
    \ instead is the Transition/transversion option.\n The\
    \ user is prompted for a real number greater than 0.0, as the expected\
    \ ratio of transitions to transversions. \n Note that\
    \ this is not the ratio of the first to the second kinds of events,\
    \ but the resulting expected ratio of transitions to transversions.\
    \ \n The exact relationship between these two quantities\
    \ depends on the frequencies in the base pools. \n The\
    \ default value of the T parameter if you do not use the T option\
    \ is 2.0."
  prompt: Transition/transversion ratio (T)
  format: ( "" , "T\n"+ str( value )+"\n" )[value is not None and value
    != vdef ]
  argpos: 1
  paramfile: dnadist.params
  name: ratio
  precond:
    '#or':
    - distance: F84
    - distance: K
  command: false
  type: !mobyle/floattype
    default: 2.0
  ctrls:
  # Strict comparison: the message requires a value greater than 0.0, so
  # 0.0 itself must be rejected (the previous '#gte' test accepted it).
  # NOTE(review): the bound is kept as the quoted string '0.0', as in the
  # other comparisons of this file - confirm the evaluator coerces it.
  - message: Transition/transversion ratio (T) must be a real number greater
      than 0.0
    test:
      value:
        '#gt': '0.0'
# Gamma rate-heterogeneity selector, offered for the F84, K and JC
# distances. The default is the quoted string 'No' (quoted so that YAML
# does not read it as a boolean); the value GI is tested by a sibling
# precondition in this file.
- !mobyle/inputprogramparameter
  prompt: Gamma distributed rates across sites (G)
  argpos: 5
  paramfile: dnadist.params
  name: gamma
  precond:
    '#or':
    - distance: F84
    - distance: K
    - distance: JC
  command: false
  type: !mobyle/stringtype
    default: 'No'
# Coefficient of variation, required whenever gamma is anything other
# than 'No'. The format expression writes the value followed by a newline.
- !mobyle/inputprogramparameter
  comment: In gamma distribution parameters, this is 1/(square root of alpha)
  prompt: Coefficient of variation of substitution rate among sites (must
    be positive) (if Gamma)
  format: ( "" , str( value ) + "\n" )[ value is not None ]
  argpos: 1010
  paramfile: dnadist.params
  mandatory: true
  name: variation_coeff
  precond:
    gamma:
      '#ne': 'No'
  command: false
  type: !mobyle/floattype {}
# Fraction of invariant sites, required only when gamma equals GI.
# The format expression writes the value followed by a newline.
- !mobyle/inputprogramparameter
  prompt: Fraction of invariant sites (if Gamma)
  format: ( "" , str( value ) + "\n" )[ value is not None ]
  argpos: 1011
  paramfile: dnadist.params
  mandatory: true
  name: invariant_sites
  precond:
    gamma: GI
  command: false
  type: !mobyle/floattype {}
# Paragraph header for the base-frequency inputs, shown only for the F84
# distance; its children are the list items that follow.
- !mobyle/inputprogramparagraph
  comment: These must add to 1
  prompt: Base frequencies for A, C, G, T/U (if not empirical)
  name: ACGT_frequencies
  precond:
    distance: F84
  children:
# Boolean switch, default false. The explicit frequency parameters of this
# file are enabled by the precondition "#not empirical_frequencies".
- !mobyle/inputprogramparameter
  prompt: Use empirical base frequencies (F)
  argpos: 1
  paramfile: dnadist.params
  name: empirical_frequencies
  command: false
  type: !mobyle/booleantype
    default: false
# Frequency of base A (default 0.25). Its own format expression is the
# empty string; the value is read by the base_frequencies expression.
- !mobyle/inputprogramparameter
  prompt: Base frequencies for A
  format: '""'
  argpos: 2
  mandatory: true
  name: A_frequency
  precond:
    '#not': empirical_frequencies
  command: false
  type: !mobyle/floattype
    default: 0.25
# Frequency of base C (default 0.25). Its own format expression is the
# empty string; the value is read by the base_frequencies expression.
- !mobyle/inputprogramparameter
  prompt: Base frequencies for C
  format: '""'
  argpos: 2
  mandatory: true
  name: C_frequency
  precond:
    '#not': empirical_frequencies
  command: false
  type: !mobyle/floattype
    default: 0.25
# Frequency of base G (default 0.25). Its own format expression is the
# empty string; the value is read by the base_frequencies expression.
- !mobyle/inputprogramparameter
  prompt: Base frequencies for G
  format: '""'
  argpos: 2
  mandatory: true
  name: G_frequency
  precond:
    '#not': empirical_frequencies
  command: false
  type: !mobyle/floattype
    default: 0.25
# Frequency of base T/U (default 0.25). Its own format expression is the
# empty string; the value is read by the base_frequencies expression.
- !mobyle/inputprogramparameter
  prompt: Base frequencies for T/U
  format: '""'
  argpos: 2
  mandatory: true
  name: T_frequency
  precond:
    '#not': empirical_frequencies
  command: false
  type: !mobyle/floattype
    default: 0.25
# Hidden parameter that writes "F\n" followed by the four frequencies,
# separated by single spaces and terminated by a newline, whenever
# empirical frequencies are not in use.
# NOTE(review): declared as floattype although the expression yields
# text - confirm this is intended.
- !mobyle/inputprogramparameter
  format: '"F\n" + str( A_frequency ) + " " + str( C_frequency ) + "
    " + str( G_frequency ) + " " + str( T_frequency ) + "\n"'
  argpos: 2
  paramfile: dnadist.params
  name: base_frequencies
  precond:
    '#not': empirical_frequencies
  command: false
  hidden: true
  type: !mobyle/floattype {}
# Paragraph header for the site-weight options; its children are the list
# items that follow.
- !mobyle/inputprogramparagraph
  prompt: Weight options
  name: weight_opt
  children:
# Boolean switch, default false. When true the format expression writes
# "W\n"; when false it writes nothing.
- !mobyle/inputprogramparameter
  prompt: Use weights for sites (W)
  format: ( "" , "W\n" )[ value ]
  argpos: 1
  paramfile: dnadist.params
  name: weights
  command: false
  type: !mobyle/booleantype
    default: false
# Weights file, required when the "weights" switch is on. The format
# expression emits a shell fragment that symlinks the supplied file to
# the fixed name "weights".
# NOTE(review): unlike its sibling parameters this entry declares no
# "type" - confirm whether one is missing.
- !mobyle/inputprogramparameter
  comment: "It selects a set of sites to be analyzed, ignoring the others.\
    \ \n The sites selected are those with weight 1.\n \
    \ The weights in it are a simple string of digits. \n \
    \ Blanks in the weightfile are skipped over and ignored, and\
    \ the weights can continue to a new line."
  prompt: Weights file
  format: ( "" , " ln -s " + str( weights_file ) + " weights && " )[ value
    is not None]
  argpos: -1
  mandatory: true
  name: weights_file
  precond: weights
  command: false
  ctrls:
  # Reject values equal to the fixed working file names "infile"/"outfile".
  - message: the name of this data can't be "infile" or "outfile"
    test:
      value:
        '#nin': [infile, outfile]
# Paragraph header for the bootstrap (seqboot) options; its children are
# the list items that follow.
- !mobyle/inputprogramparagraph
  prompt: Bootstrap options
  name: bootstrap
  children:
# Boolean switch, default false. When true the format expression emits a
# shell fragment that runs seqboot on seqboot.params, renames its
# "outfile" to "seqboot.outfile", and re-points "infile" at that result.
- !mobyle/inputprogramparameter
  comment: By selecting this option, the bootstrap will be performed on
    your sequence file. So you don't need to perform a separated seqboot
    before. Don't give an already bootstrapped file to the program, this
    won't work!
  prompt: Perform a bootstrap before analysis
  format: ( "" , "seqboot < seqboot.params && mv outfile seqboot.outfile
    && rm infile && ln -s seqboot.outfile infile && " )[ value ]
  simple: true
  argpos: -5
  name: seqboot
  command: false
  type: !mobyle/booleantype
    default: false
# Resampling method selector for seqboot, default "bootstrap"; only
# active when the seqboot switch is on. The help text lists five methods.
# NOTE(review): no format expression is declared here - confirm how the
# chosen value is written to seqboot.params.
- !mobyle/inputprogramparameter
  comment: "1. The bootstrap. Bootstrapping was invented by Bradley Efron\
    \ in 1979, and its use in phylogeny estimation was introduced by me\
    \ (Felsenstein, 1985b). It involves creating a new data set by sampling\
    \ N characters randomly with replacement, so that the resulting data\
    \ set has the same size as the original, but some characters have\
    \ been left out and others are duplicated. The random variation of\
    \ the results from analyzing these bootstrapped data sets can be shown\
    \ statistically to be\n typical of the variation that\
    \ you would get from collecting new data sets. The method assumes\
    \ that the characters evolve independently, an assumption that may\
    \ not be realistic for many kinds of data. 2. Delete-half-jackknifing.\
    \ This alternative to the bootstrap involves sampling a random half\
    \ of the characters, and including them in the data but dropping the\
    \ others. The resulting data sets are half the size of the original,\
    \ and no characters are duplicated. The random variation from doing\
    \ this should be very similar to that obtained from the bootstrap.\
    \ The method is advocated by Wu (1986). 3. Permuting species for each\
    \ characters. This method of resampling (well, OK, it may not be best\
    \ to call it resampling) was introduced by Archie (1989) and Faith\
    \ (1990; see also Faith and Cranston, 1991). It involves permuting\
    \ the columns of the data matrix separately. This produces data matrices\
    \ that have the same number and kinds of characters but no taxonomic\
    \ structure. It is used for different purposes than the bootstrap,\
    \ as it tests not the variation around an estimated tree but the hypothesis\
    \ that there is no taxonomic structure in the data: if a statistic\
    \ such as number of steps is significantly smaller in the actual data\
    \ than it is in replicates that are permuted, then we can argue that\
    \ there is some taxonomic structure in the data (though perhaps it\
    \ might be just the presence of a pair of sibling species). 4. Permuting\
    \ characters order. This simply permutes the order of the characters,\
    \ the same reordering being applied to all species. For many methods\
    \ of tree inference this will make no difference to the outcome (unless\
    \ one has rates of evolution correlated among adjacent sites). It\
    \ is included as a possible step in carrying out a permutation test\
    \ of homogeneity of characters (such as the Incongruence Length Difference\
    \ test). 5. Permuting characters separately for each species. This\
    \ is a method introduced by Steel, Lockhart, and Penny (1993) to permute\
    \ data so as to destroy all phylogenetic structure, while keeping\
    \ the base composition of each species the same as before. It shuffles\
    \ the character order separately for each species."
  prompt: Resampling methods (J)
  simple: true
  argpos: 1
  paramfile: seqboot.params
  mandatory: true
  name: Method
  precond: seqboot
  command: false
  type: !mobyle/stringtype
    default: bootstrap
# Random seed for seqboot, required when the seqboot switch is on. The
# format expression writes the value followed by a newline.
# NOTE(review): the prompt says the seed must be odd, but no control in
# this entry checks it.
- !mobyle/inputprogramparameter
  prompt: Random number seed (must be odd)
  format: str( value ) +"\n"
  simple: true
  argpos: 1000
  paramfile: seqboot.params
  mandatory: true
  name: seqboot_seed
  precond: seqboot
  command: false
  type: !mobyle/integertype {}
# Number of replicates for seqboot, default 100. The format expression
# writes "R\n<value>\n" only when the value is set and differs from the
# default.
- !mobyle/inputprogramparameter
  prompt: How many replicates
  format: ( "" , "R\n" + str( value ) + "\n" )[ value is not None and value
    != vdef ]
  simple: true
  argpos: 1
  paramfile: seqboot.params
  mandatory: true
  name: replicates
  precond: seqboot
  command: false
  type: !mobyle/integertype
    default: 100
# Hidden parameter that always writes "y\n" to dnadist.params.
# NOTE(review): presumably the final confirmation of the dnadist menu,
# given the high argpos - confirm.
- !mobyle/inputprogramparameter
  format: '"y\n"'
  argpos: 1000
  paramfile: dnadist.params
  name: confirm
  command: false
  hidden: true
  type: !mobyle/stringtype {}
# Hidden parameter that always writes "0\n" to dnadist.params.
# NOTE(review): the name suggests it sets the terminal-type menu entry
# of dnadist - confirm.
- !mobyle/inputprogramparameter
  format: '"0\n"'
  argpos: -1
  paramfile: dnadist.params
  name: terminal_type
  command: false
  hidden: true
  type: !mobyle/stringtype {}
# Hidden parameter that writes "M\nD\n<replicates>\n" to dnadist.params
# when the seqboot switch is on and replicates is greater than 1.
- !mobyle/inputprogramparameter
  format: '"M\nD\n" + str( replicates ) + str("\n")'
  argpos: 1
  paramfile: dnadist.params
  name: multiple_dataset
  precond:
    '#and':
    - seqboot
    - replicates:
        '#gt': '1'
  command: false
  hidden: true
  type: !mobyle/stringtype {}
# Hidden parameter that writes "y\n" to seqboot.params when the seqboot
# switch is on.
- !mobyle/inputprogramparameter
  format: '"y\n"'
  argpos: 100
  paramfile: seqboot.params
  name: seqboot_confirm
  precond: seqboot
  command: false
  hidden: true
  type: !mobyle/stringtype {}
# Hidden parameter that writes "0\n" to seqboot.params when the seqboot
# switch is on.
- !mobyle/inputprogramparameter
  format: '"0\n"'
  argpos: -1
  paramfile: seqboot.params
  name: seqboot_terminal_type
  precond: seqboot
  command: false
  hidden: true
  type: !mobyle/stringtype {}
# Output side of the service; the list items that follow are its children.
outputs: !mobyle/outputparagraph
  children:
# Main result, collected from the file name "dnadist.outfile".
- !mobyle/outputprogramparameter
  prompt: Outfile
  filenames: '"dnadist.outfile"'
  argpos: 40
  name: outfile
  type: !mobyle/formattedtype
    data_terms: ['EDAM_data:0870']
# seqboot result, collected from "seqboot.outfile" and only relevant when
# the seqboot switch is on.
# NOTE(review): no "type" is declared here, unlike the other outputs -
# confirm whether one is missing.
- !mobyle/outputprogramparameter
  prompt: seqboot outfile
  filenames: '"seqboot.outfile"'
  argpos: 40
  name: seqboot_out
  precond: seqboot
# Standard output of the job, collected from "dnadist.out".
- !mobyle/outputprogramparameter
  prompt: Standard output
  filenames: '"dnadist.out"'
  name: stdout
  output_type: stdout
  type: !mobyle/formattedtype
    data_terms: ['EDAM_data:2048']
# Standard error of the job, collected from "dnadist.err".
# NOTE(review): the stdout entry declares "output_type: stdout" but this
# entry has no output_type - confirm whether "stderr" should be set.
- !mobyle/outputprogramparameter
  prompt: Standard error
  filenames: '"dnadist.err"'
  name: stderr
  type: !mobyle/formattedtype
    data_terms: ['EDAM_data:2048']
# Program-level help text and classification metadata.
# The folded block scalar below joins its lines with single spaces and
# drops the trailing newline.
comment: >-
  This program uses nucleotide sequences to compute a distance matrix, under
  four different models of nucleotide substitution. It can also
  compute a table of similarity between the nucleotide sequences. The distance for
  each pair of species estimates the total branch length between the two species,
  and can be used in the distance matrix programs FITCH, KITSCH or NEIGHBOR.
operations: ['EDAM_operation:0289']
topics: ['EDAM_topic:0084']
documentation_links: ['http://bioweb2.pasteur.fr/docs/phylip/doc/dnadist.html']
env: {}