This repository has been archived by the owner on May 28, 2024. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 0
/
BMGE_Version 1.0.yaml
397 lines (396 loc) · 19.6 KB
/
BMGE_Version 1.0.yaml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
# Mobyle program descriptor for the BMGE command-line tool.
# The scalar keys below are display metadata; `inputs` opens the tree of
# input paragraphs and parameters.
# NOTE(review): `description` reads "using Entropy" while the reference label
# at the end of this file reads "with Entropy" — confirm the intended wording.
!mobyle/program
name: BMGE
version: Version 1.0
title: BMGE
description: Block Mapping and Gathering using Entropy
authors: Alexis Criscuolo and Simonetta Gribaldo
inputs: !mobyle/inputparagraph
  children:
# "Input" paragraph: holds the alignment file (`infile`) and the sequence
# type selector (`input_type`).
- !mobyle/inputprogramparagraph
  prompt: Input
  name: input
  argpos: 2
  children:
# Input alignment file. Mandatory; the `format` expression renders it on the
# command line as ` -i <value>`.
- !mobyle/inputprogramparameter
  comment: "BMGE uses FASTA or PHYLIP sequential format for input. These\
    \ are\n plain text files. There is no limit on\
    \ the length of the\n alignment. There is also\
    \ no limit on the length of the label of\n a sequence\
    \ (i.e. its FASTA annotation line), although a too long\n \
    \ label (e.g. more than 100 letters) will be truncated\
    \ if the\n output format is PHYLIP sequential."
  prompt: Alignment (-i)
  format: '" -i "+str(value)'
  simple: true
  argpos: 1
  mandatory: true
  name: infile
  command: false
  type: !mobyle/formattedtype
    # NOTE(review): 2200 and 1956 are the same format terms that the fasta and
    # phylip outputs of this file declare — presumably FASTA and PHYLIP.
    format_terms: ['EDAM_format:2200', 'EDAM_format:1956']
    data_terms: ['EDAM_data:0863']
# Sequence alphabet selector (-t). The flag is emitted only when a value is
# set and differs from `vdef` (presumably the declared default below).
- !mobyle/inputprogramparameter
  comment: "Both standard single-letter amino acid and nucleotide alphabets\n\
    \ are used by BMGE. When using amino acid\
    \ sequences, degenerated\n character states\
    \ B and Z are understood by BMGE; similarly,\n \
    \ degenerated nucleotide characters are also understood. The\n\
    \ character state X is understood to be any\
    \ of the 4 or 20\n character states when\
    \ using as input nucleotide or amino acid\n \
    \ sequences, respectively. Dashes (i.e. '-') are understood as\n\
    \ gaps, whereas dots (i.e. '.'), as any other\
    \ single letter that\n are not inside standard\
    \ alphabets, are considered as unknown\n \
    \ character state (i.e. '?'). Nucleotide sequences can be set as\n\
    \ codon ones. In this case, each successive\
    \ nucleotide character\n triplet is considered\
    \ as one codon character."
  prompt: Type of sequence (-t)
  format: ("", " -t "+str(value))[value is not None and value!=vdef]
  simple: true
  argpos: 2
  mandatory: true
  name: input_type
  command: false
  type: !mobyle/stringtype
    # The default is the quoted string 'null' (a placeholder option), not a
    # YAML null; it matches the first entry of `options`.
    default: 'null'
    options:
    - {label: Set the input sequence coding, value: 'null'}
    - {label: Amino acid, value: AA}
    - {label: Nucleotide, value: DNA}
    - {label: Codon, value: CODON}
# "Control options" paragraph: similarity matrix, transition/transversion
# ratio, gap and entropy cut-offs, and minimum block size.
- !mobyle/inputprogramparagraph
  prompt: Control options
  name: options
  children:
# Similarity matrix (-m) for amino-acid and codon input. Offered only when
# `input_type` is AA or CODON (see `precond`); the flag is emitted only when
# a value is set and differs from `vdef` (presumably the default, BLOSUM62).
# The help text below was corrected for grammar and missing sentence spaces
# ("matrices.These option" / "sequences.If").
- !mobyle/inputprogramparameter
  comment: "For each character, BMGE computes a score mainly determined\
    \ by the entropy induced by\n the respective proportion\
    \ of each residue. To estimate realistic\n scores that\
    \ take into account biologically relevant substitution processes,\
    \ BMGE weights the\n entropy estimation with substitution\
    \ matrices. This option can be used with the 15 estimated BLOSUM matrices.\
    \ BMGE uses by \n default the popular BLOSUM62 matrix.\
    \ The character\n trimming is progressively more stringent\
    \ as the BLOSUM index\n increases (e.g. BLOSUM95); reciprocally,\
    \ the trimming is\n progressively more relaxed as the\
    \ BLOSUM index is lower (e.g.\n BLOSUM30). In practice,\
    \ it is recommended to use BLOSUM95 with\n closely related\
    \ sequences, and BLOSUM30 with distantly related\n sequences. If\
    \ input sequences are set as codons, BMGE performs a conversion\n\
    \ into amino acid sequences (following the universal\
    \ genetic code)\n and uses BLOSUM matrices to estimate\
    \ the entropy-like score for\n each codon character.\
    \ So, with option -t set as CODON, one can\n modify the\
    \ option -m only with BLOSUM matrices.\n It is also possible\
    \ to use the identity matrix with any sequence\n types."
  prompt: Similarity Matrices for amino acid and codon sequences (-m)
  format: ("", " -m "+str(value))[value is not None and value!=vdef]
  argpos: 3
  name: matrixaa
  precond:
    input_type:
      '#in': [AA, CODON]
  command: false
  type: !mobyle/stringtype
    default: BLOSUM62
    # The default is listed first, then the identity matrix, then the
    # remaining BLOSUM matrices in increasing index order.
    options:
    - {label: BLOSUM62, value: BLOSUM62}
    - {label: Identity, value: ID}
    - {label: BLOSUM30, value: BLOSUM30}
    - {label: BLOSUM35, value: BLOSUM35}
    - {label: BLOSUM40, value: BLOSUM40}
    - {label: BLOSUM45, value: BLOSUM45}
    - {label: BLOSUM50, value: BLOSUM50}
    - {label: BLOSUM55, value: BLOSUM55}
    - {label: BLOSUM60, value: BLOSUM60}
    - {label: BLOSUM65, value: BLOSUM65}
    - {label: BLOSUM70, value: BLOSUM70}
    - {label: BLOSUM75, value: BLOSUM75}
    - {label: BLOSUM80, value: BLOSUM80}
    - {label: BLOSUM85, value: BLOSUM85}
    - {label: BLOSUM90, value: BLOSUM90}
    - {label: BLOSUM95, value: BLOSUM95}
# Similarity matrix (-m) for nucleotide input. Offered only when `input_type`
# is DNA (see `precond`); the flag is emitted only when a value is set and
# differs from `vdef` (presumably the default, DNAPAM100).
# The help text previously read "transition/transition ratio"; it now says
# "transition/transversion", matching the wording of the ratio parameter.
- !mobyle/inputprogramparameter
  comment: "For nucleotide input sequences, BMGE uses PAM matrices with\
    \ a\n fixed transition/transversion ratio. BMGE can be\
    \ used with all\n possible PAM matrices, from the most\
    \ stringent (i.e. DNAPAM1) to\n highly relaxed ones (e.g.\
    \ DNAPAM500). By default with nucleotide\n sequences,\
    \ BMGE uses the PAM-100 matrix. It is also possible to use the identity\
    \ matrix."
  prompt: Similarity Matrices for nucleotide sequences (-m)
  format: ("", " -m "+str(value))[value is not None and value!=vdef]
  argpos: 3
  name: matrixan
  precond:
    input_type:
      '#in': [DNA]
  command: false
  type: !mobyle/stringtype
    default: DNAPAM100
    # The default is listed first, then the identity matrix, then the
    # remaining DNAPAM matrices in increasing order.
    options:
    - {label: DNAPAM100, value: DNAPAM100}
    - {label: Identity, value: ID}
    - {label: DNAPAM1, value: DNAPAM1}
    - {label: DNAPAM5, value: DNAPAM5}
    - {label: DNAPAM10, value: DNAPAM10}
    - {label: DNAPAM20, value: DNAPAM20}
    - {label: DNAPAM30, value: DNAPAM30}
    - {label: DNAPAM40, value: DNAPAM40}
    - {label: DNAPAM50, value: DNAPAM50}
    - {label: DNAPAM60, value: DNAPAM60}
    - {label: DNAPAM70, value: DNAPAM70}
    - {label: DNAPAM80, value: DNAPAM80}
    - {label: DNAPAM90, value: DNAPAM90}
    - {label: DNAPAM110, value: DNAPAM110}
    - {label: DNAPAM120, value: DNAPAM120}
    - {label: DNAPAM130, value: DNAPAM130}
    - {label: DNAPAM140, value: DNAPAM140}
    - {label: DNAPAM150, value: DNAPAM150}
    - {label: DNAPAM160, value: DNAPAM160}
    - {label: DNAPAM170, value: DNAPAM170}
    - {label: DNAPAM180, value: DNAPAM180}
    - {label: DNAPAM190, value: DNAPAM190}
    - {label: DNAPAM200, value: DNAPAM200}
    - {label: DNAPAM210, value: DNAPAM210}
    - {label: DNAPAM220, value: DNAPAM220}
    - {label: DNAPAM230, value: DNAPAM230}
    - {label: DNAPAM240, value: DNAPAM240}
    - {label: DNAPAM250, value: DNAPAM250}
    - {label: DNAPAM260, value: DNAPAM260}
    - {label: DNAPAM270, value: DNAPAM270}
    - {label: DNAPAM280, value: DNAPAM280}
    - {label: DNAPAM290, value: DNAPAM290}
    - {label: DNAPAM300, value: DNAPAM300}
    - {label: DNAPAM310, value: DNAPAM310}
    - {label: DNAPAM320, value: DNAPAM320}
    - {label: DNAPAM330, value: DNAPAM330}
    - {label: DNAPAM340, value: DNAPAM340}
    - {label: DNAPAM350, value: DNAPAM350}
    - {label: DNAPAM360, value: DNAPAM360}
    - {label: DNAPAM370, value: DNAPAM370}
    - {label: DNAPAM380, value: DNAPAM380}
    - {label: DNAPAM390, value: DNAPAM390}
    - {label: DNAPAM400, value: DNAPAM400}
    - {label: DNAPAM410, value: DNAPAM410}
    - {label: DNAPAM420, value: DNAPAM420}
    - {label: DNAPAM430, value: DNAPAM430}
    - {label: DNAPAM440, value: DNAPAM440}
    - {label: DNAPAM450, value: DNAPAM450}
    - {label: DNAPAM460, value: DNAPAM460}
    - {label: DNAPAM470, value: DNAPAM470}
    - {label: DNAPAM480, value: DNAPAM480}
    - {label: DNAPAM490, value: DNAPAM490}
    - {label: DNAPAM500, value: DNAPAM500}
# Transition/transversion ratio for nucleotide PAM matrices.
# The `format` expression renders `:<value> ` with no flag of its own; at
# argpos 4 it follows the argpos-3 matrix parameter — presumably so that it
# is appended to the `-m <matrix>` argument (TODO confirm).
- !mobyle/inputprogramparameter
  comment: "It is possible to indicate a transition/transversion ratio to\n\
    \ better define the PAM matrices with nucleotide sequences.\
    \ By\n default, BMGE uses a transition/transversion ratio\
    \ of 2."
  prompt: Transition/transversion ratio for nucleotide sequences.
  format: ("", ":" +str(value) + " ")[value is not None]
  argpos: 4
  name: transition
  # Offered only for DNA input with a matrix other than DNAPAM100 and ID.
  # DNAPAM100 is the `matrixan` default, for which that parameter's format
  # emits no `-m` argument.
  precond:
    '#and':
    - input_type:
        '#in': [DNA]
    - '#and':
      - matrixan: {'#ne': DNAPAM100}
      - matrixan: {'#ne': ID}
  command: false
  type: !mobyle/floattype {default: 2.0}
# Gap-frequency threshold (-g). The flag is emitted only when a value is set
# and differs from `vdef` (presumably the declared default, 0.2).
- !mobyle/inputprogramparameter
  comment: "BMGE allows characters containing too many gaps to be removed\n\
    \ with this option. By default, BMGE removes all characters\
    \ with a\n gap frequency greater than 0.2."
  prompt: Gap Rate Cut-off (-g)
  format: ("", " -g "+str(value))[value is not None and value!=vdef]
  argpos: 5
  name: gap_rate_cutoff
  command: false
  type: !mobyle/floattype {default: 0.2}
# Lower entropy-score cut-off. This parameter renders the whole
# ` -h <min>:<max> ` argument, reading the sibling `max_entropy` value.
- !mobyle/inputprogramparameter
  comment: "Following the smoothing operation of the entropy-like score\n\
    \ values across characters, BMGE selects characters associated\
    \ with\n a score value greater than a fixed threshold.\
    \ This cut-off is set\n to 0.0 by default."
  prompt: Minimum entropy Score Cut-off (-h)
  # Emitted only when max_entropy differs from the literal 0.5 (the default
  # declared on `max_entropy`) or this value differs from `vdef`, and only
  # when max_entropy is strictly greater than this value.
  format: ("", " -h %s:%s " % ( value, max_entropy) )[ (max_entropy != 0.5
    or value !=vdef) and (max_entropy > value) ]
  argpos: 6
  name: min_entropy
  command: false
  type: !mobyle/floattype {default: 0.0}
# Upper entropy-score cut-off. It has no `format` of its own: the value is
# consumed by the `min_entropy` format expression, which renders the
# ` -h <min>:<max> ` argument.
- !mobyle/inputprogramparameter
  comment: "Following the smoothing operation of the entropy-like score\n\
    \ values across characters, BMGE selects characters associated\
    \ with\n a score value below a fixed threshold. This\
    \ cut-off is set to 0.5\n by default."
  prompt: Maximum entropy Score Cut-off (-h)
  argpos: 6
  name: max_entropy
  # Offered only when `min_entropy` has a value. The comparison uses YAML
  # `null`; a plain `None` scalar loads as the string "None", which would
  # make the "not equal" test always true.
  precond:
    min_entropy: {'#ne': null}
  command: false
  type: !mobyle/floattype {default: 0.5}
# Minimum block size (-b). The flag is emitted only when a value is set and
# differs from `vdef` (presumably the declared default, 5).
- !mobyle/inputprogramparameter
  comment: "BMGE only selects regions of size greater than or equal to 5.\
    \ Use\n this option to modify this minimum block size\
    \ parameter."
  prompt: Minimum Block Size (-b)
  format: ("", " -b "+str(value) )[value is not None and value!=vdef]
  argpos: 7
  # NOTE(review): "minimun" is misspelled, but this is the parameter's
  # identifier; renaming it could break anything that refers to it by name.
  name: minimun_block_size
  command: false
  type: !mobyle/integertype {default: 5}
# "Output format options" paragraph: one boolean switch per output format.
# Each switch builds its output path from the `infile` name.
- !mobyle/inputprogramparagraph
  prompt: Output format options
  name: output_option
  children:
# PHYLIP sequential output switch. When true, renders ` -op <stem>.phy `,
# where <stem> is the `infile` name up to its first '.'.
- !mobyle/inputprogramparameter
  prompt: Output in phylip sequential format (-op)
  format: ("", " -op "+ infile.split('.')[0] + ".phy ")[ value ]
  argpos: 9
  name: phylip
  # Requires an input file, since `format` dereferences `infile`. The
  # comparison uses YAML `null`; a plain `None` scalar loads as the string
  # "None", which would make the "not equal" test always true.
  precond:
    infile: {'#ne': null}
  command: false
  type: !mobyle/booleantype {default: false}
# PHYLIP sequential output with NCBI-style relabelling. When true, renders
# ` -oppp <stem>.phyp `, where <stem> is the `infile` name up to its first
# '.'. The prompt spelling "formating" was corrected to "formatting".
- !mobyle/inputprogramparameter
  comment: "If input sequences are in FASTA format with NCBI-formatted annotation\
    \ lines, e.g.\n >field1|field2|field3|field4|\
    \ field5 [field6]\n the options -oppp allow naming sequences\
    \ by field6_____field4 ; knowing that field4 is generally\n \
    \ an accession number, and field6 a\n taxon name,\
    \ this option leads to PHYLIP files where each sequence is labelled\
    \ as a\n taxon name and an accession number."
  prompt: Output in phylip sequential format. Special formatting (-oppp)
  format: ("", " -oppp "+ infile.split('.')[0] + ".phyp ")[ value]
  argpos: 9
  name: phylip_oppp
  # Requires an input file, since `format` dereferences `infile`. The
  # comparison uses YAML `null`; a plain `None` scalar loads as the string
  # "None", which would make the "not equal" test always true.
  precond:
    infile: {'#ne': null}
  command: false
  type: !mobyle/booleantype {default: false}
# NEXUS output switch. When true, renders ` -on <stem>.nex `, where <stem>
# is the `infile` name up to its first '.'.
- !mobyle/inputprogramparameter
  prompt: Output in nexus format (-on)
  format: ("", " -on "+ infile.split('.')[0] + ".nex ")[ value ]
  argpos: 9
  name: nexus
  # Requires an input file, since `format` dereferences `infile`. The
  # comparison uses YAML `null`; a plain `None` scalar loads as the string
  # "None", which would make the "not equal" test always true.
  precond:
    infile: {'#ne': null}
  command: false
  type: !mobyle/booleantype {default: false}
# NEXUS output with NCBI-style relabelling. When true, renders
# ` -onnn <stem>.nexn `, where <stem> is the `infile` name up to its first
# '.'. The prompt spelling "formating" was corrected to "formatting".
- !mobyle/inputprogramparameter
  comment: "If input sequences are in FASTA format with NCBI-formatted\n\
    \ annotation lines,\n e.g. >field1|field2|field3|field4|\
    \ field5 [field6]\n the option -onnn allow naming sequences\
    \ by field6_____field4 ;\n knowing that field4 is generally\
    \ an accession number and field6 a\n taxon name, this\
    \ option leads to NEXUS files where each sequence\n is\
    \ labelled as a taxon name and an accession number."
  prompt: Output in nexus format. Special formatting (-onnn)
  format: ("", " -onnn "+ infile.split('.')[0] + ".nexn ")[ value ]
  argpos: 9
  name: nexus_onnn
  # Requires an input file, since `format` dereferences `infile`. The
  # comparison uses YAML `null`; a plain `None` scalar loads as the string
  # "None", which would make the "not equal" test always true.
  precond:
    infile: {'#ne': null}
  command: false
  type: !mobyle/booleantype {default: false}
# FASTA output switch. When true, renders ` -of <stem>.fa `, where <stem>
# is the `infile` name up to its first '.'.
- !mobyle/inputprogramparameter
  prompt: Output in fasta format (-of)
  format: ("", " -of "+ infile.split('.')[0] + ".fa ")[ value ]
  argpos: 10
  name: fasta
  # Requires an input file, since `format` dereferences `infile`. The
  # comparison uses YAML `null`; a plain `None` scalar loads as the string
  # "None", which would make the "not equal" test always true.
  precond:
    infile: {'#ne': null}
  command: false
  type: !mobyle/booleantype {default: false}
# HTML output switch. When true, renders ` -oh <stem>.html `, where <stem>
# is the `infile` name up to its first '.'.
- !mobyle/inputprogramparameter
  prompt: Output in html format (-oh)
  format: ("", " -oh "+ infile.split('.')[0] + ".html ")[ value ]
  argpos: 11
  name: html
  # Requires an input file, since `format` dereferences `infile`. The
  # comparison uses YAML `null`; a plain `None` scalar loads as the string
  # "None", which would make the "not equal" test always true.
  precond:
    infile: {'#ne': null}
  command: false
  type: !mobyle/booleantype {default: false}
# Output tree. Its first paragraph reuses the name `output_option` from the
# input side and declares one result file per output-format switch.
outputs: !mobyle/outputparagraph
  children:
  - !mobyle/outputprogramparagraph
    prompt: Output format options
    name: output_option
    children:
# Result file for the `phylip` switch (see `precond`). The `filenames`
# expression mirrors the path that switch passes to -op.
- !mobyle/outputprogramparameter
  prompt: Output in phylip sequential format
  filenames: infile.split('.')[0] + ".phy"
  name: phylipout
  precond: phylip
  type: !mobyle/formattedtype
    format_terms: ['EDAM_format:1956']
    data_terms: ['EDAM_data:0863']
# Result file for the `phylip_oppp` switch (see `precond`). The `filenames`
# expression mirrors the path that switch passes to -oppp.
# NOTE(review): the prompt is identical to the one on `phylipout`; consider
# distinguishing the two for users.
- !mobyle/outputprogramparameter
  prompt: Output in phylip sequential format
  filenames: infile.split('.')[0] + ".phyp"
  name: phylipout_oppp
  precond: phylip_oppp
  type: !mobyle/formattedtype
    format_terms: ['EDAM_format:1956']
    data_terms: ['EDAM_data:0863']
# Result file for the `nexus` switch (see `precond`). The `filenames`
# expression mirrors the path that switch passes to -on.
- !mobyle/outputprogramparameter
  prompt: Output in nexus format
  filenames: infile.split('.')[0] + ".nex"
  name: nexusout
  precond: nexus
  type: !mobyle/formattedtype
    format_terms: ['EDAM_format:1912']
    data_terms: ['EDAM_data:0863']
# Result file for the `nexus_onnn` switch (see `precond`). The `filenames`
# expression mirrors the path that switch passes to -onnn.
# NOTE(review): the prompt is identical to the one on `nexusout`; consider
# distinguishing the two for users.
- !mobyle/outputprogramparameter
  prompt: Output in nexus format
  filenames: infile.split('.')[0] + ".nexn"
  name: nexusout_onnn
  precond: nexus_onnn
  type: !mobyle/formattedtype
    format_terms: ['EDAM_format:1912']
    data_terms: ['EDAM_data:0863']
# Result file for the `fasta` switch (see `precond`). The `filenames`
# expression mirrors the path that switch passes to -of.
- !mobyle/outputprogramparameter
  prompt: Output in fasta format
  filenames: infile.split('.')[0] + ".fa"
  name: fastaout
  precond: fasta
  type: !mobyle/formattedtype
    format_terms: ['EDAM_format:2200']
    data_terms: ['EDAM_data:0863']
# Result file for the `html` switch (see `precond`). The `filenames`
# expression mirrors the path that switch passes to -oh.
# Unlike the alignment outputs, no `format_terms` are declared here.
- !mobyle/outputprogramparameter
  prompt: Output in html format
  filenames: infile.split('.')[0] + ".html"
  name: htmlout
  precond: html
  type: !mobyle/formattedtype
    data_terms: ['EDAM_data:2048']
# Standard output of the program, declared under the fixed file name
# "BMGE.out" and marked with `output_type: stdout`.
- !mobyle/outputprogramparameter
  prompt: Standard output
  filenames: '"BMGE.out"'
  name: stdout
  output_type: stdout
  type: !mobyle/formattedtype
    data_terms: ['EDAM_data:2048']
# Standard error of the program, declared under the fixed file name
# "BMGE.err".
# NOTE(review): unlike the `stdout` parameter, no `output_type` is set here —
# confirm whether the schema expects `output_type: stderr`.
- !mobyle/outputprogramparameter
  prompt: Standard error
  filenames: '"BMGE.err"'
  name: stderr
  type: !mobyle/formattedtype
    data_terms: ['EDAM_data:2048']
# Classification terms, citation, documentation and packaging metadata.
operations: ['EDAM_operation:2946']
topics: ['EDAM_topic:0084', 'EDAM_topic:0182']
references:
# The DOI of the cited BMC Evolutionary Biology article (volume 10, article
# 210) is filled in; it was previously null.
- {doi: '10.1186/1471-2148-10-210', label: 'Criscuolo A, Gribaldo S (2010) BMGE (Block Mapping and Gathering
    with Entropy): selection of phylogenetic informative regions from multiple
    sequence alignments. BMC Evolutionary Biology 10:210.', url: null}
documentation_links: ['http://bioweb2.pasteur.fr/docs/BMGE/BMGE_doc.pdf']
# Executable name used to build the command line.
command: BMGE
env: {}
source_links: ['ftp://ftp.pasteur.fr/pub/gensoft/projects/BMGE/']