# consensus_6d.yaml
# Note: the repository hosting this file was archived by its owner on
# May 28, 2024 and is now read-only.
# Identity of the service: the root node is tagged as a Mobyle program and
# carries its name, version, display title, summary and authors.
!mobyle/program
name: 'consensus'
version: '6d'
title: 'CONSENSUS'
description: >-
  Identification of consensus patterns in unaligned DNA and protein
  sequences
authors: 'Gerald Z.Hertz, G.D. Stormo'
# Input parameters of the CONSENSUS service.
#
# The `format` values appear to be Python expressions that produce each
# parameter's command-line fragment (they use str(value), `vdef` and the
# ("", fragment)[condition] selection idiom); `precond` ties a parameter to a
# value of another parameter. NOTE(review): exact evaluation semantics belong
# to the Mobyle framework and are not visible in this file - confirm there.
inputs: !mobyle/inputparagraph
  children:
  # Program selector. Its format first runs fasta-consensus on the submitted
  # sequence file, redirecting the result to "<sequence>.wcons", and then
  # names the selected program.
  - !mobyle/inputprogramparameter
    prompt: Program to run
    format: '"fasta-consensus <" + str(sequence) + " >" + str(sequence) + ".wcons ;
      " + str(prog)'
    argpos: 0
    mandatory: true
    name: prog
    command: true
    type: !mobyle/stringtype
      default: consensus
      options:
      - {label: Search for fixed width patterns (consensus), value: consensus}
      - {label: 'Same as consensus, width not fixed (wconsensus)', value: wconsensus}
  # The -f argument points at the ".wcons" file named in the prog format.
  - !mobyle/inputprogramparameter
    prompt: Sequences file (-f)
    format: '" -f " + str(sequence) + ".wcons"'
    simple: true
    argpos: 1
    mandatory: true
    name: sequence
    command: false
    type: !mobyle/formattedtype
      format_terms: ['EDAM_format:2200']
      data_terms: ['EDAM_data:2044']
  - !mobyle/inputprogramparagraph
    prompt: Required parameter
    name: required
    argpos: 2
    children:
    # Restricted by precond to prog == consensus.
    - !mobyle/inputprogramparameter
      prompt: Width of pattern for consensus program (-L)
      format: ( "" , " -L" + str(value) )[ value is not None ]
      simple: true
      mandatory: true
      name: width
      precond: {prog: consensus}
      command: false
      type: !mobyle/integertype {}
    # Restricted by precond to prog == wconsensus.
    - !mobyle/inputprogramparameter
      comment: "A range of values should be tried. For example,\n try\
        \ values of 0.5, 1, 1.5, and 2. The overall best alignment is\n \
        \ the one having the smallest e-value."
      prompt: Number of standard deviations to lower the information content
        at each position before identifying information peaks for wconsensus
        program (-s)
      format: '" -s" + str(value)'
      simple: true
      mandatory: true
      name: standard_deviation
      precond: {prog: wconsensus}
      command: false
      type: !mobyle/floattype {default: 1.0}
  - !mobyle/inputprogramparagraph
    prompt: Basic options
    name: basic_options
    argpos: 2
    children:
    - !mobyle/inputprogramparagraph
      prompt: Alphabet options
      name: alphabet_options
      children:
      - !mobyle/inputprogramparameter
        prompt: Choose an alphabet
        # Emits -a only for the two bundled alphabets; 'null' and 'users'
        # produce no fragment here (the user file is handled by
        # ascii_alphabet).
        # NOTE(review): "/path/to/consensus/alphabet/dir/" looks like an
        # unconfigured placeholder path - confirm the deployed location.
        format: ("", " -a /path/to/consensus/alphabet/dir/"+ value)[ value
          in( "dna-alphabet" , "prot-alphabet" )]
        simple: true
        mandatory: true
        name: alphabet
        command: false
        type: !mobyle/stringtype
          default: 'null'
          options:
          - {label: Choose an alphabet, value: 'null'}
          - {label: DNA, value: dna-alphabet}
          - {label: Protein, value: prot-alphabet}
          - {label: Users file, value: users}
      # Restricted by precond to alphabet == users.
      - !mobyle/inputprogramparameter
        comment: 'Each line contains a letter (a symbol in the alphabet) followed
          by an optional normalization number (default: 1.0). The normalization
          is based on the relative prior probabilities of the letters. For
          nucleic acids, this might be the genomic frequency of the bases;
          however, if the -d option is not used, the frequencies observed
          in your own sequence data are used. In nucleic acid alphabets,
          a letter and its complement appear on the same line, separated
          by a colon (a letter can be its own complement, e.g. when using
          a dimer alphabet). Complementary letters may use the same normalization
          number. Only the standard 26 letters are permissible; however,
          when the -CS option is used, the alphabet is case sensitive so
          that a total of 52 different characters are possible. POSSIBLE
          LINE FORMATS WITHOUT COMPLEMENTARY LETTERS: "letter"; "letter
          normalization". POSSIBLE LINE FORMATS WITH COMPLEMENTARY LETTERS:
          "letter:complement"; "letter:complement normalization";
          "letter:complement normalization:complement''s_normalization".'
        prompt: User Alphabet file (-a)
        format: ( "" , " -a " + str(value) )[ value is not None ]
        name: ascii_alphabet
        precond: {alphabet: users}
        command: false
      - !mobyle/inputprogramparameter
        comment: By default, the program uses the frequencies observed in
          your own sequence data for the prior probabilities of the letters.
          However, if the -d option is set, the prior probabilities designated
          by the alphabet options are used. If the -d option is not set, they
          are still used to determine the sequence alphabet, but any prior
          probability information is ignored.
        prompt: Use the designated prior probabilities of the letters to override
          the observed frequencies (-d)
        format: ( "" , " -d" )[ value ]
        name: prior
        command: false
        type: !mobyle/booleantype {default: false}
    - !mobyle/inputprogramparameter
      prompt: Complement of nucleic acid sequences (-c)
      format: ( "" , " -c" + str(value) )[ value is not None and value != vdef]
      name: complement
      command: false
      type: !mobyle/stringtype
        default: '0'
        options:
        - {label: Ignore the complementary strand (0), value: '0'}
        - {label: Include both strands as separate sequences (1), value: '1'}
        - {label: Include both strands as a single sequence (2), value: '2'}
        - {label: Assume that the pattern is symmetrical (consensus program
            only) (3), value: '3'}
    - !mobyle/inputprogramparameter
      comment: "-n integer: repeat the matrix building cycle a maximum of \"\
        integer\"\n\t times and allow each sequence to contribute zero\
        \ or more words\n\t per matrix.\n-N integer: repeat the matrix\
        \ building cycle a maximum of \"integer\"\n\t times and allow\
        \ each sequence to contribute one or more words\n\t per matrix"
      prompt: How many words per matrix for each sequence to contribute
      # The flag and its count are emitted together from this one expression
      # (max_cycle_nb itself formats to ""). The `is not None` guard keeps a
      # missing count from being rendered as the literal text "None"
      # (e.g. " -nNone").
      format: ( "" , " " + str(value) + str(max_cycle_nb) )[ value != vdef
        and max_cycle_nb is not None ]
      name: max_cycle
      command: false
      type: !mobyle/stringtype
        default: 'null'
        options:
        - {label: No repeat, value: 'null'}
        - {label: Allow each sequence to contribute zero or more words per
            matrix (-n), value: -n}
        - {label: Allow each sequence to contribute one or more words per
            matrix (-N), value: -N}
    # Contributes no fragment of its own; its value is read by the max_cycle
    # format above.
    - !mobyle/inputprogramparameter
      prompt: Maximum repeat of the matrix building cycle for -n or -N
      format: '""'
      name: max_cycle_nb
      command: false
      type: !mobyle/integertype {}
  - !mobyle/inputprogramparagraph
    prompt: Advanced options
    # NOTE(review): "advenced" is a misspelling of "advanced"; left unchanged
    # because the name may be referenced outside this file - confirm before
    # renaming.
    name: advenced_options
    argpos: 2
    children:
    - !mobyle/inputprogramparameter
      prompt: Maximum number of matrices to save between cycles of the program
        (-q)
      format: ( "" , " -q " + str(value) )[ value is not None ]
      name: queue
      command: false
      type: !mobyle/integertype {default: 200}
    # Restricted by precond to max_cycle != 'null', i.e. -n or -N selected.
    - !mobyle/inputprogramparameter
      comment: The minimum distance between the starting points of words within
        the same matrix pattern; must be a positive integer; can only be used
        when the "-n" or "-N" option is also used. For wconsensus, the default
        value is 1. For consensus, this number is indicated by the width (-L).
      prompt: Minimum distance between the starting points of words within the
        same matrix pattern (-m)
      format: ( "" , " -m" + str(value) )[ value is not None and value != vdef]
      name: distance
      precond:
        max_cycle: {'#ne': 'null'}
      command: false
      type: !mobyle/integertype {}
    - !mobyle/inputprogramparameter
      comment: 'default: terminate only when the maximum number of matrix building
        cycles is completed.'
      prompt: Terminate the program this number of cycles after the current
        most significant alignment is identified (-t)
      format: ( "" , " -t" + str(value) )[ value is not None ]
      name: terminate
      command: false
      type: !mobyle/integertype {}
    - !mobyle/inputprogramparameter
      comment: -pr2 option prevents a strong pattern found in only a subset
        of the sequences from overwhelming the algorithm and eliminating other
        potential patterns. This undesirable situation can occur when a subset
        of the sequences share an evolutionary relationship not common to
        the majority of the sequences.
      prompt: Save the top progeny matrices
      format: ( "" , " " + str(value) )[ value is not None and value != vdef]
      name: progeny
      command: false
      type: !mobyle/stringtype
        default: -pr2
        options:
        - {label: Regardless of parentage (-pr1), value: -pr1}
        - {label: For each parental matrix (-pr2), value: -pr2}
    - !mobyle/inputprogramparameter
      comment: This option results in a significant speed up in the program,
        but the algorithm becomes dependent on the order of the sequence-file
        names.
      prompt: Seed with the first sequence and proceed linearly through the
        list (-l)
      format: ( "" , " -l" )[ value ]
      name: linearly
      command: false
      type: !mobyle/booleantype {default: false}
    # Restricted by precond to prog == wconsensus.
    - !mobyle/inputprogramparameter
      prompt: Permit terminal gaps for wconsensus program
      format: ( "" , " " + str(value) )[ value is not None and value != vdef]
      name: terminal_gap
      precond: {prog: wconsensus}
      command: false
      type: !mobyle/stringtype
        default: -pg0
        options:
        - {label: Do NOT permit terminal gaps (-pg0), value: -pg0}
        - {label: Permit penalized terminal gaps (-pg1), value: -pg1}
  - !mobyle/inputprogramparagraph
    prompt: Output options
    name: output_options
    argpos: 2
    children:
    - !mobyle/inputprogramparameter
      comment: A negative value means print all the top matrices.
      prompt: Number of top matrices to print (-pt)
      format: ( "" , " -pt" + str(value) )[ value is not None and value != vdef]
      argpos: 2
      name: top_matrices
      command: false
      type: !mobyle/integertype {default: 4}
    - !mobyle/inputprogramparameter
      comment: 'Default when NOT using -n or -N option: print 4 matrices; default
        when using -n or -N option: print no matrices.'
      prompt: Number of final matrices to print (-pf)
      format: ( "" , " -pf" + str(value) )[ value is not None and value != vdef]
      name: final_matrices
      command: false
      type: !mobyle/integertype {default: 4}
# Output parameters of the service: a results file named after the selected
# program, the generated ".wcons" file(s), and the captured standard streams.
outputs: !mobyle/outputparagraph
  children:
  - !mobyle/outputprogramparameter
    prompt: Results file
    filenames: 'str(prog) + ".results"'
    argpos: 50
    name: outfile
  - !mobyle/outputprogramparameter
    prompt: wcons file
    filenames: '"*.wcons"'
    name: consensus_format
  - !mobyle/outputprogramparameter
    prompt: Standard output
    filenames: '"consensus.out"'
    name: stdout
    output_type: stdout
    type: !mobyle/formattedtype
      data_terms:
      - 'EDAM_data:2048'
  # NOTE(review): unlike the stdout entry, no output_type is declared here -
  # confirm whether that is intentional.
  - !mobyle/outputprogramparameter
    prompt: Standard error
    filenames: '"consensus.err"'
    name: stderr
    type: !mobyle/formattedtype
      data_terms:
      - 'EDAM_data:2048'
# Bibliography and upstream links for the packaged program.
references:
- doi: null
  label: >-
    G.Z. Hertz and G.D. Stormo. Identification of consensus patterns
    in unaligned DNA and protein sequences: a large-deviation statistical basis
    for penalizing gaps. In: Proceedings of the Third International Conference
    on Bioinformatics and Genome Research (H.A. Lim, and C.R. Cantor, editors).
    World Scientific Publishing Co., Ltd. Singapore, 1995. pages 201--216.
  url: null
homepage_links:
- 'http://gzhertz.home.comcast.net/~gzhertz/'
# No environment variables are declared for this service.
env: {}
source_links:
- 'http://gzhertz.home.comcast.net/~gzhertz/CONSENSUS_2004-04-14.TAR.gz'