# consensus_6d.yaml
# Mobyle service description for the CONSENSUS 6d programs.
# The file name above is kept as a comment: as a bare scalar in front of the
# tagged root mapping it made the document unparseable.

!mobyle/program
name: consensus
version: '6d'
title: CONSENSUS
description: Identification of consensus patterns in unaligned DNA and protein sequences
authors: Gerald Z. Hertz, G.D. Stormo
# Root input paragraph; its children are the parameters and nested
# paragraphs listed below.
inputs: !mobyle/inputparagraph
    children:
    # Program selector (consensus or wconsensus), flagged `command: true`.
    # Its format expression first runs fasta-consensus on the sequence file,
    # redirecting into "<sequence>.wcons", then appends the program name.
    - !mobyle/inputprogramparameter
        prompt: Program to run
        format: '"fasta-consensus <" + str(sequence) + " >" + str(sequence) + ".wcons  ; " + str(prog)'
        argpos: 0
        mandatory: true
        name: prog
        command: true
        type: !mobyle/stringtype
            default: consensus
            options:
            - label: Search for fixed width patterns (consensus)
              value: consensus
            - label: 'Same as consensus, width not fixed (wconsensus)'
              value: wconsensus
    # Mandatory sequence input. The command line receives the ".wcons" file
    # name (the same name the `prog` format expression redirects into), not
    # the uploaded file itself.
    - !mobyle/inputprogramparameter
        prompt: Sequences file (-f)
        format: '" -f " + str(sequence) + ".wcons"'
        simple: true
        argpos: 1
        mandatory: true
        name: sequence
        command: false
        type: !mobyle/formattedtype
            format_terms:
            - 'EDAM_format:2200'
            data_terms:
            - 'EDAM_data:2044'
    # "Required parameter" paragraph (argpos 2). Both children are mandatory
    # and each is gated by a precondition on `prog`.
    - !mobyle/inputprogramparagraph
        prompt: Required parameter
        name: required
        argpos: 2
        children:
        # Pattern width; only applies when prog == consensus.
        # Emitted as " -L<value>" with no space between flag and number.
        - !mobyle/inputprogramparameter
            prompt: Width of pattern for consensus program (-L)
            format: '( "" , " -L" + str(value) )[ value is not None ]'
            simple: true
            mandatory: true
            name: width
            precond:
                prog: consensus
            command: false
            type: !mobyle/integertype {}
        # Standard-deviation setting; only applies when prog == wconsensus.
        # The help text keeps its embedded line breaks and indentation, so it
        # is written as a literal block scalar.
        - !mobyle/inputprogramparameter
            comment: |-
                A range of values should be tried.  For example,
                       try values of 0.5, 1, 1.5, and 2.  The overall best alignment is
                       the one having the smallest e-value.
            prompt: >-
                Number of standard deviations to lower the information content
                at each position before identifying information peaks for wconsensus
                program (-s)
            format: '" -s" + str(value)'
            simple: true
            mandatory: true
            name: standard_deviation
            precond:
                prog: wconsensus
            command: false
            type: !mobyle/floattype
                default: 1.0
    # "Basic options" paragraph (argpos 2).
    - !mobyle/inputprogramparagraph
        prompt: Basic options
        name: basic_options
        argpos: 2
        children:
        # Nested paragraph grouping the alphabet-related parameters.
        - !mobyle/inputprogramparagraph
            prompt: Alphabet options
            name: alphabet_options
            children:
            # Alphabet chooser. Only the two built-in choices produce a "-a"
            # argument here; 'null' and 'users' yield an empty string.
            # NOTE(review): the "/path/to/consensus/alphabet/dir/" prefix looks
            # like an install-time placeholder - confirm it is substituted on
            # deployment.
            - !mobyle/inputprogramparameter
                prompt: Choose an alphabet
                format: '("", " -a /path/to/consensus/alphabet/dir/"+ value)[ value in( "dna-alphabet" , "prot-alphabet" )]'
                simple: true
                mandatory: true
                name: alphabet
                command: false
                type: !mobyle/stringtype
                    default: 'null'
                    options:
                    - label: Choose an alphabet
                      value: 'null'
                    - label: DNA
                      value: dna-alphabet
                    - label: Protein
                      value: prot-alphabet
                    - label: Users file
                      value: users
            # User-supplied alphabet file; only offered when alphabet == users.
            # The help text was repaired: sentence breaks restored and the two
            # fused lists of line formats separated into quoted items.
            # NOTE(review): unlike its siblings this parameter declares no
            # `type:` - confirm whether a file type was lost in conversion.
            - !mobyle/inputprogramparameter
                comment: >-
                    Each line contains a letter (a symbol in the alphabet) followed
                    by an optional normalization number (default: 1.0). The normalization
                    is based on the relative prior probabilities of the letters. For
                    nucleic acids, this might be the genomic frequency of the bases;
                    however, if the -d option is not used, the frequencies observed
                    in your own sequence data are used. In nucleic acid alphabets,
                    a letter and its complement appear on the same line, separated
                    by a colon (a letter can be its own complement, e.g. when using
                    a dimer alphabet). Complementary letters may use the same normalization
                    number. Only the standard 26 letters are permissible; however,
                    when the -CS option is used, the alphabet is case sensitive so
                    that a total of 52 different characters are possible.
                    POSSIBLE LINE FORMATS WITHOUT COMPLEMENTARY LETTERS:
                    "letter"; "letter normalization".
                    POSSIBLE LINE FORMATS WITH COMPLEMENTARY LETTERS:
                    "letter:complement"; "letter:complement normalization";
                    "letter:complement normalization:complement's_normalization".
                prompt: User Alphabet file (-a)
                format: ( "" , " -a " + str(value) )[ value is not None ]
                name: ascii_alphabet
                precond: {alphabet: users}
                command: false
            # Boolean switch that adds " -d" when set.
            # Help text repaired: the second sentence lacked its verb ("are
            # used") and the following "they" had no clear antecedent.
            - !mobyle/inputprogramparameter
                comment: >-
                    By default, the program uses the frequencies observed in
                    your own sequence data for the prior probabilities of the letters.
                    However, if the -d option is set, the prior probabilities designated
                    by the alphabet options are used. If the -d option is not set, the
                    alphabet options are still used to determine the sequence alphabet,
                    but any prior probability information is ignored.
                prompt: Use the designated prior probabilities of the letters to override
                    the observed frequencies (-d)
                format: ( "" , " -d" )[ value ]
                name: prior
                command: false
                type: !mobyle/booleantype {default: false}
        # Strand handling. Values are the strings '0'..'3'; the flag is only
        # emitted when the choice differs from the default ('0').
        - !mobyle/inputprogramparameter
            prompt: Complement of nucleic acid sequences (-c)
            format: '( "" , " -c" + str(value) )[ value is not None and value != vdef]'
            name: complement
            command: false
            type: !mobyle/stringtype
                default: '0'
                options:
                - label: Ignore the complementary strand (0)
                  value: '0'
                - label: Include both strands as separate sequences (1)
                  value: '1'
                - label: Include both strands as a single sequence (2)
                  value: '2'
                - label: Assume that the pattern is symmetrical (consensus program only) (3)
                  value: '3'
        # Chooses between -n and -N; the repeat count comes from the separate
        # max_cycle_nb parameter and is appended directly to the flag.
        # Fix: the count is appended only when max_cycle_nb is set. Before, an
        # unset count produced the literal text "None" (e.g. "-nNone").
        # Help text repaired: the two option descriptions were fused
        # ("per matrix.-N integer") and carried stray tab/newline residue.
        - !mobyle/inputprogramparameter
            comment: >-
                -n integer: repeat the matrix building cycle a maximum of "integer"
                times and allow each sequence to contribute zero or more words
                per matrix. -N integer: repeat the matrix building cycle a maximum
                of "integer" times and allow each sequence to contribute one or
                more words per matrix.
            prompt: How many words per matrix for each sequence to contribute
            format: '( "" , " " + str(value) + ( "" , str(max_cycle_nb) )[ max_cycle_nb is not None ] )[ value != vdef ]'
            name: max_cycle
            command: false
            type: !mobyle/stringtype
                default: 'null'
                options:
                - {label: No repeat, value: 'null'}
                - {label: Allow each sequence to contribute zero or more words per
                        matrix (-n), value: -n}
                - {label: Allow each sequence to contribute one or more words per
                        matrix (-N), value: -N}
        # Repeat count for -n / -N. Its own format is the expression for an
        # empty string; the number is consumed by the max_cycle format instead.
        - !mobyle/inputprogramparameter
            prompt: Maximum repeat of the matrix building cycle for -n or -N
            format: "\"\""
            name: max_cycle_nb
            command: false
            type: !mobyle/integertype {}
    # "Advanced options" paragraph (argpos 2).
    # NOTE(review): the name is spelled "advenced_options"; left untouched
    # because it may be referenced elsewhere - confirm before renaming.
    - !mobyle/inputprogramparagraph
        prompt: Advanced options
        name: advenced_options
        argpos: 2
        children:
        # Queue size (-q). The format has no `value != vdef` guard, so the flag
        # is emitted whenever a value is present, including the default 200.
        - !mobyle/inputprogramparameter
            prompt: >-
                Maximum number of matrices to save between cycles of the program
                (-q)
            format: '( "" , " -q " + str(value) )[ value is not None ]'
            name: queue
            command: false
            type: !mobyle/integertype
                default: 200
        # Minimum word spacing (-m); only offered when max_cycle is not 'null',
        # i.e. when -n or -N has been selected.
        # Help text repaired: missing spaces after sentence ends.
        - !mobyle/inputprogramparameter
            comment: >-
                The minimum distance between the starting points of words within
                the same matrix pattern; must be a positive integer; can only be used
                when the "-n" or "-N" option is also used. For wconsensus, the default
                value is 1. For consensus, this number is indicated by the width (-L).
            prompt: Minimum distance between the starting points of words within the
                same matrix pattern (-m)
            format: ( "" , " -m" + str(value) )[ value is not None and value != vdef]
            name: distance
            precond:
                max_cycle: {'#ne': 'null'}
            command: false
            type: !mobyle/integertype {}
        # Early-termination cycle count (-t); omitted when no value is given.
        - !mobyle/inputprogramparameter
            comment: >-
                default: terminate only when the maximum number of matrix building
                cycles is completed.
            prompt: >-
                Terminate the program this number of cycles after the current
                most significant alignment is identified (-t)
            format: '( "" , " -t" + str(value) )[ value is not None ]'
            name: terminate
            command: false
            type: !mobyle/integertype {}
        # Progeny-saving mode. The option value is itself the flag text, so it
        # is appended verbatim when it differs from the default (-pr2).
        - !mobyle/inputprogramparameter
            comment: >-
                -pr2 option prevents a strong pattern found in only a subset
                of the sequences from overwhelming the algorithm and eliminating other
                potential patterns. This undesirable situation can occur when a subset
                of the sequences share an evolutionary relationship not common to
                the majority of the sequences.
            prompt: Save the top progeny matrices
            format: '( "" , " " + str(value) )[ value is not None and value != vdef]'
            name: progeny
            command: false
            type: !mobyle/stringtype
                default: '-pr2'
                options:
                - label: Regardless of parentage (-pr1)
                  value: '-pr1'
                - label: For each parental matrix (-pr2)
                  value: '-pr2'
        # Boolean switch that adds " -l" when set.
        - !mobyle/inputprogramparameter
            comment: >-
                This option results in a significant speed up in the program,
                but the algorithm becomes dependent on the order of the sequence-file
                names.
            prompt: >-
                Seed with the first sequence and proceed linearly through the
                list (-l)
            format: '( "" , " -l" )[ value ]'
            name: linearly
            command: false
            type: !mobyle/booleantype
                default: false
        # Terminal-gap policy; only applies when prog == wconsensus. The option
        # value is the flag text and is appended when it is not the default.
        - !mobyle/inputprogramparameter
            prompt: Permit terminal gaps for wconsensus program
            format: '( "" , " " + str(value) )[ value is not None and value != vdef]'
            name: terminal_gap
            precond:
                prog: wconsensus
            command: false
            type: !mobyle/stringtype
                default: '-pg0'
                options:
                - label: Do NOT permit terminal gaps (-pg0)
                  value: '-pg0'
                - label: Permit penalized terminal gaps (-pg1)
                  value: '-pg1'
    # "Output options" paragraph (argpos 2): controls how many matrices are
    # printed (-pt, -pf).
    - !mobyle/inputprogramparagraph
        prompt: Output options
        name: output_options
        argpos: 2
        children:
        # Number of top matrices (-pt); emitted only when it differs from the
        # default of 4. Carries its own argpos in addition to the paragraph's.
        - !mobyle/inputprogramparameter
            comment: A negative value means print all the top matrices.
            prompt: Number of top matrices to print (-pt)
            format: '( "" , " -pt" + str(value) )[ value is not None and value != vdef]'
            argpos: 2
            name: top_matrices
            command: false
            type: !mobyle/integertype
                default: 4
        # Number of final matrices (-pf); emitted only when it differs from the
        # declared default of 4.
        - !mobyle/inputprogramparameter
            comment: >-
                Default when NOT using -n or -N option: print 4 matrices; default
                when using -n or -N option: print no matrices.
            prompt: Number of final matrices to print (-pf)
            format: '( "" , " -pf" + str(value) )[ value is not None and value != vdef]'
            name: final_matrices
            command: false
            type: !mobyle/integertype
                default: 4
# Output declarations: a results file named after the chosen program, the
# intermediate .wcons file(s), and the captured standard streams.
# NOTE(review): `stdout` declares `output_type: stdout` while `stderr` has no
# output_type - confirm whether that asymmetry is intended.
outputs: !mobyle/outputparagraph
    children:
    - !mobyle/outputprogramparameter
        prompt: Results file
        filenames: 'str(prog) + ".results"'
        argpos: 50
        name: outfile
    - !mobyle/outputprogramparameter
        prompt: wcons file
        filenames: '"*.wcons"'
        name: consensus_format
    - !mobyle/outputprogramparameter
        prompt: Standard output
        filenames: '"consensus.out"'
        name: stdout
        output_type: stdout
        type: !mobyle/formattedtype
            data_terms:
            - 'EDAM_data:2048'
    - !mobyle/outputprogramparameter
        prompt: Standard error
        filenames: '"consensus.err"'
        name: stderr
        type: !mobyle/formattedtype
            data_terms:
            - 'EDAM_data:2048'
# Bibliography, upstream links and (empty) environment settings.
references:
- doi: null
  label: >-
      G.Z. Hertz and G.D. Stormo. Identification of consensus patterns
      in unaligned DNA and protein sequences: a large-deviation statistical basis
      for penalizing gaps. In: Proceedings of the Third International Conference
      on Bioinformatics and Genome Research (H.A. Lim, and C.R. Cantor, editors).
      World Scientific Publishing Co., Ltd. Singapore, 1995. pages 201--216.
  url: null
homepage_links:
- 'http://gzhertz.home.comcast.net/~gzhertz/'
env: {}
source_links:
- 'http://gzhertz.home.comcast.net/~gzhertz/CONSENSUS_2004-04-14.TAR.gz'