# BMGE_Version 1.0.yaml

# Mobyle program descriptor for BMGE. The !mobyle/* tags are custom tags;
# presumably they are resolved by the Mobyle loader - confirm.
!mobyle/program
name: BMGE
version: Version 1.0
title: BMGE
# Wording aligned with the cited paper title ("Gathering with Entropy").
description: Block Mapping and Gathering with Entropy
authors: Alexis Criscuolo and Simonetta Gribaldo
# Input side of the descriptor: paragraphs grouping command-line parameters.
inputs: !mobyle/inputparagraph
    children:
    # First paragraph: the alignment file and the kind of sequences it holds.
    - !mobyle/inputprogramparagraph
        prompt: Input
        name: input
        argpos: 2
        children:
        # Mandatory alignment file. The format expression always yields
        # ' -i ' followed by the value.
        - !mobyle/inputprogramparameter
            comment: "BMGE uses FASTA or PHYLIP sequential format for input. These\
                \ are\n                    plain text files. There is no limit on\
                \ the length of the\n                    alignment. There is also\
                \ no limit on the length of the label of\n                    a sequence\
                \ (i.e. its FASTA annotation line), although a too long\n        \
                \            label (e.g. more than 100 letters) will be truncated\
                \ if the\n                    output format is PHYLIP sequential."
            prompt: 'Alignment (-i)'
            format: '" -i "+str(value)'
            simple: true
            argpos: 1
            mandatory: true
            name: infile
            command: false
            type: !mobyle/formattedtype
                # Accepted formats and data kind, given as EDAM identifiers.
                format_terms:
                - 'EDAM_format:2200'
                - 'EDAM_format:1956'
                data_terms:
                - 'EDAM_data:0863'
        # Sequence coding selector. The format expression emits ' -t <value>'
        # only when the value is set and differs from vdef (presumably the
        # declared default, i.e. the 'null' placeholder entry - confirm).
        - !mobyle/inputprogramparameter
            comment: "Both standard single-letter amino acid and nucleotide alphabets\n\
                \                          are used by BMGE. When using amino acid\
                \ sequences, degenerated\n                          character states\
                \ B and Z are understood by BMGE; similarly,\n                   \
                \       degenerated nucleotide characters are also understood. The\n\
                \                          character state X is understood to be any\
                \ of the 4 or 20\n                          character states when\
                \ using as input nucleotide or amino acid\n                      \
                \    sequences, respectively. Dashes (i.e. '-') are understood as\n\
                \                          gaps, whereas dots (i.e. '.'), as any other\
                \ single letter that\n                          are not inside standard\
                \ alphabets, are considered as unknown\n                         \
                \ character state (i.e. '?'). Nucleotide sequences can be set as\n\
                \                          codon ones. In this case, each successive\
                \ nucleotide character\n                          triplet is considered\
                \ as one codon character."
            prompt: 'Type of sequence (-t)'
            format: '("", " -t "+str(value))[value is not None and value!=vdef]'
            simple: true
            argpos: 2
            mandatory: true
            name: input_type
            command: false
            type: !mobyle/stringtype
                # The default is the quoted string 'null', not a YAML null.
                default: 'null'
                options:
                - label: Set the input sequence coding
                  value: 'null'
                - label: Amino acid
                  value: AA
                - label: Nucleotide
                  value: DNA
                - label: Codon
                  value: CODON
    # Second paragraph: similarity matrix, cut-offs and block size controls.
    - !mobyle/inputprogramparagraph
        prompt: Control options
        name: options
        children:
        # Similarity matrix for amino acid or codon input (precond restricts
        # it to input_type AA or CODON). ' -m <value>' is emitted only when
        # the value is set and differs from vdef (presumably the BLOSUM62
        # default - confirm).
        - !mobyle/inputprogramparameter
            comment: "For each character, BMGE computes a score mainly determined\
                \ by the entropy induced by\n              the respective proportion\
                \ of each residue. To estimate realistic\n              scores that\
                \ take into account biologically relevant substitution processes,\
                \ BMGE weights the\n              entropy estimation with substitution\
                \ matrices. This option can be used with the 15 estimated BLOSUM matrices.\
                \ BMGE uses by \n              default the popular BLOSUM62 matrix.\
                \ The character\n              trimming is progressively more stringent\
                \ as the BLOSUM index\n              increases (e.g. BLOSUM95); reciprocally,\
                \ the trimming is\n              progressively more relaxed as the\
                \ BLOSUM index is lower (e.g.\n              BLOSUM30). In practice,\
                \ it is recommended to use BLOSUM95 with\n              closely related\
                \ sequences, and BLOSUM30 with distantly related\n              sequences. If\
                \ input sequences are set as codons, BMGE performs a conversion\n\
                \              into amino acid sequences (following the universal\
                \ genetic code)\n              and uses BLOSUM matrices to estimate\
                \ the entropy-like score for\n              each codon character.\
                \ So, with option -t set as CODON, one can\n              modify the\
                \ option -m only with BLOSUM matrices.\n              It is also possible\
                \ to use the identity matrix with any sequence\n              types."
            prompt: Similarity Matrices for amino acid and codon sequences (-m)
            format: ("", " -m "+str(value))[value is not None and value!=vdef]
            argpos: 3
            name: matrixaa
            precond:
                input_type:
                    '#in': [AA, CODON]
            command: false
            type: !mobyle/stringtype
                default: BLOSUM62
                # Default first, then the identity matrix, then the other
                # BLOSUM matrices in increasing index order.
                options:
                - {label: BLOSUM62, value: BLOSUM62}
                - {label: Identity, value: ID}
                - {label: BLOSUM30, value: BLOSUM30}
                - {label: BLOSUM35, value: BLOSUM35}
                - {label: BLOSUM40, value: BLOSUM40}
                - {label: BLOSUM45, value: BLOSUM45}
                - {label: BLOSUM50, value: BLOSUM50}
                - {label: BLOSUM55, value: BLOSUM55}
                - {label: BLOSUM60, value: BLOSUM60}
                - {label: BLOSUM65, value: BLOSUM65}
                - {label: BLOSUM70, value: BLOSUM70}
                - {label: BLOSUM75, value: BLOSUM75}
                - {label: BLOSUM80, value: BLOSUM80}
                - {label: BLOSUM85, value: BLOSUM85}
                - {label: BLOSUM90, value: BLOSUM90}
                - {label: BLOSUM95, value: BLOSUM95}
        # Similarity matrix for nucleotide input (precond restricts it to
        # input_type DNA). ' -m <value>' is emitted only when the value is
        # set and differs from vdef (presumably the DNAPAM100 default -
        # confirm). Shares argpos 3 with matrixaa; their preconds are
        # mutually exclusive.
        - !mobyle/inputprogramparameter
            comment: "For nucleotide input sequences, BMGE uses PAM matrices with\
                \ a\n              fixed transition/transversion ratio. BMGE can be\
                \ used with all\n              possible PAM matrices, from the most\
                \ stringent (i.e. DNAPAM1) to\n              highly relaxed ones (e.g.\
                \ DNAPAM500). By default with nucleotide\n              sequences,\
                \ BMGE uses the PAM-100 matrix. It is also possible to use the identity\
                \ matrix."
            prompt: Similarity Matrices for nucleotide sequences (-m)
            format: ("", " -m "+str(value))[value is not None and value!=vdef]
            argpos: 3
            name: matrixan
            precond:
                input_type:
                    '#in': [DNA]
            command: false
            type: !mobyle/stringtype
                default: DNAPAM100
                # Default first, then the identity matrix, then the remaining
                # PAM matrices in increasing order.
                options:
                - {label: DNAPAM100, value: DNAPAM100}
                - {label: Identity, value: ID}
                - {label: DNAPAM1, value: DNAPAM1}
                - {label: DNAPAM5, value: DNAPAM5}
                - {label: DNAPAM10, value: DNAPAM10}
                - {label: DNAPAM20, value: DNAPAM20}
                - {label: DNAPAM30, value: DNAPAM30}
                - {label: DNAPAM40, value: DNAPAM40}
                - {label: DNAPAM50, value: DNAPAM50}
                - {label: DNAPAM60, value: DNAPAM60}
                - {label: DNAPAM70, value: DNAPAM70}
                - {label: DNAPAM80, value: DNAPAM80}
                - {label: DNAPAM90, value: DNAPAM90}
                - {label: DNAPAM110, value: DNAPAM110}
                - {label: DNAPAM120, value: DNAPAM120}
                - {label: DNAPAM130, value: DNAPAM130}
                - {label: DNAPAM140, value: DNAPAM140}
                - {label: DNAPAM150, value: DNAPAM150}
                - {label: DNAPAM160, value: DNAPAM160}
                - {label: DNAPAM170, value: DNAPAM170}
                - {label: DNAPAM180, value: DNAPAM180}
                - {label: DNAPAM190, value: DNAPAM190}
                - {label: DNAPAM200, value: DNAPAM200}
                - {label: DNAPAM210, value: DNAPAM210}
                - {label: DNAPAM220, value: DNAPAM220}
                - {label: DNAPAM230, value: DNAPAM230}
                - {label: DNAPAM240, value: DNAPAM240}
                - {label: DNAPAM250, value: DNAPAM250}
                - {label: DNAPAM260, value: DNAPAM260}
                - {label: DNAPAM270, value: DNAPAM270}
                - {label: DNAPAM280, value: DNAPAM280}
                - {label: DNAPAM290, value: DNAPAM290}
                - {label: DNAPAM300, value: DNAPAM300}
                - {label: DNAPAM310, value: DNAPAM310}
                - {label: DNAPAM320, value: DNAPAM320}
                - {label: DNAPAM330, value: DNAPAM330}
                - {label: DNAPAM340, value: DNAPAM340}
                - {label: DNAPAM350, value: DNAPAM350}
                - {label: DNAPAM360, value: DNAPAM360}
                - {label: DNAPAM370, value: DNAPAM370}
                - {label: DNAPAM380, value: DNAPAM380}
                - {label: DNAPAM390, value: DNAPAM390}
                - {label: DNAPAM400, value: DNAPAM400}
                - {label: DNAPAM410, value: DNAPAM410}
                - {label: DNAPAM420, value: DNAPAM420}
                - {label: DNAPAM430, value: DNAPAM430}
                - {label: DNAPAM440, value: DNAPAM440}
                - {label: DNAPAM450, value: DNAPAM450}
                - {label: DNAPAM460, value: DNAPAM460}
                - {label: DNAPAM470, value: DNAPAM470}
                - {label: DNAPAM480, value: DNAPAM480}
                - {label: DNAPAM490, value: DNAPAM490}
                - {label: DNAPAM500, value: DNAPAM500}
        # Transition/transversion ratio. The format expression yields
        # ':<value> ' with no leading space, and argpos 4 follows the
        # matrix parameter at argpos 3, so the text presumably lands directly
        # after the ' -m <matrix>' fragment - confirm against the command
        # builder. The precond limits it to DNA input with a matrix other
        # than DNAPAM100 or ID.
        - !mobyle/inputprogramparameter
            comment: "It is possible to indicate a transition/transversion ratio to\n\
                \              better define the PAM matrices with nucleotide sequences.\
                \ By\n              default, BMGE uses a transition/transversion ratio\
                \ of 2."
            prompt: 'Transition/transversion ratio for nucleotide sequences.'
            format: '("", ":" +str(value) + " ")[value is not None]'
            argpos: 4
            name: transition
            precond:
                '#and':
                -   input_type:
                        '#in':
                        - DNA
                -   '#and':
                    -   matrixan:
                            '#ne': DNAPAM100
                    -   matrixan:
                            '#ne': ID
            command: false
            type: !mobyle/floattype
                default: 2.0
        # Gap-frequency cut-off. ' -g <value>' is emitted only when the value
        # is set and differs from vdef (presumably the 0.2 default - confirm).
        - !mobyle/inputprogramparameter
            comment: "BMGE allows characters containing too many gaps to be removed\n\
                \                with this option. By default, BMGE removes all characters\
                \ with a\n                gap frequency greater than 0.2."
            prompt: 'Gap Rate Cut-off (-g)'
            format: '("", " -g "+str(value))[value is not None and value!=vdef]'
            argpos: 5
            name: gap_rate_cutoff
            command: false
            type: !mobyle/floattype
                default: 0.2
        # Lower entropy cut-off. This parameter builds the whole
        # ' -h <min>:<max> ' fragment, reading the max_entropy parameter too.
        # The fragment is emitted only when max_entropy is greater than the
        # value and at least one of the two differs from its default (0.5 is
        # hard-coded for max_entropy; vdef is presumably this parameter's
        # 0.0 default - confirm).
        - !mobyle/inputprogramparameter
            comment: "Following the smoothing operation of the entropy-like score\n\
                \              values across characters, BMGE selects characters associated\
                \ with\n              a score value greater than a fixed threshold.\
                \ This cut-off is set\n              to 0.0 by default."
            prompt: 'Minimum entropy Score Cut-off (-h)'
            format: '("", " -h %s:%s " % ( value, max_entropy) )[ (max_entropy != 0.5 or value !=vdef) and  (max_entropy > value)  ]'
            argpos: 6
            name: min_entropy
            command: false
            type: !mobyle/floattype
                default: 0.0
        # Upper entropy cut-off. It has no format expression of its own; the
        # min_entropy parameter's format reads this value when building -h.
        - !mobyle/inputprogramparameter
            comment: "Following the smoothing operation of the entropy-like score\n\
                \              values across characters, BMGE selects characters associated\
                \ with\n              a score value below a fixed threshold. This\
                \ cut-off is set to 0.5\n              by default."
            prompt: 'Maximum entropy Score Cut-off (-h)'
            argpos: 6
            name: max_entropy
            precond:
                # NOTE(review): a plain None is a YAML string, not a null -
                # confirm this is what the precondition evaluator expects.
                min_entropy:
                    '#ne': None
            command: false
            type: !mobyle/floattype
                default: 0.5
        # Minimum block size. ' -b <value>' is emitted only when the value is
        # set and differs from vdef (presumably the default of 5 - confirm).
        # The misspelled name is an identifier and is left unchanged.
        - !mobyle/inputprogramparameter
            comment: "BMGE only selects regions of size greater than or equal to 5.\
                \ Use\n              this option to modify this minimum block size\
                \ parameter."
            prompt: 'Minimum Block Size (-b)'
            format: '("", " -b "+str(value) )[value is not None and value!=vdef]'
            argpos: 7
            name: minimun_block_size
            command: false
            type: !mobyle/integertype
                default: 5
    # Third paragraph: boolean switches, one per output file format.
    - !mobyle/inputprogramparagraph
        prompt: Output format options
        name: output_option
        children:
        # PHYLIP sequential output switch. When true, emits ' -op ' followed
        # by the part of infile before its first '.', plus '.phy '.
        - !mobyle/inputprogramparameter
            prompt: 'Output in phylip sequential format (-op)'
            format: '("", " -op "+ infile.split(''.'')[0] + ".phy ")[ value ]'
            argpos: 9
            name: phylip
            precond:
                infile:
                    '#ne': None
            command: false
            type: !mobyle/booleantype
                default: false
        # PHYLIP sequential output with relabelled sequences. When true,
        # emits ' -oppp ' followed by the part of infile before its first
        # '.', plus '.phyp '.
        - !mobyle/inputprogramparameter
            comment: "If input sequences are in FASTA format with NCBI-formatted annotation\
                \ lines, e.g.\n                      >field1|field2|field3|field4|\
                \ field5 [field6]\n            the option -oppp allows naming sequences\
                \ by field6_____field4 ; knowing that field4 is generally\n      \
                \      an accession number, and field6 a\n            taxon name,\
                \ this option leads to PHYLIP files where each sequence is labelled\
                \ as a\n            taxon name and an accession number."
            prompt: Output in phylip sequential format. Special formatting (-oppp)
            format: ("", " -oppp "+ infile.split('.')[0] + ".phyp ")[ value]
            argpos: 9
            name: phylip_oppp
            precond:
                infile: {'#ne': None}
            command: false
            type: !mobyle/booleantype {default: false}
        # NEXUS output switch. When true, emits ' -on ' followed by the part
        # of infile before its first '.', plus '.nex '.
        - !mobyle/inputprogramparameter
            prompt: 'Output in nexus format (-on)'
            format: '("", " -on "+ infile.split(''.'')[0] + ".nex ")[ value ]'
            argpos: 9
            name: nexus
            precond:
                infile:
                    '#ne': None
            command: false
            type: !mobyle/booleantype
                default: false
        # NEXUS output with relabelled sequences. When true, emits ' -onnn '
        # followed by the part of infile before its first '.', plus '.nexn '.
        - !mobyle/inputprogramparameter
            comment: "If input sequences are in FASTA format with NCBI-formatted\n\
                \                 annotation lines,\n               e.g. >field1|field2|field3|field4|\
                \ field5 [field6]\n               the option -onnn allows naming sequences\
                \ by field6_____field4 ;\n               knowing that field4 is generally\
                \ an accession number and field6 a\n              taxon name, this\
                \ option leads to NEXUS files where each sequence\n              is\
                \ labelled as a taxon name and an accession number."
            prompt: Output in nexus format. Special formatting (-onnn)
            format: ("", " -onnn "+ infile.split('.')[0] + ".nexn ")[ value ]
            argpos: 9
            name: nexus_onnn
            precond:
                infile: {'#ne': None}
            command: false
            type: !mobyle/booleantype {default: false}
        # FASTA output switch. When true, emits ' -of ' followed by the part
        # of infile before its first '.', plus '.fa '.
        - !mobyle/inputprogramparameter
            prompt: 'Output in fasta format (-of)'
            format: '("", " -of "+ infile.split(''.'')[0] + ".fa ")[ value ]'
            argpos: 10
            name: fasta
            precond:
                infile:
                    '#ne': None
            command: false
            type: !mobyle/booleantype
                default: false
        # HTML output switch. When true, emits ' -oh ' followed by the part
        # of infile before its first '.', plus '.html '.
        - !mobyle/inputprogramparameter
            prompt: 'Output in html format (-oh)'
            format: '("", " -oh "+ infile.split(''.'')[0] + ".html ")[ value ]'
            argpos: 11
            name: html
            precond:
                infile:
                    '#ne': None
            command: false
            type: !mobyle/booleantype
                default: false
# Output side of the descriptor: result files plus the captured streams.
outputs: !mobyle/outputparagraph
    children:
    # One output entry per format switch of the input-side output_option
    # paragraph; each entry's precond names the corresponding switch.
    - !mobyle/outputprogramparagraph
        prompt: Output format options
        name: output_option
        children:
        # Result file for the phylip switch; the filename expression mirrors
        # the name passed after -op (without the trailing space).
        - !mobyle/outputprogramparameter
            prompt: 'Output in phylip sequential format'
            filenames: 'infile.split(''.'')[0] + ".phy"'
            name: phylipout
            precond: phylip
            type: !mobyle/formattedtype
                format_terms:
                - 'EDAM_format:1956'
                data_terms:
                - 'EDAM_data:0863'
        # Result file for the phylip_oppp switch; the filename expression
        # mirrors the name passed after -oppp (without the trailing space).
        - !mobyle/outputprogramparameter
            prompt: 'Output in phylip sequential format'
            filenames: 'infile.split(''.'')[0] + ".phyp"'
            name: phylipout_oppp
            precond: phylip_oppp
            type: !mobyle/formattedtype
                format_terms:
                - 'EDAM_format:1956'
                data_terms:
                - 'EDAM_data:0863'
        # Result file for the nexus switch; the filename expression mirrors
        # the name passed after -on (without the trailing space).
        - !mobyle/outputprogramparameter
            prompt: 'Output in nexus format'
            filenames: 'infile.split(''.'')[0] + ".nex"'
            name: nexusout
            precond: nexus
            type: !mobyle/formattedtype
                format_terms:
                - 'EDAM_format:1912'
                data_terms:
                - 'EDAM_data:0863'
        # Result file for the nexus_onnn switch; the filename expression
        # mirrors the name passed after -onnn (without the trailing space).
        - !mobyle/outputprogramparameter
            prompt: 'Output in nexus format'
            filenames: 'infile.split(''.'')[0] + ".nexn"'
            name: nexusout_onnn
            precond: nexus_onnn
            type: !mobyle/formattedtype
                format_terms:
                - 'EDAM_format:1912'
                data_terms:
                - 'EDAM_data:0863'
        # Result file for the fasta switch; the filename expression mirrors
        # the name passed after -of (without the trailing space).
        - !mobyle/outputprogramparameter
            prompt: 'Output in fasta format'
            filenames: 'infile.split(''.'')[0] + ".fa"'
            name: fastaout
            precond: fasta
            type: !mobyle/formattedtype
                format_terms:
                - 'EDAM_format:2200'
                data_terms:
                - 'EDAM_data:0863'
        # Result file for the html switch; the filename expression mirrors
        # the name passed after -oh (without the trailing space). Only a data
        # term is declared for this entry, no format term.
        - !mobyle/outputprogramparameter
            prompt: 'Output in html format'
            filenames: 'infile.split(''.'')[0] + ".html"'
            name: htmlout
            precond: html
            type: !mobyle/formattedtype
                data_terms:
                - 'EDAM_data:2048'
    # Standard output entry, flagged through output_type; the filenames
    # expression is the constant "BMGE.out".
    - !mobyle/outputprogramparameter
        prompt: 'Standard output'
        filenames: '"BMGE.out"'
        name: stdout
        output_type: stdout
        type: !mobyle/formattedtype
            data_terms:
            - 'EDAM_data:2048'
    # Standard error entry; the filenames expression is the constant
    # "BMGE.err". No output_type key is set on this entry.
    - !mobyle/outputprogramparameter
        prompt: 'Standard error'
        filenames: '"BMGE.err"'
        name: stderr
        type: !mobyle/formattedtype
            data_terms:
            - 'EDAM_data:2048'
# Classification of the service, given as EDAM identifiers.
operations:
- 'EDAM_operation:2946'
topics:
- 'EDAM_topic:0084'
- 'EDAM_topic:0182'
# Citation for the tool. The DOI is that of the BMC Evolutionary Biology
# article named in the label; the label text, including its embedded line
# break, is unchanged.
references:
- doi: '10.1186/1471-2148-10-210'
  label: "Criscuolo A, Gribaldo S (2010) BMGE (Block Mapping and Gathering\
      \ with Entropy): selection of phylogenetic informative regions from multiple\n\
      sequence alignments. BMC Evolutionary Biology 10:210."
  url: null
documentation_links:
- 'http://bioweb2.pasteur.fr/docs/BMGE/BMGE_doc.pdf'
# Presumably the executable name that the formatted parameter fragments are
# appended to - confirm against the command builder.
command: BMGE
env: {}
source_links:
- 'ftp://ftp.pasteur.fr/pub/gensoft/projects/BMGE/'