-
Notifications
You must be signed in to change notification settings - Fork 0
/
uf-random
executable file
·101 lines (83 loc) · 3.25 KB
/
uf-random
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
#!/bin/bash
#
# uf-random - Generate unfasta format random sequences.
# Copyright (C) 2016 Marco van Zwetselaar <[email protected]>
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
# This utility is part of http://io.zwets.it/unfasta
# Defaults; the -c/--count and -l/--length options and the CHARS argument
# override these.
SEQ_CHARS="ACGT"
SEQ_COUNT=1
SEQ_LENGTH=100
# Start without a range. RANGE is only assigned by -r/--range, so without this
# reset a RANGE variable inherited from the caller's environment would
# silently switch the script to random-length mode.
RANGE=
# Report a fatal error and stop the script.
# Arguments: the words of the message; they are joined into one line.
# Outputs:   "<script name>: <message>" on stderr.
# Exits with status 1; never returns.
err_exit() {
    local prog
    prog="$(basename "$0")"
    printf '%s: %s\n' "$prog" "$*" >&2
    exit 1
}
# Show usage information on stderr and exit.
# Arguments: $1 - exit status to use (optional, defaults to 1)
# Globals:   SEQ_CHARS, SEQ_COUNT, SEQ_LENGTH are read to display the defaults.
usage_exit() {
    # "$0" must be quoted: unquoted, a script path containing whitespace is
    # split into several basename operands and the wrong name is printed.
    local prog
    prog="$(basename "$0")"
    # The text starts and ends with a newline so that the message is set off
    # by a blank line on either side.
    printf '%s\n' "
Usage: $prog [OPTIONS] [CHARS]
Write to standard output an unfasta file with randomly generated sequences.
Characters are picked randomly uniform from alphabet CHARS (default: $SEQ_CHARS).
Sequences can be of fixed or random lengths.
Options
-c, --count COUNT Number of sequences (default: $SEQ_COUNT)
-l, --length LEN Length of sequences (default: $SEQ_LENGTH), excludes -r.
-r, --range MIN,MAX Random length sequences in range [MIN,MAX], excludes -l.
The CHARS argument can be used to create sequences from any alphabet.
To create biased sequences, repeat characters in CHARS so they have the
desired proportions. E.g. 'AACCCTTGGG' yields sequences with 60% GC.
" >&2
    exit "${1:-1}"
}
# Parse options
# An argument counts as an option when it starts with '-' and is at least two
# characters long; the first argument that does not look like that ends the
# loop. A shell pattern does this test instead of expr(1), which costs a
# process per argument and reads arguments such as '(' or 'length' as
# operators of its own.
while [[ $# -ne 0 && "$1" == -?* ]]; do
    case "$1" in
        --count=*)   SEQ_COUNT="${1#--count=}" ;;
        -c|--count)  shift; SEQ_COUNT="$1" ;;
        --length=*)  SEQ_LENGTH="${1#--length=}" ;;
        -l|--length) shift; SEQ_LENGTH="$1" ;;
        --range=*)   RANGE="${1#--range=}" ;;
        -r|--range)  shift; RANGE="$1" ;;
        --help)      usage_exit 0 ;;
        *)           usage_exit ;;
    esac
    # This shift fails when an option that takes a value was the last word on
    # the command line, i.e. when its value is missing.
    shift || usage_exit
done
# COUNT and LENGTH end up in arithmetic contexts, so accept plain decimal
# digits only, and force base 10 so that leading zeros are not read as octal.
if [[ "$SEQ_COUNT" =~ ^[0-9]+$ ]]; then
    SEQ_COUNT=$((10#$SEQ_COUNT))
else
    err_exit "not a valid sequence count: $SEQ_COUNT"
fi
if [[ "$SEQ_LENGTH" =~ ^[0-9]+$ ]]; then
    SEQ_LENGTH=$((10#$SEQ_LENGTH))
else
    err_exit "not a valid sequence length: $SEQ_LENGTH"
fi
# Parse arguments
# At most one positional argument is accepted: the alphabet to draw from.
# Anything beyond that is a usage error.
if [[ $# -eq 1 ]]; then
    SEQ_CHARS="$1"
    shift
fi
[[ $# -eq 0 ]] || usage_exit
# An empty alphabet cannot be sampled: every draw would be a modulo by zero.
[[ -n "$SEQ_CHARS" ]] || err_exit "alphabet CHARS must not be empty"
# Parse MIN and MAX if random lengths are required
# Sets MIN_LENGTH, MAX_LENGTH and RANGE_SIZE (the number of distinct lengths).
if [ -n "$RANGE" ]; then
    # Matched with the shell's own regex engine rather than expr(1): expr
    # exits non-zero when the captured number is 0, which made a range such
    # as "0,5" fail as "not a valid range specification".
    [[ "$RANGE" =~ ^([0-9]+),([0-9]+)$ ]] || err_exit "not a valid range specification: $RANGE"
    # Force base 10 so that leading zeros are not read as octal.
    MIN_LENGTH=$((10#${BASH_REMATCH[1]}))
    MAX_LENGTH=$((10#${BASH_REMATCH[2]}))
    RANGE_SIZE=$((MAX_LENGTH - MIN_LENGTH + 1))
    # MAX below MIN leaves nothing to choose from.
    [ "$RANGE_SIZE" -gt 0 ] || err_exit "invalid range: $RANGE"
fi
# Check that we have RANDOM, which is a bash extension - @TODO migrate to /dev/urandom
# The value must consist of digits only; an empty value or one holding any
# other character is reported as a fatal error.
case "$RANDOM" in
    ''|*[!0-9]*) err_exit "\$RANDOM not supported by shell; bash is required." ;;
esac
# Generate the sequences
# Each record is a header line ">lcl|N Random sequence N (length LEN)" followed
# by a single line holding LEN characters drawn from SEQ_CHARS.
# The alphabet size is the same for every draw, so compute it once.
N_CHARS=${#SEQ_CHARS}
for (( S = 1; S <= SEQ_COUNT; S++ )); do
    # In range mode every sequence gets its own length. $RANDOM only spans
    # 0..32767, so two draws are combined into a 30-bit value; with a single
    # draw the upper part of a range wider than 32768 could never be reached.
    # (The modulo still carries a small bias when RANGE_SIZE does not divide 2^30.)
    [ -z "$RANGE" ] || SEQ_LENGTH=$(( MIN_LENGTH + ((RANDOM << 15) | RANDOM) % RANGE_SIZE ))
    printf '%s\n' ">lcl|$S Random sequence $S (length $SEQ_LENGTH)"
    for (( C = 1; C <= SEQ_LENGTH; C++ )); do
        # The character is quoted and printed via printf: left unquoted, a '*'
        # or '?' from the alphabet was glob-expanded to file names and a
        # whitespace character vanished through word splitting.
        printf '%s' "${SEQ_CHARS:$((RANDOM % N_CHARS)):1}"
    done
    echo
done
# vim: sts=4:sw=4:et:si:ai