forked from NCBI-Hackathons/SC3
-
Notifications
You must be signed in to change notification settings - Fork 0
/
prototype.sh
executable file
·70 lines (57 loc) · 1.77 KB
/
prototype.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
#!/bin/bash
display_usage() {
echo "Usage: $0 clinvar.vcf.gz [GENE] or [DISEASE] [CLINSIG]";
echo "";
echo "[GENE] = GENE ID ";
echo "[DISEASE] = name of associated disase in clinvar ";
echo "[CLINSIG] = 5 for pathogenic, otherwise report all SNPs";
echo "";
echo " This program takes clinvar database in GRCh37, a gene or disase,";
echo " and weather you want pathogenic or all variants as input and ";
echo " outputs a list of RSids related to that disease. ";
echo "";
echo " You must list the clinvar.vcf.gz download file must have one of Gene or Disease";
echo "";
echo " The program will automatically check to see if you have the latest";
echo " version of clinvar from the NCBI ftp site and automatically download";
echo " the file if your version does not match file on the ftp site ";
echo "";
}
# if less than two arguments supplied, display usage
if [ $# -le 1 ]
then
display_usage
exit 1
fi
ADDRESS="ftp.ncbi.nlm.nih.gov"
SRC_DIR="pub/clinvar/vcf_GRCh37/"
command="open -e \"get $SRC_DIR$1.md5 ; exit \" \"$ADDRESS\""
command2="open -e \"get $SRC_DIR$1 ; exit \" \"$ADDRESS\""
file1=`md5 -q $1`
if [ ! -e $1.md5 ]; then
lftp -c "$command"
fi
file2=`cut -d* -f1 $1.md5`
if [ ! -e $1 ]; then
lftp -c "$command2"
fi
echo "Checking file: $1"
echo "Using MD5 file: $1.md5"
echo $file1
echo $file2
if [ $file1 != $file2 ]
then
echo "md5 sums mismatch"
mv $1 $1.old
lftp -c "$command2"
else
echo "checksums OK"
fi
#actual search
if [ "$3" = 5 ]; then
zgrep -i $2 $1 | grep CLNSIG=5 | awk '{print $8}' | awk -F ";" '/1/ {print $1}' | awk -F "=" '/1/ {print $2}' > $2-$3.snps
echo "SNPs written to $2-$3.snps"
else
zgrep -i $2 $1 | awk '{print $8}' | awk -F ";" '/1/ {print $1}' | awk -F "=" '/1/ {print $2}' > $2.snps
echo "SNPs written to $2.snps"
fi