forked from eisenlab/SliceSeq
-
Notifications
You must be signed in to change notification settings - Fork 0
/
MakeMultigenome.py
73 lines (57 loc) · 2.13 KB
/
MakeMultigenome.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
from Bio import SeqIO
from os import listdir, chdir
from sys import argv
from subprocess import Popen
chdir(argv[1])
base = argv[2]
rest = argv[3:]
print "Base species: ", base
print "Other species: ", rest
## Make the BASE_only.fa file
only_out_fh = open(base+"_only.fa", "w")
for fname in listdir('.'):
if fname.startswith('d') and ('.fa' in fname) and (base in fname):
specname = fname.split('-')[0]
print specname
for rec in SeqIO.parse(fname, 'fasta'):
rec.id = specname + '_' + rec.id
SeqIO.write(rec, only_out_fh, 'fasta')
only_out_fh.close()
bowtie_build_procs = []
## Make the joint fasta files with each species
for other in rest:
print base+other
out_fh = open(base + other + '.fa', 'w')
for fname in listdir('.'):
if fname.startswith('d') and '.fa' in fname and ((base in fname) or (other in fname)):
specname = fname.split('-')[0]
print specname
for rec in SeqIO.parse(fname, 'fasta'):
rec.id = specname + '_' + rec.id
SeqIO.write(rec, out_fh, 'fasta')
out_fh.close()
bowtie_build_procs.append(Popen(['bowtie2-build', out_fh.name, base+other],
stdout = open(base+other+'_bowtiebuild.log',
'w')))
## Make the BASE_only gtf file
only_out_fh = open(base + '_only.gtf', 'w')
for fname in listdir('.'):
if fname.startswith('d') and ('.gtf' in fname) and (base in fname):
specname = fname.split('-')[0]
print specname
for line in open(fname):
only_out_fh.write(specname+"_"+line)
only_out_fh.close()
## Make the joint gtf files with each species
for other in rest:
print "GTF", base+other
out_fh = open(base + other + '.gtf', 'w')
for fname in listdir('.'):
if fname.startswith('d') and '.gtf' in fname and ((base in fname) or (other in fname)):
specname = fname.split('-')[0]
print specname
for line in open(fname):
out_fh.write(specname+"_"+line)
out_fh.close()
for proc in bowtie_build_procs:
proc.wait()