-
Notifications
You must be signed in to change notification settings - Fork 3
/
makeMatrix.py
97 lines (76 loc) · 1.84 KB
/
makeMatrix.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
from __future__ import print_function
'''
python makeMatrix.py [kmcOut] [arrInd] [antibioList] [micMethods] [out1] [out2] [contigs]
'''
from sys import argv,stderr
# Read the contigs file (argv[7]) and collect the first tab-separated
# field of every line, preserving file order.
contigs = []
with open(argv[7]) as f:
    for line in f:
        # Drop the line terminator before splitting: a line with no tab
        # would otherwise keep its trailing newline and could never match
        # the whitespace-stripped keys read from the KMC output.
        name = line.rstrip('\r\n').split('\t')[0]
        # Blank lines carry no contig name.
        if name:
            contigs.append(name)
# Load the feature index file (argv[2]). Each usable line has the form
# "<feature>\t<index>"; build a map of feature name -> index string.
featureHash = {}
with open(argv[2]) as f:
    for line in f:
        fields = line.strip().split('\t')
        # Skip blank or malformed lines that lack an index column.
        if len(fields) < 2:
            continue
        # A later duplicate of a feature overrides the earlier entry.
        featureHash[fields[0]] = fields[1]
# Load the KMC output (argv[1]): map each k-mer to its count string.
kmcContigs = {}
with open(argv[1]) as f:
    for line in f:
        fields = line.strip().split('\t')
        # Skip blank or malformed lines (no count column) instead of
        # failing with IndexError; the feature-index loader uses the
        # same guard.
        if len(fields) < 2:
            continue
        kmcContigs[fields[0]] = fields[1]
# Render the counts as one libsvm-style fragment, in contigs order:
# [featureIndex]:[count] [featureIndex]:[count] ... [featureIndex]:[count]
# Only k-mers present in both the KMC output and the feature index are
# emitted. Every entry ends with a space so that further features can be
# appended directly to the fragment.
kmcLine = ''.join(
    featureHash[kmer] + ':' + kmcContigs[kmer] + ' '
    for kmer in contigs
    if kmer in kmcContigs and kmer in featureHash
)
# Read the antibiotic list (argv[3]), keeping only the antibiotics that
# have an entry in featureHash; file order is preserved.
antibioList = []
with open(argv[3]) as f:
    for line in f:
        # Lines whose first character is '#' are comments.
        if line.startswith('#'):
            continue
        name = line.strip()
        if name in featureHash:
            antibioList.append(name)
# Read the list of MIC testing methods (argv[4]), keeping only the
# methods that have an entry in featureHash; file order is preserved.
with open(argv[4]) as f:
    micMethods = [
        method
        for method in (line.strip() for line in f)
        if method in featureHash
    ]
# Write the two output files, one row per (antibiotic, MIC method) pair:
#   argv[5]: libsvm-style feature line for the pair
#   argv[6]: "<antibiotic>\t<method>" naming the pair, in the same row
#            order as the libsvm output
# The context manager guarantees both files are flushed and closed.
with open(argv[5], 'w') as libsvmOut, open(argv[6], 'w') as orderOut:
    for antibiotic in antibioList:
        for method in micMethods:
            # Shared k-mer features, then one indicator feature for the
            # antibiotic and one for the MIC method. The second indicator
            # must use the method's index: emitting the antibiotic's
            # index twice left the method unencoded and made every
            # method's row for an antibiotic identical.
            libsvmOut.write(
                kmcLine
                + featureHash[antibiotic] + ':1 '
                + featureHash[method] + ':1\n'
            )
            orderOut.write(antibiotic + '\t' + method + '\n')