-
Notifications
You must be signed in to change notification settings - Fork 23
/
preprocess_openimage.py
76 lines (58 loc) · 2.06 KB
/
preprocess_openimage.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
import pickle
import numpy as np
# Annotation CSVs: the first comma-separated field of each row is the image id.
trainfile = 'oidv6-train-annotations-human-imagelabels.csv'
testfile = 'test-annotations-human-imagelabels.csv'

print('Selecting train ids.')
with open(trainfile) as train_csv:
    train_csv.readline()  # discard the first line before collecting ids
    unique_train_ids = {row.split(',')[0] for row in train_csv}
# Sorting gives np.random.choice a deterministic input order, so together
# with the fixed seed the same 100000 ids are drawn on every run.
images = sorted(unique_train_ids)
np.random.seed(0)
res = np.random.choice(images, size=100000, replace=False)
print('Loading test ids.')
with open(testfile) as test_csv:
    test_csv.readline()  # discard the first line before collecting ids
    # From here on `images` holds the sorted, de-duplicated ids of the test file.
    images = sorted({row.split(',')[0] for row in test_csv})
print('Writing ids.')
with open('openimage_id.txt', 'w') as id_file:
    # One id per line: the sampled train ids first, then every test id,
    # each prefixed with the split it came from.
    for template, ids in (('train/{}', res), ('test/{}', images)):
        for image_id in ids:
            print(template.format(image_id), file=id_file)

# Every id (train sample + test) for which tags will be collected.
ss = set(res).union(images)
print('Loading tag names.')
# Maps the first field of each line (the tag id) to the rest of the line.
tag_to_name = {}
with open('oidv6-class-descriptions.csv') as classes_csv:
    for line in classes_csv:
        # Split on the first comma only, so commas inside the name are kept.
        tag, _, name = line.partition(',')
        tag_to_name[tag] = name.strip()
def create_dictionaries(filename):
    """Build image->tags and tag->images mappings from an annotation CSV.

    The first line of the file is skipped. Every following row must consist of
    exactly four comma-separated fields: image id, source, tag and confidence.
    A row is recorded only when its image id is in the module-level set ``ss``
    and its confidence parses to the integer 1. Tags are translated through
    the module-level ``tag_to_name`` mapping before being stored.

    Args:
        filename: Path of the annotation CSV to read.

    Returns:
        A tuple ``(image_to_tag, tag_to_image)``. ``image_to_tag`` has one
        entry for every id in ``ss`` (an empty list when no row matched);
        ``tag_to_image`` only has entries for tag names seen on a kept row.

    Raises:
        KeyError: If a row's tag is missing from ``tag_to_name``.
        ValueError: If a row does not have exactly four fields, or the
            confidence of a row whose image id is in ``ss`` is not an integer.
    """
    image_to_tag = {}
    for known_id in ss:
        image_to_tag[known_id] = []
    tag_to_image = {}

    with open(filename) as annotations:
        annotations.readline()  # discard the first line
        for row in annotations:
            imageid, source, tag, confidence = row.split(',')
            # Resolved for every row, so an unknown tag raises even when the
            # row would be filtered out below.
            tag_name = tag_to_name[tag]
            if imageid not in ss or int(confidence) != 1:
                continue
            image_to_tag[imageid].append(tag_name)
            tag_to_image.setdefault(tag_name, []).append(imageid)

    return image_to_tag, tag_to_image
print('Creating dictionaries.')
image_to_tag, tag_to_image = create_dictionaries(trainfile)
image_to_tag_test, tag_to_image_test = create_dictionaries(testfile)

print('Creating pickle files.')
# Output path paired with the mapping stored in it; written in this order.
pickle_outputs = (
    ('openimage_image_to_tag.pickle', image_to_tag),
    ('openimage_tag_to_image.pickle', tag_to_image),
    ('openimage_image_to_tag_test.pickle', image_to_tag_test),
    ('openimage_tag_to_image_test.pickle', tag_to_image_test),
)
for pickle_path, payload in pickle_outputs:
    with open(pickle_path, 'wb') as pickle_file:
        pickle.dump(payload, pickle_file)

print('Done!')