build_vehicle_records.py
# import packages
from config import dlib_front_rear_config as config
from pipeline.utils.tfannotation import TFAnnotation
from bs4 import BeautifulSoup
from PIL import Image
import tensorflow as tf
import os
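
# NOTE: this script reads dlib-style XML annotation files (config.TRAIN_XML and
# config.TEST_XML), writes a class label map to config.CLASSES_FILE, and
# serializes each image together with its bounding boxes into TFRecord files
# (config.TRAIN_RECORD and config.TEST_RECORD). It uses the TensorFlow 1.x API
# (tf.python_io, tf.gfile, tf.app) and is meant to be run directly:
#
#     $ python build_vehicle_records.py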


def main(_):
    # open the classes output file
    f = open(config.CLASSES_FILE, "w")

    # loop over the classes dictionary
    for (k, v) in config.CLASSES.items():
        # construct the class information and write it to file
        item = ("item {\n"
                "\tid: " + str(v) + "\n"
                "\tname: '" + k + "'\n"
                "}\n")
        f.write(item)

    # close the output classes file
    f.close()
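
    # for reference, each entry written above follows the label map format used
    # by the TensorFlow Object Detection API; assuming a hypothetical
    # config.CLASSES = {"rear": 1, "front": 2}, the first entry would look like
    # (whitespace aside):
    #
    #     item {
    #         id: 1
    #         name: 'rear'
    #     }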

    # initialize the data split files
    datasets = [
        ("train", config.TRAIN_XML, config.TRAIN_RECORD),
        ("test", config.TEST_XML, config.TEST_RECORD)
    ]
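
    # the XML files above are expected to be in dlib's imglab format; the tag
    # and attribute names mirror what the parsing code below reads, while the
    # file path, box coordinates, and label value are illustrative only:
    #
    #     <image file='vehicles/image_0001.jpg'>
    #         <box top='78' left='220' width='150' height='92'>
    #             <label>rear</label>
    #         </box>
    #     </image>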

    # loop over the datasets
    for (dType, inputPath, outputPath) in datasets:
        # build the soup
        print("[INFO] processing '{}'...".format(dType))
        contents = open(inputPath).read()
        soup = BeautifulSoup(contents, "lxml")

        # initialize the TFRecord writer and the total number of examples
        # written to file
        writer = tf.python_io.TFRecordWriter(outputPath)
        total = 0

        # loop over all image elements
        for image in soup.find_all("image"):
            # read the raw, encoded image from disk
            p = os.path.sep.join([config.BASE_PATH, image["file"]])
            encoded = tf.gfile.GFile(p, "rb").read()
            encoded = bytes(encoded)

            # load the image from disk again, this time as a PIL object,
            # to grab its spatial dimensions
            pilImage = Image.open(p)
            (w, h) = pilImage.size

            # parse the filename and encoding from the input path
            filename = image["file"].split(os.path.sep)[-1]
            encoding = filename[filename.rfind(".") + 1:]

            # initialize the annotation object used to store information
            # regarding the bounding boxes and labels
            tfAnnot = TFAnnotation()
            tfAnnot.image = encoded
            tfAnnot.encoding = encoding
            tfAnnot.filename = filename
            tfAnnot.width = w
            tfAnnot.height = h

            # loop over all bounding boxes associated with the image
            for box in image.find_all("box"):
                # check to see if the bounding box should be ignored
                if box.has_attr("ignore"):
                    continue

                # extract the bounding box information and label, ensuring
                # that all bounding box dimensions fit inside the image
                startX = max(0, float(box["left"]))
                startY = max(0, float(box["top"]))
                endX = min(w, float(box["width"]) + startX)
                endY = min(h, float(box["height"]) + startY)
                label = box.find("label").text

                # TensorFlow assumes all bounding boxes are in the [0, 1]
                # range, so scale the coordinates by the image dimensions
                xMin = startX / w
                xMax = endX / w
                yMin = startY / h
                yMax = endY / h
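
                # as a quick sanity check with illustrative numbers: for a
                # hypothetical 800x600 image and a box with left=200, top=150,
                # width=400, height=300, this yields xMin=0.25, xMax=0.75,
                # yMin=0.25, yMax=0.75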

                # due to errors in the annotations, the minimum values can
                # sometimes end up larger than the maximum values; skip any
                # such boxes
                if xMin > xMax or yMin > yMax:
                    continue

                # update the bounding boxes and labels lists
                tfAnnot.xMins.append(xMin)
                tfAnnot.xMaxs.append(xMax)
                tfAnnot.yMins.append(yMin)
                tfAnnot.yMaxs.append(yMax)
                tfAnnot.textLabels.append(label.encode("utf-8"))
                tfAnnot.classes.append(config.CLASSES[label])
                tfAnnot.difficult.append(0)

                # increment the total number of examples
                total += 1

            # encode the data point attributes using the TensorFlow helper
            # functions
            features = tf.train.Features(feature=tfAnnot.build())
            example = tf.train.Example(features=features)

            # add the example to the writer
            writer.write(example.SerializeToString())

        # close the writer and print diagnostic information to the user
        writer.close()
        print("[INFO] {} examples saved for '{}'".format(total, dType))


# check to see if the main thread should be started
if __name__ == "__main__":
    tf.app.run()