#!/usr/bin/env python3
# Copyright 2018 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Take a folder, write a TFRecord into it
# https://github.com/tensorflow/models/blob/master/research/object_detection/g3doc/using_your_own_dataset.md
import argparse
import collections
import io
import multiprocessing.pool
import os
import random
import time

import numpy as np
import tensorflow as tf
from PIL import Image
from object_detection.utils import dataset_util

import bbox_writer

description_text = """\
Use this script to convert labeled images into TFRecord files for training.
Both scripts in the labeling pipeline (labeler.py, tracking.py) store annotated
images as an image with a corresponding .txt file containing labels. While that
format is convenient for human readability, it is not the format TensorFlow is
expecting. This script converts the paired image format to the TFRecord format
that TensorFlow is expecting. Furthermore, this script can automatically
generate an evaluation split, or create one from a separate folder. Finally, the
label map required for the Object Detection API is generated and placed into the
specified folder.
"""
epilog_text = """\
example:
    ./convert_labels_to_records.py [folder]           convert without eval
    ./convert_labels_to_records.py [folder] --eval    generate an eval split
"""
parser = argparse.ArgumentParser(
    description=description_text,
    epilog=epilog_text,
    formatter_class=argparse.RawDescriptionHelpFormatter)
parser.add_argument("folder", type=str,
                    help="Folder containing training data. All converted data "
                         "is output here.")
parser.add_argument("-n", "--number", type=int, default=10,
                    help="Number of record shards to write for each of the "
                         "train and eval sets.")
parser.add_argument("-s", "--split", type=float, default=0.20,
                    help="Fraction of training data to use for eval. Ignored "
                         "if --eval_folder is specified.")

eval_options = parser.add_mutually_exclusive_group()
eval_options.add_argument("-e", "--eval", action="store_true", default=False,
                          help="Automatically generate an eval split from the "
                               "train folder.")
eval_options.add_argument("--eval_folder", type=str, default=None,
                          help="Folder containing eval data.")

args = parser.parse_args()

# NamedTuple to store results from the record-writing workers.
Result = collections.namedtuple("RecordWritingResult",
                                ["id", "record_count", "class_count",
                                 "negative_count"])

def create_tf_example(labels, txt_full_path):
    # Check to see whether the first line is the path or not.
    with open(txt_full_path, "r") as f:
        first = f.readline().strip()

    if first.startswith("#") and os.path.isfile(first[1:]):  # On disk
        print("Found filename %s in the first line!" % first[1:])
        image_full_path = first[1:]
    else:
        txt_name = os.path.basename(txt_full_path)
        image_name = os.path.splitext(txt_name)[0] + ".png"
        image_full_path = os.path.join(os.path.dirname(txt_full_path),
                                       image_name)

    im = Image.open(image_full_path)
    arr = io.BytesIO()
    im.save(arr, format='PNG')

    height = im.height  # Image height
    width = im.width  # Image width
    channels = np.array(im).shape[2]  # Assumes a color image (H x W x C)
    filename = image_full_path  # Filename of the image
    encoded_image_data = arr.getvalue()  # Encoded image bytes in png
    image_fmt = 'png'  # Compression for the image bytes (encoded_image_data)

    rects, classes = bbox_writer.read_rects(txt_full_path)

    # List of normalized coordinates, 1 per box, capped to [0, 1]
    xmins = [max(min(rect[0] / width, 1), 0) for rect in rects]  # left x
    xmaxs = [max(min(rect[2] / width, 1), 0) for rect in rects]  # right x
    ymins = [max(min(rect[1] / height, 1), 0) for rect in rects]  # top y
    ymaxs = [max(min(rect[3] / height, 1), 0) for rect in rects]  # bottom y

    classes_txt = [cls.encode('utf-8') for cls in classes]  # String names
    # Shift to 1-based ids to match the label map written by write_labels;
    # the Object Detection API reserves id 0 for the background class.
    class_ids = [labels[cls] + 1 for cls in classes]

    tf_example = tf.train.Example(features=tf.train.Features(feature={
        'image/height': dataset_util.int64_feature(height),
        'image/width': dataset_util.int64_feature(width),
        'image/channels': dataset_util.int64_feature(channels),
        'image/colorspace': dataset_util.bytes_feature("RGB".encode('utf-8')),
        'image/filename': dataset_util.bytes_feature(filename.encode('utf-8')),
        'image/source_id': dataset_util.bytes_feature(filename.encode('utf-8')),
        'image/image_key': dataset_util.bytes_feature(filename.encode('utf-8')),
        'image/encoded': dataset_util.bytes_feature(encoded_image_data),
        'image/format': dataset_util.bytes_feature(image_fmt.encode('utf-8')),
        'image/object/bbox/xmin': dataset_util.float_list_feature(xmins),
        'image/object/bbox/xmax': dataset_util.float_list_feature(xmaxs),
        'image/object/bbox/ymin': dataset_util.float_list_feature(ymins),
        'image/object/bbox/ymax': dataset_util.float_list_feature(ymaxs),
        'image/object/class/text': dataset_util.bytes_list_feature(classes_txt),
        'image/object/class/label': dataset_util.int64_list_feature(class_ids),
    }))

    is_negative = len(rects) == 0
    return tf_example, collections.Counter(classes), is_negative

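# A minimal sketch of how a finished shard can be spot-checked, assuming the
# TF 1.x API used elsewhere in this file. The pipeline never calls it; the
# function name is ours, added purely as a debugging aid.
def inspect_record(record_path):
    for raw in tf.python_io.tf_record_iterator(record_path):
        example = tf.train.Example.FromString(raw)
        feature = example.features.feature
        print(feature['image/filename'].bytes_list.value[0],
              feature['image/object/class/text'].bytes_list.value)
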
def write_record_from_list(id, labels, data, out_path):
    examples_count = collections.Counter()
    negative_count = 0

    with tf.python_io.TFRecordWriter(out_path) as writer:
        for filename in data:
            print("[%s] Writing record for" % id, filename)
            example, count, is_negative = create_tf_example(labels, filename)
            writer.write(example.SerializeToString())

            examples_count += count
            negative_count += is_negative

    return Result(id, len(data), examples_count, negative_count)

def get_shuffled_filenames(folder):
    # Grab the names of all of the pieces of train data
    train_txts = []
    for root, dirs, files in os.walk(folder):
        for name in files:
            if not name.endswith(".txt"): continue
            if name.endswith("rects.txt"): continue  # ignore init files

            txt_full_path = os.path.join(root, name)
            train_txts.append(txt_full_path)

    random.shuffle(train_txts)  # So that we get a spread of eval data
    return train_txts

def get_labels_from_filenames(filenames):
    labels = set()
    for filename in filenames:
        _, classes = bbox_writer.read_rects(filename)
        labels.update(classes)

    # Sort so the label -> id assignment is stable across runs.
    labels = sorted(labels)
    return {label: i for i, label in enumerate(labels)}

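# For example, annotations mentioning only the classes "robot" and "ball"
# (hypothetical names) produce {'ball': 0, 'robot': 1}: alphabetical order,
# 0-based. The ids are shifted to 1-based where the Object Detection API
# requires it (see create_tf_example and write_labels).
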
def get_train_eval_split_filenames(filenames, split):
    eval_size = int(len(filenames) * split)
    print("Generating eval split of %d elements" % eval_size)

    eval_filenames = filenames[:eval_size]
    train_filenames = filenames[eval_size:]
    return train_filenames, eval_filenames

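# Worked example: 100 shuffled filenames with the default --split of 0.20
# yield the first 20 as eval and the remaining 80 as train.
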
def write_labels(folder, labels):
    # Write out the label map file to disk.
    label_name = os.path.join(folder, "label.pbtxt")

    label_txt = []
    for cls, i in labels.items():
        # Ids are written 1-based; id 0 is reserved for the background class.
        label_txt.append("item {\n  id: %d\n  name: '%s'\n}\n" % (i + 1, cls))

    with open(label_name, "w") as f:
        f.write("\n".join(label_txt))

    print("Wrote %d labels: %s" % (len(labels), labels))

def print_results(results):
    example_counts = collections.Counter()
    record_counts = 0
    negative_counts = 0

    for result in results:
        print("[%s] wrote %d examples, class split: %s" %
              (result.id, result.record_count, result.class_count))
        record_counts += result.record_count
        example_counts += result.class_count
        negative_counts += result.negative_count

    print("Overall records:", record_counts)
    print("Overall examples:", example_counts)
    print("Overall negatives:", negative_counts)

def get_record_writing_tasks():
    """Assign all record writing work into different lists.

    This function parses flags to determine training and eval splits. Eval data
    will either not be generated at all (neither --eval nor --eval_folder
    specified), generated automatically from train data (--eval), or sourced
    from a separate location (--eval_folder).
    """
    train_filenames = get_shuffled_filenames(args.folder)
    print("Got train filenames", len(train_filenames))

    if args.eval_folder is not None:
        print("Getting filenames from eval folder")
        eval_filenames = get_shuffled_filenames(args.eval_folder)
    elif args.eval:
        print("Splitting train to get eval")
        train_filenames, eval_filenames = get_train_eval_split_filenames(
                train_filenames, args.split)
        print("Got new train filenames", len(train_filenames))
    else:  # No eval at all
        print("Not generating an eval set")
        eval_filenames = []

    print("Got eval filenames", len(eval_filenames))

    labels = get_labels_from_filenames(train_filenames + eval_filenames)
    print("Generated labels:", labels)

    tasks = []

    # Generate the training tasks by dealing the train filenames round-robin
    # across the shards.
    train_record_number = min(max(args.number, 1), len(train_filenames))
    train_lists = [[] for i in range(train_record_number)]
    for i, filename in enumerate(train_filenames):
        train_lists[i % train_record_number].append(filename)

    for i, train_list in enumerate(train_lists):
        out_path = os.path.join(args.folder, "train-%02d.record" % i)
        tasks.append(("train-%02d" % i, labels, train_list, out_path))

    # Generate eval tasks by splitting up the eval filenames the same way.
    eval_record_number = min(max(args.number, 1), len(eval_filenames))
    eval_lists = [[] for i in range(eval_record_number)]
    for i, filename in enumerate(eval_filenames):
        eval_lists[i % eval_record_number].append(filename)

    for i, eval_list in enumerate(eval_lists):
        out_path = os.path.join(args.folder, "eval-%02d.record" % i)
        tasks.append(("eval-%02d" % i, labels, eval_list, out_path))

    return tasks, labels

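# For example, the default --number of 10 applied to 85 train files deals them
# round-robin into train-00.record ... train-09.record, leaving 8 or 9
# examples per shard.
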
if __name__ == "__main__":
    t_start = time.time()
    random.seed(3017 + 4781)  # Make sure the shuffle order is the same

    # Make the tasks for the workers to process
    tasks, labels = get_record_writing_tasks()
    print("Generated %d record writing tasks" % len(tasks))

    # Actually have the workers generate the records
    pool = multiprocessing.pool.ThreadPool()
    results = pool.starmap(write_record_from_list, tasks)

    # Write out the labels at the end so the summary gets printed here
    write_labels(args.folder, labels)
    print_results(results)

    t_end = time.time()
    print("Took %5.2fs to write records" % (t_end - t_start))