-
Notifications
You must be signed in to change notification settings - Fork 0
/
restructure_dataset.py
70 lines (51 loc) · 2.76 KB
/
restructure_dataset.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
"""
This assumes that you have a dataset with the following structure:

dataset
|--> train
|    |--> images
|    |--> labels
|--> valid
|    |--> images
|    |--> labels
|--> test
|    |--> images
|    |--> labels
data.yaml
"""
import os
import argparse
import utils.restructure_obj
import utils.misc
import shutil
from dataset import Dataset
def str2bool(value):
    """Convert a command-line string to a real boolean.

    ``argparse`` with ``type=bool`` calls ``bool("False")``, which is ``True``
    because any non-empty string is truthy. This converter interprets the
    usual spellings instead.

    Args:
        value: The raw option value (a ``str``), or an existing ``bool``.

    Returns:
        ``True`` or ``False``.

    Raises:
        argparse.ArgumentTypeError: If the text is not a recognised boolean.
    """
    if isinstance(value, bool):
        return value
    normalized = value.strip().lower()
    if normalized in ('true', 't', 'yes', 'y', '1', 'on'):
        return True
    # The empty string stays False, matching what bool('') returned before.
    if normalized in ('false', 'f', 'no', 'n', '0', 'off', ''):
        return False
    raise argparse.ArgumentTypeError(f'expected a boolean value, got {value!r}')


def build_parser():
    """Create the argument parser for the dataset restructuring script.

    Returns:
        argparse.ArgumentParser: Parser configured with every supported option.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--input', type=str, help='path to old dataset - within the datasets folder', required=True)
    parser.add_argument('--output', type=str, help='path to new dataset - within the datasets folder')
    parser.add_argument('--detection_type', type=str, default='object', help='type of detection', choices=['object', 'segmentation', 'classification'])
    parser.add_argument('--format', type=str, default='yolo', help='format of dataset', choices=['yolo', 'coco'])
    parser.add_argument('--split_type', type=str, default='images', help='type of split', choices=['images', 'objects'])
    parser.add_argument('--train_split', type=float, default=0.8, help='train ratio')
    parser.add_argument('--valid_split', type=float, default=0.1, help='valid ratio')
    parser.add_argument('--test_split', type=float, default=0.1, help='test ratio')
    parser.add_argument('--resize_to', type=str, default='416x416', help='resize images in dataset - format: widthxheight')
    parser.add_argument('--contrast', type=str, default='CLAHE', help='contrast type of images in dataset', choices=['CLAHE', 'AHE', ''])
    # str2bool (not bool) so that "--grayscale False" really yields False.
    parser.add_argument('--grayscale', type=str2bool, default=False, help='grayscale or not')
    # parser.add_argument('--custom_preprocess', type=str, default="", help='path to file with custom preprocessing techniques')
    # NOTE(review): help text says "ratio" but the type is int - confirm the
    # intended type against the code that consumes args.backgrounds.
    parser.add_argument('--backgrounds', type=int, default=0, help='maximum ratio of background to total dataset size')
    parser.add_argument('--b_delimiter', type=str, default='_background', help='delimiter for background images')
    # comma delimited list of classes to exclude
    parser.add_argument('--exclude', type=str, default="", help='comma delimited list of classes to exclude, no spaces')
    # only for offline augmentation
    # NOTE(review): help text describes a multiplier but the type is bool, so
    # any non-empty value (including "0" or "False") parses as True. Left
    # unchanged because the intended type cannot be determined from this file.
    parser.add_argument('--augment_by', type=bool, default=False, help='augment by (multiplier)')
    parser.add_argument('--augment_options', type=str, default="", help='comma delimited list of augmentations to use, no spaces. Available: flip, rotate, blur, noise, brightness, contrast, sharpness, saturation, hue, mosaic')
    parser.add_argument('--synth_aug', type=str2bool, default=False, help='synth augment or not')
    return parser


# get command line arguments
def main(argv=None):
    """Parse the command line, build a ``Dataset`` and split it.

    Args:
        argv: Optional list of argument strings. ``None`` (the default) makes
            argparse read ``sys.argv[1:]``, as before.
    """
    args = build_parser().parse_args(argv)
    # Without an explicit output path, derive one from the input path.
    if args.output is None:
        args.output = args.input + "_processed"
    ds = Dataset(args)
    ds.split_dataset_obj()
# Run the CLI only when this file is executed as a script; importing the
# module no longer triggers argument parsing.
if __name__ == "__main__":
    main()