-
Notifications
You must be signed in to change notification settings - Fork 12
/
datasets.py
135 lines (108 loc) · 4.61 KB
/
datasets.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
import os
import numpy as np
import torch
from torch.utils.data.dataset import Dataset
# Pull in the printing/plotting helpers from ``utils``; the import form
# depends on whether this module is loaded as part of a package.
if not __package__:
    # No package context (``__package__`` is None or empty): import ``utils``
    # as a top-level module, e.g. from the current directory.
    from utils import paint, plot_pie, plot_segment
else:
    # Package context: import the sibling ``utils`` module relatively.
    from .utils import paint, plot_pie, plot_segment

# Names exported by ``from <module> import *``.
__all__ = ["SensorDataset"]
class SensorDataset(Dataset):
    """
    A dataset class for multi-channel time-series data captured by wearable sensors.
    This class is slightly modified from the original implementation at:
    https://github.com/AdelaideAuto-IDLab/Attend-And-Discriminate

    The arrays ``data`` and ``target`` are read from an ``.npz`` archive in
    ``path_processed``. Indexing the dataset yields a ``(data, target, idx)``
    triple of tensors (see :meth:`__getitem__`).
    """

    def __init__(
        self,
        dataset,
        window,
        stride,
        stride_test,
        path_processed,
        prefix,
        verbose=False,
    ):
        """
        Initialize instance.
        :param dataset: str. Name of target dataset.
        :param window: int. Sliding window size in samples.
        :param stride: int. Step size of the sliding window for training and validation data.
        :param stride_test: int. Step size of the sliding window for testing data.
        :param path_processed: str. Path to directory containing processed training, validation and test data.
        :param prefix: str. Prefix for the filename of the processed data. Options 'train', 'val', or 'test'.
        :param verbose: bool. Whether to print detailed information about the dataset when initializing.
        """
        super().__init__()
        self.dataset = dataset
        self.window = window
        self.stride = stride
        self.prefix = prefix
        self.path_processed = path_processed
        self.verbose = verbose

        # A test set evaluated with stride 1 lives in a dedicated archive;
        # every other combination uses "<prefix>_data.npz".
        if prefix == "test" and stride_test == 1:
            filename = "test_sample_wise.npz"
        else:
            filename = f"{prefix}_data.npz"
        self.path_dataset = os.path.join(path_processed, filename)

        # Use the archive as a context manager so the underlying file handle
        # is closed once both arrays have been read into memory.
        with np.load(self.path_dataset) as archive:
            self.data = archive["data"]
            self.target = archive["target"]

        self.len = self.data.shape[0]
        assert self.data.shape[0] == self.target.shape[0]
        print(
            paint(
                f"Creating {self.dataset} {self.prefix} HAR dataset of size {self.len} ..."
            )
        )

        if self.verbose:
            self.get_info()
            self.get_distribution()

        # Per-sample weights are only computed for the training split.
        if prefix == "train":
            self.weight_samples = self.get_weights()

        self.n_channels = self.data.shape[-1]
        # Number of distinct labels present in this split (previously this
        # was the number of samples, i.e. identical to ``self.len``).
        self.n_classes = np.unique(self.target).shape[0]

    def __len__(self):
        """Return the number of segments in the dataset."""
        return self.len

    def __getitem__(self, index):
        """
        Fetch one segment.
        :param index: int. Position of the segment in ``self.data`` / ``self.target``.
        :return: tuple ``(data, target, idx)``: the segment as a float tensor, its label as a
            one-element long tensor, and ``index`` wrapped in a tensor.
        """
        data = torch.FloatTensor(self.data[index])
        target = torch.LongTensor([int(self.target[index])])
        idx = torch.from_numpy(np.array(index))
        return data, target, idx

    def get_info(self, n_samples=3):
        """
        Print shape/dtype information and plot a few random segments per class.
        :param n_samples: int. Maximum number of segments drawn (without replacement) from each class.
        """
        print(paint(f"[-] Information on {self.prefix} dataset:"))
        print("\t data: ", self.data.shape, self.data.dtype, type(self.data))
        print("\t target: ", self.target.shape, self.target.dtype, type(self.target))

        target_idx = [
            np.where(self.target == label)[0] for label in np.unique(self.target)
        ]
        # Cap the draw at the class size: sampling without replacement raises
        # when a class holds fewer than ``n_samples`` segments.
        picks = [
            np.random.choice(idx, min(n_samples, idx.size), replace=False)
            for idx in target_idx
        ]
        target_idx_samples = (
            np.concatenate(picks) if picks else np.array([], dtype=int)
        )

        path_save = os.path.join(self.path_processed, "segments")
        for i, random_idx in enumerate(target_idx_samples):
            data, target, index = self[random_idx]
            # Only describe the first sampled segment; all of them are plotted.
            if i == 0:
                print(paint(f"[-] Information on segment #{random_idx}/{self.len}:"))
                print("\t data: ", data.shape, data.dtype, type(data))
                print("\t target: ", target.shape, target.dtype, type(target))
                print("\t index: ", index, index.shape, index.dtype, type(index))
            plot_segment(
                data,
                target,
                index=index,
                prefix=self.prefix,
                path_save=path_save,
                num_class=len(target_idx),
            )

    def get_distribution(self):
        """Plot the label distribution of this split via ``plot_pie``."""
        plot_pie(
            self.target, self.prefix, os.path.join(self.path_processed, "distribution")
        )

    def get_weights(self):
        """
        Compute inverse-class-frequency weights, one per sample.
        :return: torch.DoubleTensor with one entry per element of ``self.target``; each entry is
            ``1 / (number of samples sharing that label)``.
        """
        # ``inverse`` maps every sample to the position of its label in the
        # sorted unique-label array, so the lookup is correct even when labels
        # are not the contiguous integers 0..K-1.
        _, inverse, counts = np.unique(
            self.target, return_inverse=True, return_counts=True
        )
        weight_samples = 1.0 / counts[inverse]
        return torch.from_numpy(weight_samples).double()