-
Notifications
You must be signed in to change notification settings - Fork 0
/
Dataset.h
177 lines (136 loc) · 4.44 KB
/
Dataset.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
#ifndef HPC2022_DATASET_H
#define HPC2022_DATASET_H
#include "utility"
#include "vector"
#include "iostream"
#include "set"
/**
 * Plain aggregate describing a labelled dataset.
 *
 * The struct only holds raw pointers and sizes. It declares no constructor
 * or destructor, so allocating and releasing the buffers is left to the code
 * that fills it in.
 */
struct Dataset {
    /** Predictor values stored in one flat buffer; element (row, column) is
     *  read at offset row * predictors_column_number + column
     *  (see get_x_element). */
    double *predictor_matrix;

    /** Class label of each row; indexed by row number. */
    int *class_vector;

    /** Number of predictor columns, i.e. the row stride of predictor_matrix. */
    unsigned int predictors_column_number;

    /** Number of rows in predictor_matrix and entries in class_vector. */
    unsigned int rows_number;

    /** Buffer of class labels holding number_of_unique_classes entries. */
    int *unique_classes;

    /** Number of entries in unique_classes. */
    unsigned int number_of_unique_classes;
};
/**
* Accessing the dataset
*/
/**
 * Converts a (row, column) position into an offset inside a flat,
 * row-major buffer.
 *
 * Declared inline because the body lives in a header: without it, including
 * this header from more than one translation unit produces
 * multiple-definition link errors.
 *
 * @param row          zero-based row number
 * @param column       zero-based column number inside the row
 * @param column_width number of columns per row (the row stride)
 * @return row * column_width + column
 *
 * @note The arithmetic is done in unsigned int, so the result wraps around
 *       when the true offset does not fit in that type.
 */
inline unsigned int index(unsigned int row, unsigned int column, unsigned int column_width) {
    return row * column_width + column;
}
/**
 * Reads one predictor value from a dataset.
 *
 * Declared inline because the body lives in a header; a non-inline
 * definition breaks linking as soon as two translation units include it.
 *
 * @param df     dataset to read from
 * @param row    zero-based row number
 * @param column zero-based predictor column number
 * @return the value stored in df.predictor_matrix at (row, column)
 *
 * @note No bounds checking is performed.
 */
inline double get_x_element(const Dataset &df, unsigned int row, unsigned int column) {
    return df.predictor_matrix[index(row, column, df.predictors_column_number)];
}
/**
* Utility functions (for matrix)
*/
/**
 * Copies one row of a flat row-major matrix into output_buffer.
 *
 * Declared inline because the body lives in a header (avoids
 * multiple-definition link errors). The loop counter is unsigned so that it
 * has the same type as column_width instead of mixing signed and unsigned
 * operands in the comparison.
 *
 * @param x             matrix storage
 * @param row_index     zero-based row to copy
 * @param column_width  number of columns per row; also the number of
 *                      elements written
 * @param output_buffer destination; must have room for column_width doubles
 */
inline void get_row(const double *x,           /*in*/
                    unsigned int row_index,    /*in*/
                    unsigned int column_width, /*in*/
                    double *output_buffer      /*out*/) {
    for (unsigned int j = 0; j < column_width; j++) {
        output_buffer[j] = x[index(row_index, j, column_width)];
    }
}
/**
 * Copies one column of a flat row-major matrix into output_buffer.
 *
 * Declared inline because the body lives in a header (avoids
 * multiple-definition link errors). The loop counter is unsigned so that it
 * has the same type as row_width instead of mixing signed and unsigned
 * operands in the comparison.
 *
 * @param x             matrix storage
 * @param column_index  zero-based column to copy
 * @param column_width  number of columns per row (the row stride)
 * @param row_width     number of rows to walk; despite the name this is the
 *                      row count, and also the number of elements written
 * @param output_buffer destination; must have room for row_width doubles
 */
inline void get_column(const double *x,           /*in*/
                       unsigned int column_index, /*in*/
                       unsigned int column_width, /*in*/
                       unsigned int row_width,    /*in*/
                       double *output_buffer      /*out*/) {
    for (unsigned int i = 0; i < row_width; i++) {
        output_buffer[i] = x[index(i, column_index, column_width)];
    }
}
/**
* Utility functions (for Dataset)
*/
/**
 * Copies one dataset row into output_buffer, optionally followed by its
 * class label.
 *
 * Declared inline because the body lives in a header (avoids
 * multiple-definition link errors).
 *
 * @param df            dataset to read from
 * @param row_index     zero-based row to copy
 * @param include_y     when true, the row's class label is appended after
 *                      the predictors, converted to double
 * @param output_buffer destination; needs df.predictors_column_number
 *                      doubles, plus one more when include_y is true
 */
inline void get_row(const Dataset &df,      /*in*/
                    unsigned int row_index, /*in*/
                    bool include_y,         /*in*/
                    double *output_buffer   /*out*/) {
    const unsigned int predictor_count = df.predictors_column_number;
    for (unsigned int i = 0; i < predictor_count; i++) {
        output_buffer[i] = get_x_element(df, row_index, i);
    }
    if (include_y) {
        // The label goes in the slot right after the last predictor.
        output_buffer[predictor_count] = static_cast<double>(df.class_vector[row_index]);
    }
}
/**
 * Copies one predictor column of a dataset into output_buffer.
 *
 * Declared inline because the body lives in a header (avoids
 * multiple-definition link errors). The loop counter is unsigned so that it
 * has the same type as df.rows_number instead of mixing signed and unsigned
 * operands in the comparison.
 *
 * @param df            dataset to read from
 * @param column_index  zero-based predictor column to copy
 * @param output_buffer destination; must have room for df.rows_number doubles
 */
inline void get_column(const Dataset &df,         /*in*/
                       unsigned int column_index, /*in*/
                       double *output_buffer      /*out*/) {
    for (unsigned int i = 0; i < df.rows_number; i++) {
        output_buffer[i] = get_x_element(df, i, column_index);
    }
}
/**
 * Writes the distinct values of classes_vector into output_buffer in
 * ascending order.
 *
 * Declared inline because the body lives in a header (avoids
 * multiple-definition link errors).
 *
 * @param classes_vector labels to scan
 * @param length         number of labels in classes_vector
 * @param output_buffer  destination; must have room for every distinct
 *                       label (get_number_of_unique_classes returns that
 *                       count for the same input)
 */
inline void get_unique_classes(int *classes_vector, /*in*/
                               unsigned int length, /*in*/
                               int *output_buffer   /*out*/) {
    // std::set drops duplicates and iterates in ascending order.
    const std::set<int> distinct(classes_vector, classes_vector + length);

    unsigned int written = 0;
    for (std::set<int>::const_iterator it = distinct.begin(); it != distinct.end(); ++it) {
        output_buffer[written] = *it;
        ++written;
    }
}
/**
 * Counts the distinct values in classes_vector.
 *
 * Declared inline because the body lives in a header (avoids
 * multiple-definition link errors).
 *
 * @param classes_vector labels to scan
 * @param length         number of labels in classes_vector
 * @return number of distinct labels; 0 when length is 0
 */
inline int get_number_of_unique_classes(int *classes_vector, /*in*/
                                        unsigned int length  /*in*/) {
    // std::set keeps a single copy of each value, so its size is the answer.
    const std::set<int> distinct(classes_vector, classes_vector + length);
    return static_cast<int>(distinct.size());
}
/**
 * Overwrites one element of a flat row-major matrix.
 *
 * Declared inline because the body lives in a header (avoids
 * multiple-definition link errors).
 *
 * @param x            matrix storage to modify
 * @param value        value to store
 * @param row          zero-based row of the target element
 * @param column       zero-based column of the target element
 * @param column_width number of columns per row (the row stride)
 *
 * @note No bounds checking is performed.
 */
inline void modify_matrix_value(double *x, double value, unsigned int row, unsigned int column,
                                unsigned int column_width) {
    x[index(row, column, column_width)] = value;
}
/**
* Presentation functions
*/
/**
 * Prints a flat row-major matrix to std::cout, one matrix row per line.
 *
 * Every element is followed by ",\t" and every row is terminated with
 * std::endl. Both loop counters are unsigned so that they have the same type
 * as the dimensions they are compared against and as the parameters of
 * index().
 *
 * @tparam T       element type; must be streamable to std::cout
 * @param x        matrix storage
 * @param rows     number of rows to print
 * @param columns  number of columns per row (the row stride)
 * @param metadata when true, a line stating the dimensions is printed first
 */
template<typename T>
void print_matrix(T *x, unsigned int rows, unsigned int columns, bool metadata = false) {
    if (metadata) {
        std::cout << "The matrix has " << rows << " rows and " << columns << " columns" << std::endl;
    }
    for (unsigned int i = 0; i < rows; i++) {
        for (unsigned int j = 0; j < columns; j++) {
            std::cout << x[index(i, j, columns)] << ",\t";
        }
        std::cout << std::endl;
    }
}
/**
 * Prints the elements of a buffer to std::cout on a single line.
 *
 * Every element is followed by a tab and the line is terminated with
 * std::endl, which is emitted even when there are no elements.
 *
 * @tparam T       element type; must be streamable to std::cout
 * @param x        first element of the buffer
 * @param rows     number of elements to print
 * @param metadata when true, a line stating the element count is printed first
 */
template<typename T>
void print_vector(T *x, unsigned int rows, bool metadata = false) {
    if (metadata)
        std::cout << "The vector has " << rows << std::endl;

    T *const last = x + rows;
    for (T *cursor = x; cursor != last; ++cursor)
        std::cout << *cursor << "\t";

    std::cout << std::endl;
}
/**
 * Prints a summary of a dataset to std::cout.
 *
 * Output, in order: a dimensions line (the column count is the predictor
 * count plus one), optionally the predictor matrix, the class vector, and
 * the unique classes together with their count.
 *
 * Declared inline because the body lives in a header (avoids
 * multiple-definition link errors).
 *
 * @param df     dataset to print
 * @param matrix when true (the default) the predictor matrix is printed too
 */
inline void print_dataset(const Dataset &df, bool matrix = true) {
    std::cout << "The dataset has " << df.rows_number << " rows and " << df.predictors_column_number + 1 << " columns"
              << std::endl;
    if (matrix) {
        std::cout << "Predictor matrix:" << std::endl;
        print_matrix(df.predictor_matrix, df.rows_number, df.predictors_column_number);
    }
    std::cout << "Class vector:" << std::endl;
    print_vector(df.class_vector, df.rows_number);
    std::cout << "Unique classes: (" << df.number_of_unique_classes << ")" << std::endl;
    print_vector(df.unique_classes, df.number_of_unique_classes);
}
#endif //HPC2022_DATASET_H