-
Notifications
You must be signed in to change notification settings - Fork 14
/
data_preprocessing.py
88 lines (66 loc) · 1.86 KB
/
data_preprocessing.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
import numpy as np
from validation import check_X
def normalization(X):
    r"""Scale every sample (row) of a samples matrix to unit Euclidean norm.

    .. math:: \|X_i\|^2 = 1 \quad \forall i \in [1..n]

    A row whose norm is zero cannot be scaled to unit length; such rows are
    returned unchanged (all zeros) instead of being turned into NaN.

    Parameters
    ----------
    X : (n,m) ndarray,
        where *n* is the number of samples and *m* is the number of features.

    Returns
    -------
    Xn : (n,m) ndarray,
        the normalized version of *X*.
    """
    # Validation helper imported from the project's ``validation`` module;
    # its return value is not used here.
    # NOTE(review): presumably it raises on malformed input -- confirm.
    check_X(X)
    # keepdims=True keeps the norms as an (n,1) column so the division
    # broadcasts row-wise without transposing X back and forth.
    norms = np.linalg.norm(X, axis=1, keepdims=True)
    # Same guard rescale_01 applies to zero ranges: divide by 1 rather than 0.
    norms[norms == 0] = 1
    return X / norms
def rescale(X):
    """Rescale the features of a samples matrix into the range [-1,1].

    The matrix is first passed through :func:`rescale_01`, and the result
    is then stretched and shifted linearly (``v * 2 - 1``).

    Parameters
    ----------
    X : (n,m) ndarray,
        where *n* is the number of samples and *m* is the number of features.

    Returns
    -------
    Xr : (n,m) ndarray,
        the rescaled version of *X* in [-1,1].
    """
    unit_scaled = rescale_01(X)
    return unit_scaled * 2 - 1
def rescale_01(X):
    """Rescale each feature (column) of a samples matrix into [0,1].

    Every column is shifted by its minimum and divided by its range
    (maximum minus minimum). A column whose range is zero is divided by 1
    instead, so a constant feature maps to 0.

    Parameters
    ----------
    X : (n,m) ndarray,
        where *n* is the number of samples and *m* is the number of features.

    Returns
    -------
    Xr : (n,m) ndarray,
        the rescaled version of *X* in [0,1].
    """
    col_min = np.min(X, axis=0)
    col_max = np.max(X, axis=0)
    span = col_max - col_min
    # Avoid dividing by zero for constant features.
    span[span == 0] = 1
    return (X - col_min) / span
def centering(X):
    """Center a samples matrix by subtracting each feature's mean.

    Parameters
    ----------
    X : (n,m) ndarray,
        where *n* is the number of samples and *m* is the number of features.

    Returns
    -------
    Xc : (n,m) ndarray,
        the centered version of *X*.
    """
    n_samples = X.shape[0]
    ones_col = np.ones((n_samples, 1))
    # Row of per-feature means, computed as (1/n) * 1^T X.
    feature_means = (1.0 / n_samples) * np.dot(ones_col.T, X)
    # Broadcasting subtracts the row of means from every sample.
    return X - feature_means