Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add MinMaxScaling as a preprocessor #1537

Open
wants to merge 4 commits into
base: master
Choose a base branch
from
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
72 changes: 63 additions & 9 deletions pylearn2/datasets/preprocessing.py
Original file line number Diff line number Diff line change
Expand Up @@ -215,10 +215,10 @@ def apply(self, dataset, can_fit=False):
X = dataset.get_topological_view()
num_topological_dimensions = len(X.shape) - 2
if num_topological_dimensions != len(self.patch_shape):
raise ValueError("ExtractGridPatches with "
+ str(len(self.patch_shape))
+ " topological dimensions called on"
+ " dataset with " +
raise ValueError("ExtractGridPatches with " +
str(len(self.patch_shape)) +
" topological dimensions called on" +
" dataset with " +
str(num_topological_dimensions) + ".")
num_patches = X.shape[0]
max_strides = [X.shape[0] - 1]
Expand Down Expand Up @@ -414,11 +414,11 @@ def apply(self, dataset, can_fit=False):
num_topological_dimensions = len(X.shape) - 2

if num_topological_dimensions != len(self.patch_shape):
raise ValueError("ExtractPatches with "
+ str(len(self.patch_shape))
+ " topological dimensions called on "
+ "dataset with "
+ str(num_topological_dimensions) + ".")
raise ValueError("ExtractPatches with " +
str(len(self.patch_shape)) +
" topological dimensions called on " +
"dataset with " +
str(num_topological_dimensions) + ".")

# batch size
output_shape = [self.num_patches]
Expand Down Expand Up @@ -1913,3 +1913,57 @@ def apply(self, dataset, can_fit=False):
dataset.X = X[start:stop, :]
if y is not None:
dataset.y = y[start:stop, :]


class MinMaxScaling(ExamplewisePreprocessor):
"""
Subtracts the min and divides by the \|max - min\|.

Parameters
----------
global_mean : bool, optional
If `True`, subtract the (scalar) min over every element
in the design matrix. If `False`, subtract the min from
each column (feature) separately. Default is `False`.
global_std : bool, optional
If `True`, after centering, divide by the (scalar)
\|max - min\| of every element in the design matrix. If `False`,
divide by the column-wise (per-feature) \|max - min\|.
Default is `False`.
mm_eps : float, optional
Stabilization factor added to the \|max - min\| before
dividing, to prevent \|max - min\| very close to zero
from causing the feature values to blow up too much.
Default is `1e-4`.
"""

def __init__(self, global_min=False, global_max=False, mm_eps=1e-4):
self._global_min = global_min
self._global_max = global_max
self._mm_eps = mm_eps
self._min = None
self._max = None

def apply(self, dataset, can_fit=False):
"""
:math:`\hat{x} = \frac{x - min(x)}{\mid max(x) - min(x) \mid}`
"""
X = dataset.get_design_matrix()
if can_fit:
self._min = X.min() if self._global_min else X.min(axis=0)
self._max = X.max() if self._global_max else X.max(axis=0)
else:
if self._min is None or self._max is None:
raise ValueError("can_fit is False, but Normalization object "
"has no stored min or max")
new = (X - self._min) / (self._mm_eps +
numpy.abs(self._max - self._min))
dataset.set_design_matrix(new)

def as_block(self):
if self._min is None or self._max is None:
raise ValueError("can't convert %s to block without fitting"
% self.__class__.__name__)
return ExamplewiseAddScaleTransform(
add=-self._min,
multiply=numpy.abs(self._max - self._min) ** -1)