cost_per_example implementation for MPDBM #1510

Open · wants to merge 3 commits into base: master
31 changes: 31 additions & 0 deletions pylearn2/costs/cost.py
@@ -353,6 +353,37 @@ def expr(self, model, data, ** kwargs):

return sum_of_costs

def cost_per_example(self, model, data, ** kwargs):
"""
Returns the sum of the per-example constituent costs.

Parameters
----------
model : pylearn2.models.model.Model
the model for which we want to calculate the sum of
per-example costs
data : flat tuple of tensor_like variables.
data has to follow the format defined by self.get_data_specs(),
but this format will always be a flat tuple.
"""
self.get_data_specs(model)[0].validate(data)
composite_specs, mapping = self.get_composite_specs_and_mapping(model)
nested_data = mapping.nest(data)
costs = []
for cost, cost_data in safe_zip(self.costs, nested_data):
costs.append(cost.cost_per_example(model, cost_data, **kwargs))
assert len(costs) > 0

if any([cost is None for cost in costs]):
sum_of_costs = None
else:
costs = [coeff * cost
for coeff, cost in safe_zip(self.coeffs, costs)]
assert len(costs) > 0
sum_of_costs = reduce(lambda x, y: x + y, costs)

return sum_of_costs
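
The reduction above mirrors SumOfCosts.expr, but keeps the batch axis instead of collapsing it to a scalar. A minimal numpy sketch of the same weighting-and-summing logic, with made-up per-example cost vectors standing in for the symbolic Theano results:

```python
import numpy as np
from functools import reduce

# Hypothetical per-example costs from two constituent costs, for a
# batch of 4 examples (one entry per example, as cost_per_example
# returns).
costs = [np.array([0.5, 0.2, 0.9, 0.1]),
         np.array([1.0, 1.0, 0.0, 2.0])]
coeffs = [1.0, 0.1]

# If any constituent cost cannot provide a per-example value, the
# whole sum is undefined (None), matching the code above.
if any(cost is None for cost in costs):
    sum_of_costs = None
else:
    weighted = [coeff * cost for coeff, cost in zip(coeffs, costs)]
    sum_of_costs = reduce(lambda x, y: x + y, weighted)

print(sum_of_costs)  # [0.6 0.3 0.9 0.3]
```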

def get_composite_data_specs(self, model):
"""
Build and return a composite data_specs of all costs.
244 changes: 243 additions & 1 deletion pylearn2/costs/dbm.py
@@ -1467,6 +1467,99 @@ def expr(self, model, data, drop_mask=None, drop_mask_Y=None,

return total_cost

@wraps(Cost.cost_per_example)
def cost_per_example(self, model, data, drop_mask=None, drop_mask_Y=None,
return_locals=False, include_toronto=True, ** kwargs):
if self.supervised:
X, Y = data
else:
X = data
Y = None

if not self.supervised:
assert drop_mask_Y is None
# ignore Y if some other cost is supervised and has made it get
# passed in (can this still happen after the (space, source)
# interface change?)
Y = None
if self.supervised:
assert Y is not None
if drop_mask is not None:
assert drop_mask_Y is not None

if not hasattr(model, 'cost'):
model.cost = self
if not hasattr(model, 'mask_gen'):
model.mask_gen = self.mask_gen

dbm = model

X_space = model.get_input_space()

if drop_mask is None:
if self.supervised:
drop_mask, drop_mask_Y = self.mask_gen(X, Y, X_space=X_space)
else:
drop_mask = self.mask_gen(X, X_space=X_space)

if drop_mask_Y is not None:
assert drop_mask_Y.ndim == 1

if drop_mask.ndim < X.ndim:
if self.mask_gen is not None:
assert self.mask_gen.sync_channels
if X.ndim != 4:
raise NotImplementedError()
drop_mask = drop_mask.dimshuffle(0, 1, 2, 'x')

if not hasattr(self, 'noise'):
self.noise = False

history = dbm.do_inpainting(X, Y=Y, drop_mask=drop_mask,
drop_mask_Y=drop_mask_Y,
return_history=True,
noise=self.noise,
niter=self.niter,
block_grad=self.block_grad)
final_state = history[-1]

new_drop_mask = None
new_drop_mask_Y = None
new_history = [None for state in history]

if not hasattr(self, 'both_directions'):
self.both_directions = False
if self.both_directions:
raise NotImplementedError("both_directions was a research "
"feature not supported going forward.")

new_final_state = new_history[-1]

cfs = self.per_example_cost_from_states
out = cfs(final_state, new_final_state, dbm, X, Y, drop_mask,
drop_mask_Y, new_drop_mask, new_drop_mask_Y,
return_locals=True)
total_per_example_cost, sublocals = out
assert total_per_example_cost.ndim == 1

if not hasattr(self, 'robustness'):
self.robustness = None
if self.robustness is not None:
raise NotImplementedError("robustness was a research "
"feature not supported going forward.")

if not hasattr(self, 'toronto_act_targets'):
self.toronto_act_targets = None
toronto_act_cost = None
if self.toronto_act_targets is not None and include_toronto:
raise NotImplementedError("Toronto sparsity was a research "
"feature not supported going forward.")

if return_locals:
return locals()

return total_per_example_cost
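
For context, the symbolic vector returned here can be compiled into an ordinary callable. A rough usage sketch, assuming an already-built unsupervised MPDBM as `model` and this cost object as `cost` (both names are illustrative, not part of the diff):

```python
import theano

# Symbolic batch living in the model's input space.
X = model.get_input_space().make_theano_batch(name='X')

# 1D symbolic vector: one inpainting cost per example in the batch.
per_example = cost.cost_per_example(model, X)

f = theano.function([X], per_example)
# f(x_batch) returns a vector of length batch_size, e.g. for ranking
# examples by how hard they are to inpaint.
```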

def get_fixed_var_descr(self, model, data):
"""
Returns the FixedVarDescr object responsible for making sure the
@@ -1603,7 +1696,7 @@ def get_gradients(self, model, X, Y=None, **kwargs):
def get_inpaint_cost(self, dbm, X, V_hat_unmasked, drop_mask, state,
Y, drop_mask_Y):
"""
- Returns the generalized pseudolikelihood giving raw data, a mask,
+ Returns the generalized pseudolikelihood given raw data, a mask,
and the output of inference.

Parameters
@@ -1632,6 +1725,41 @@ def get_inpaint_cost(self, dbm, X, V_hat_unmasked, drop_mask, state,

return rval

def get_inpaint_cost_per_example(
self, dbm, X, V_hat_unmasked, drop_mask, state,
Y, drop_mask_Y):
"""
Returns the generalized pseudolikelihood per example given raw
data, a mask, and the output of inference.

Parameters
----------
dbm : DBM
X : a batch of inputs
V_hat_unmasked : A batch of reconstructions of X
drop_mask : A batch of mask values
state : Hidden states of the DBM
Y : a batch of labels
drop_mask_Y : A batch of Y mask values
"""
rcpe = dbm.visible_layer.recons_cost_per_example
rval = rcpe(X, V_hat_unmasked, drop_mask, use_sum=self.use_sum)
assert rval.ndim == 1

if self.supervised:
# pyflakes is too dumb to see that both branches define `scale`
scale = None
if self.use_sum:
scale = 1.
else:
scale = 1. / float(dbm.get_input_space().get_total_dimension())
Y_hat_unmasked = state['Y_hat_unmasked']
rc = dbm.hidden_layers[-1].recons_cost_per_example
rval = rval + rc(Y, Y_hat_unmasked, drop_mask_Y, scale)
assert rval.ndim == 1

return rval
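
When use_sum is False, the visible-layer term here is normalized by the number of visible units, so the label term gets rescaled by the same factor before the two are added. A small numpy illustration of that combination (the per-example cost values are made up):

```python
import numpy as np

n_vis = 784  # total input dimension, e.g. 28 x 28 pixels
use_sum = False
scale = 1. if use_sum else 1. / float(n_vis)

# Hypothetical per-example reconstruction costs from the visible
# layer, and per-example label costs from the top (softmax) layer.
visible_cost = np.array([12.3, 8.7, 15.1])
label_cost = np.array([2.0, 0.5, 1.3])

# Still one value per example after combining the two terms.
total = visible_cost + scale * label_cost
```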

def cost_from_states(self, state, new_state, dbm, X, Y, drop_mask,
drop_mask_Y, new_drop_mask, new_drop_mask_Y,
return_locals=False):
@@ -1820,6 +1948,120 @@ def cost_from_states(self, state, new_state, dbm, X, Y, drop_mask,

return total_cost

def per_example_cost_from_states(self, state, new_state, dbm, X, Y,
drop_mask, drop_mask_Y, new_drop_mask,
new_drop_mask_Y, return_locals=False):
"""
Returns the total cost per example, given the states produced by
inference.
This includes activity regularization costs, not just generalized
pseudolikelihood costs.

Parameters
----------
state : OrderedDict
The state of the model after inference.
new_state : OrderedDict
The state of the model after inference with a different mask.
dbm : DBM.
X : A batch of input pixels.
Y : A batch of output labels.
drop_mask : A batch of mask values determining which pixels are inputs.
drop_mask_Y : Theano matrix
A batch of mask values determining which labels are inputs.
new_drop_mask : The second mask.
new_drop_mask_Y : The second label mask.
return_locals : bool
If True, return all local variables

Returns
-------
cost_per_example : 1D Theano tensor containing cost per example
locals : Optional
If return_locals is True, returns the dictionary of all local
variables. Note that this means all implementation changes are
now API changes.
"""

if hasattr(self, 'both_directions') and self.both_directions:
raise NotImplementedError("both_directions was a research "
"feature that is no longer supported.")

if not self.supervised:
assert drop_mask_Y is None
assert new_drop_mask_Y is None
if self.supervised:
assert drop_mask_Y is not None
assert Y is not None

V_hat_unmasked = state['V_hat_unmasked']
assert V_hat_unmasked.ndim == X.ndim

if not hasattr(self, 'use_sum'):
self.use_sum = False

gicpe = self.get_inpaint_cost_per_example
inpaint_cost_per_example = gicpe(dbm, X, V_hat_unmasked, drop_mask,
state, Y, drop_mask_Y)
assert inpaint_cost_per_example.ndim == 1

total_cost_per_example = inpaint_cost_per_example

if hasattr(self, 'range_rewards') and self.range_rewards is not None:
raise NotImplementedError("range_rewards was a research feature "
"not supported going forward.")

if hasattr(self, 'stdev_rewards') and self.stdev_rewards is not None:
raise NotImplementedError("stdev_rewards was a research feature "
"not supported going forward.")

l1_act_cost_per_example = None
if self.l1_act_targets is not None:
l1_act_cost_per_example = T.zeros_like(total_cost_per_example)
if self.l1_act_eps is None:
self.l1_act_eps = [None] * len(self.l1_act_targets)
for layer, mf_state, targets, coeffs, eps in \
safe_izip(dbm.hidden_layers, state['H_hat'],
self.l1_act_targets, self.l1_act_coeffs,
self.l1_act_eps):

assert not isinstance(targets, str)

try:
gl1acpe = layer.get_l1_act_cost_per_example
layer_cost_per_example = gl1acpe(mf_state, targets,
coeffs, eps)
assert layer_cost_per_example.ndim == 1
except NotImplementedError:
if coeffs == 0.:
layer_cost_per_example = 0.
else:
raise
if layer_cost_per_example != 0.:
l1_act_cost_per_example += layer_cost_per_example
# end for substates
# end for layers
assert l1_act_cost_per_example.ndim == 1
total_cost_per_example += l1_act_cost_per_example
# end if act penalty

if not hasattr(self, 'hid_presynaptic_cost'):
self.hid_presynaptic_cost = None
if self.hid_presynaptic_cost is not None:
raise NotImplementedError("this was a research feature "
"not supported going forward.")

if not hasattr(self, 'reweighted_act_targets'):
self.reweighted_act_targets = None
if self.reweighted_act_targets is not None:
raise NotImplementedError("this was a research feature "
"not supported going forward.")

assert total_cost_per_example.ndim == 1
if return_locals:
return total_cost_per_example, locals()

return total_cost_per_example

default_seed = 20120712

