From 88874b7708dcf305ff41c20519370054f51015c0 Mon Sep 17 00:00:00 2001 From: Michal Povinsky Date: Thu, 27 Apr 2017 09:39:13 +0200 Subject: [PATCH 1/3] Bring TemporalAdapter back This reverts commit cd8d0bc0d2cea1529efe51ccf2db33d9152fff0f. --- LanguageModel.lua | 38 ++++---------------------------------- 1 file changed, 4 insertions(+), 34 deletions(-) diff --git a/LanguageModel.lua b/LanguageModel.lua index d6248184..5688711e 100644 --- a/LanguageModel.lua +++ b/LanguageModel.lua @@ -1,6 +1,7 @@ require 'torch' require 'nn' +require 'TemporalAdapter' require 'VanillaRNN' require 'LSTM' @@ -28,10 +29,8 @@ function LM:__init(kwargs) local V, D, H = self.vocab_size, self.wordvec_dim, self.rnn_size - self.net = nn.Sequential() self.rnns = {} - self.bn_view_in = {} - self.bn_view_out = {} + self.net = nn.Sequential() self.net:add(nn.LookupTable(V, D)) for i = 1, self.num_layers do @@ -47,47 +46,18 @@ function LM:__init(kwargs) table.insert(self.rnns, rnn) self.net:add(rnn) if self.batchnorm == 1 then - local view_in = nn.View(1, 1, -1):setNumInputDims(3) - table.insert(self.bn_view_in, view_in) - self.net:add(view_in) - self.net:add(nn.BatchNormalization(H)) - local view_out = nn.View(1, -1):setNumInputDims(2) - table.insert(self.bn_view_out, view_out) - self.net:add(view_out) + self.net:add(nn.TemporalAdapter(nn.BatchNormalization(H))) end if self.dropout > 0 then self.net:add(nn.Dropout(self.dropout)) end end - -- After all the RNNs run, we will have a tensor of shape (N, T, H); - -- we want to apply a 1D temporal convolution to predict scores for each - -- vocab element, giving a tensor of shape (N, T, V). Unfortunately - -- nn.TemporalConvolution is SUPER slow, so instead we will use a pair of - -- views (N, T, H) -> (NT, H) and (NT, V) -> (N, T, V) with a nn.Linear in - -- between. Unfortunately N and T can change on every minibatch, so we need - -- to set them in the forward pass. - self.view1 = nn.View(1, 1, -1):setNumInputDims(3) - self.view2 = nn.View(1, -1):setNumInputDims(2) - - self.net:add(self.view1) - self.net:add(nn.Linear(H, V)) - self.net:add(self.view2) + self.net:add(nn.TemporalAdapter(nn.Linear(H, V))) end function LM:updateOutput(input) - local N, T = input:size(1), input:size(2) - self.view1:resetSize(N * T, -1) - self.view2:resetSize(N, T, -1) - - for _, view_in in ipairs(self.bn_view_in) do - view_in:resetSize(N * T, -1) - end - for _, view_out in ipairs(self.bn_view_out) do - view_out:resetSize(N, T, -1) - end - return self.net:forward(input) end From c3c20bb49f0f57623b612d7cda1f74f19b2dfbfe Mon Sep 17 00:00:00 2001 From: Michal Povinsky Date: Tue, 18 Oct 2016 09:59:11 +0200 Subject: [PATCH 2/3] Implement missing methods in TemporalAdapter --- TemporalAdapter.lua | 41 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 41 insertions(+) diff --git a/TemporalAdapter.lua b/TemporalAdapter.lua index c64f4d9d..53ec1f68 100644 --- a/TemporalAdapter.lua +++ b/TemporalAdapter.lua @@ -42,3 +42,44 @@ function layer:updateGradInput(input, gradOutput) return self.gradInput end +function layer:training() + self.net:training() + parent.training(self) +end + +function layer:evaluate() + self.net:evaluate() + parent.evaluate(self) +end + +function layer:parameters() + return self.net:parameters() +end + +function layer:accGradParameters(input, gradOutput, scale) + return self.net:accGradParameters(input, gradOutput, scale) +end + +function layer:backward(input, gradOutput, scale) + return self.net:backward(input, gradOutput, scale) +end + +function layer:zeroGradParameters() + return self.net:zeroGradParameters() +end + +function layer:updateParameters(learningRate) + return self.net:updateParameters(learningRate) +end + +function layer:accUpdateGradParameters(input, gradOutput, learningRate) + return self.net:accUpdateGradParameters(input, gradOutput, learningRate) +end + +function layer:clearState() + self.net:clearState() +end + +function layer:__tostring__() + return 'nn.TemporalAdapter: ' .. tostring(self.net.modules[2]) +end From c5f38b390033a4711944416ee445de2c87722a00 Mon Sep 17 00:00:00 2001 From: Michal Povinsky Date: Tue, 18 Oct 2016 10:46:11 +0200 Subject: [PATCH 3/3] Implemented support of pre-TemporalAdapter models --- LanguageModel.lua | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/LanguageModel.lua b/LanguageModel.lua index 5688711e..c6bc3268 100644 --- a/LanguageModel.lua +++ b/LanguageModel.lua @@ -114,6 +114,31 @@ function LM:decode_string(encoded) return s end +function LM:read(file, version) + parent.read(self, file) + if self.view1 then + self:update_for_temporaladapter() + end +end + +function LM:update_for_temporaladapter() + local between_views, mods = false, self.net.modules + local i,v + self.net = nn.Sequential():float() + for i,v in ipairs(mods) do + if torch.type(mods[i]) == 'nn.View' then + between_views = not between_views + elseif between_views then + self.net:add(nn.TemporalAdapter(mods[i]):float()) + else + self.net:add(mods[i]) + end + end + self.view1 = nil + self.view2 = nil + self.bn_view_in = nil + self.bn_view_out = nil +end --[[ Sample from the language model. Note that this will reset the states of the