Bring back and fix TemporalAdapter #185

Open · wants to merge 3 commits into master
63 changes: 29 additions & 34 deletions LanguageModel.lua
@@ -1,6 +1,7 @@
require 'torch'
require 'nn'

+ require 'TemporalAdapter'
require 'VanillaRNN'
require 'LSTM'

@@ -28,10 +29,8 @@ function LM:__init(kwargs)

local V, D, H = self.vocab_size, self.wordvec_dim, self.rnn_size

- self.net = nn.Sequential()
self.rnns = {}
- self.bn_view_in = {}
- self.bn_view_out = {}
+ self.net = nn.Sequential()

self.net:add(nn.LookupTable(V, D))
for i = 1, self.num_layers do
@@ -47,47 +46,18 @@ function LM:__init(kwargs)
table.insert(self.rnns, rnn)
self.net:add(rnn)
if self.batchnorm == 1 then
- local view_in = nn.View(1, 1, -1):setNumInputDims(3)
- table.insert(self.bn_view_in, view_in)
- self.net:add(view_in)
- self.net:add(nn.BatchNormalization(H))
- local view_out = nn.View(1, -1):setNumInputDims(2)
- table.insert(self.bn_view_out, view_out)
- self.net:add(view_out)
+ self.net:add(nn.TemporalAdapter(nn.BatchNormalization(H)))
end
if self.dropout > 0 then
self.net:add(nn.Dropout(self.dropout))
end
end

- -- After all the RNNs run, we will have a tensor of shape (N, T, H);
- -- we want to apply a 1D temporal convolution to predict scores for each
- -- vocab element, giving a tensor of shape (N, T, V). Unfortunately
- -- nn.TemporalConvolution is SUPER slow, so instead we will use a pair of
- -- views (N, T, H) -> (NT, H) and (NT, V) -> (N, T, V) with a nn.Linear in
- -- between. Unfortunately N and T can change on every minibatch, so we need
- -- to set them in the forward pass.
- self.view1 = nn.View(1, 1, -1):setNumInputDims(3)
- self.view2 = nn.View(1, -1):setNumInputDims(2)
-
- self.net:add(self.view1)
- self.net:add(nn.Linear(H, V))
- self.net:add(self.view2)
+ self.net:add(nn.TemporalAdapter(nn.Linear(H, V)))
end


function LM:updateOutput(input)
- local N, T = input:size(1), input:size(2)
- self.view1:resetSize(N * T, -1)
- self.view2:resetSize(N, T, -1)
-
- for _, view_in in ipairs(self.bn_view_in) do
- view_in:resetSize(N * T, -1)
- end
- for _, view_out in ipairs(self.bn_view_out) do
- view_out:resetSize(N, T, -1)
- end
-
return self.net:forward(input)
end

@@ -144,6 +114,31 @@ function LM:decode_string(encoded)
return s
end

+ function LM:read(file, version)
+ parent.read(self, file)
+ if self.view1 then
+ self:update_for_temporaladapter()
+ end
+ end
+
+ function LM:update_for_temporaladapter()
+ local between_views, mods = false, self.net.modules
+ local i,v
+ self.net = nn.Sequential():float()
+ for i,v in ipairs(mods) do
+ if torch.type(mods[i]) == 'nn.View' then
+ between_views = not between_views
+ elseif between_views then
+ self.net:add(nn.TemporalAdapter(mods[i]):float())
+ else
+ self.net:add(mods[i])
+ end
+ end
+ self.view1 = nil
+ self.view2 = nil
+ self.bn_view_in = nil
+ self.bn_view_out = nil
+ end
+
--[[
Sample from the language model. Note that this will reset the states of the
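
For context, LM:read and LM:update_for_temporaladapter above exist so that checkpoints saved before this change still load: Torch's deserializer calls a class's read(file, version) method, LM:read detects the legacy layout (self.view1 is present), and any module sandwiched between a pair of nn.View layers in the old net is rewrapped in nn.TemporalAdapter. A minimal loading sketch, assuming the usual torch-rnn checkpoint layout where the serialized table carries the model under a model field; the path is illustrative:

-- Hypothetical example: deserializing an old checkpoint runs LM:read, which
-- converts the View/Linear/View (and View/BatchNormalization/View) patterns
-- into nn.TemporalAdapter wrappers.
require 'torch'
require 'nn'
require 'LanguageModel'

local checkpoint = torch.load('cv/old_checkpoint.t7')  -- illustrative path
local model = checkpoint.model                          -- assumes a 'model' field
model:evaluate()
-- The converted net should now list nn.TemporalAdapter entries where the old
-- nn.View triples used to be.
print(model.net)
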
41 changes: 41 additions & 0 deletions TemporalAdapter.lua
@@ -42,3 +42,44 @@ function layer:updateGradInput(input, gradOutput)
return self.gradInput
end

+ function layer:training()
+ self.net:training()
+ parent.training(self)
+ end
+
+ function layer:evaluate()
+ self.net:evaluate()
+ parent.evaluate(self)
+ end
+
+ function layer:parameters()
+ return self.net:parameters()
+ end
+
+ function layer:accGradParameters(input, gradOutput, scale)
+ return self.net:accGradParameters(input, gradOutput, scale)
+ end
+
+ function layer:backward(input, gradOutput, scale)
+ return self.net:backward(input, gradOutput, scale)
+ end
+
+ function layer:zeroGradParameters()
+ return self.net:zeroGradParameters()
+ end
+
+ function layer:updateParameters(learningRate)
+ return self.net:updateParameters(learningRate)
+ end
+
+ function layer:accUpdateGradParameters(input, gradOutput, learningRate)
+ return self.net:accUpdateGradParameters(input, gradOutput, learningRate)
+ end
+
+ function layer:clearState()
+ self.net:clearState()
+ end
+
+ function layer:__tostring__()
+ return 'nn.TemporalAdapter: ' .. tostring(self.net.modules[2])
+ end