Store all ids of an example in a single tensor.
Speedup!
macournoyer committed Nov 4, 2015
1 parent afc16d4 commit ffedc96
Showing 3 changed files with 46 additions and 29 deletions.
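
In short: before this commit each training example was a pair of plain Lua tables of word ids; after it, each example is a pair of N x 1 `torch.IntTensor`s, so a whole sequence lives in a single tensor. A minimal sketch of the two representations, with made-up word ids:

```lua
require 'torch'

-- Word ids below are hypothetical, purely for illustration.
local before = { {5, 12, 7}, {9, 3} }            -- input/target as plain Lua tables
local after  = { torch.IntTensor{{5}, {12}, {7}},
                 torch.IntTensor{{9}, {3}} }     -- input/target as N x 1 tensors
```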
13 changes: 11 additions & 2 deletions dataset.lua
```diff
@@ -90,7 +90,8 @@ end
 function DataSet:removeLowFreqWords(input)
   local unknown = self:makeWordId("<unknown>")
 
-  for i, id in ipairs(input) do
+  for i = 1, input:size(1) do
+    local id = input[i][1]
     local word = self.id2word[id]
 
     if word == nil then
@@ -107,6 +108,14 @@ function DataSet:removeLowFreqWords(input)
   end
 end
 
+local function table2tensor(t)
+  local tensor = torch.IntTensor(#t, 1)
+  for i,v in ipairs(t) do
+    tensor[i] = v
+  end
+  return tensor
+end
+
 function DataSet:visitConversation(lines, start)
   start = start or 1
 
@@ -119,7 +128,7 @@ function DataSet:visitConversation(lines, start)
       local targetIds = self:visitText(target.text)
 
       if inputIds and targetIds then
-        table.insert(self.examples, { inputIds, targetIds })
+        table.insert(self.examples, { table2tensor(inputIds), table2tensor(targetIds) })
      end
     end
   end
```
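
The new `table2tensor` helper carries the change: it copies a Lua table of word ids into a preallocated `#t` x 1 `IntTensor`. A quick usage sketch (the helper is copied from the diff above; the ids are hypothetical):

```lua
require 'torch'

-- From the diff above: convert a Lua table of word ids to an N x 1 tensor.
local function table2tensor(t)
  local tensor = torch.IntTensor(#t, 1)
  for i,v in ipairs(t) do
    tensor[i] = v  -- tensor[i] is a 1-element row; assigning a number fills it
  end
  return tensor
end

local ids = table2tensor({5, 12, 7})  -- e.g. three tokenized words
print(ids:size(1))  --> 3
print(ids[2][1])    --> 12
```

This shape is also why `removeLowFreqWords` now reads `input[i][1]`: row `i` of the N x 1 tensor is a 1-element tensor, and `[1]` pulls out the raw id.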
21 changes: 12 additions & 9 deletions eval.lua
```diff
@@ -2,11 +2,13 @@ require 'e'
 local tokenizer = require "tokenizer"
 
 if dataset == nil then
-  dataset = e.DataSet("data/cornell_movie_dialogs.t7",
-                      e.CornellMovieDialogs("data/cornell_movie_dialogs"))
+  -- dataset = e.DataSet("data/cornell_movie_dialogs.t7",
+  --                     e.CornellMovieDialogs("data/cornell_movie_dialogs"))
+  dataset = e.DataSet("data/cornell_movie_dialogs_tiny.t7",
+                      e.CornellMovieDialogs("data/cornell_movie_dialogs"), 1000)
 end
 
-EOS = dataset.word2id["</s>"]
+EOS = torch.IntTensor{dataset.word2id["</s>"]}
 
 if model == nil then
   print("-- Loading model")
@@ -26,22 +28,23 @@ function say(text)
   local inputs = {}
   for t, word in tokenizer.tokenize(text) do
     local t = dataset.word2id[word:lower()]
-    table.insert(inputs, t)
+    table.insert(inputs, torch.IntTensor{t})
   end
 
   model:forget()
 
   for i = #inputs, 1, -1 do
     local input = inputs[i]
-    model:forward(torch.Tensor{input})
+    model:forward(input)
   end
 
   local input = EOS
   repeat
-    local output = model:forward(torch.Tensor{input})
-    io.write(dataset.id2word[output2wordId(output)] .. " ")
-    input = output2wordId(output)
-  until input == EOS
+    local output = model:forward(input)
+    local outputWordId = output2wordId(output)
+    io.write(dataset.id2word[outputWordId] .. " ")
+    input = torch.IntTensor{outputWordId}
+  until input[1] == EOS[1]
 
   print("")
 end
```
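
`output2wordId` is defined elsewhere in eval.lua and is untouched by this commit. For context, a plausible greedy implementation (an assumption, not the repository's actual code) takes the argmax of the LogSoftMax output:

```lua
-- ASSUMPTION: sketch of a greedy decoder step, not code from this repo.
local function output2wordId(output)
  local _, id = output:view(-1):max(1)  -- argmax over the vocabulary
  return id[1]
end
```

With the word id in hand, the `repeat` loop wraps it back into a 1-element `IntTensor` for the next forward pass and stops once the predicted id equals `EOS[1]`.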
41 changes: 23 additions & 18 deletions train.lua
```diff
@@ -2,12 +2,12 @@ require 'e'
 require 'xlua'
 
 -- Data
-dataset = e.DataSet("data/cornell_movie_dialogs.t7",
-                    e.CornellMovieDialogs("data/cornell_movie_dialogs"))
--- dataset = e.DataSet("data/cornell_movie_dialogs_tiny.t7",
---                     e.CornellMovieDialogs("data/cornell_movie_dialogs"), 1000)
+-- dataset = e.DataSet("data/cornell_movie_dialogs.t7",
+--                     e.CornellMovieDialogs("data/cornell_movie_dialogs"))
+dataset = e.DataSet("data/cornell_movie_dialogs_tiny.t7",
+                    e.CornellMovieDialogs("data/cornell_movie_dialogs"), 1000)
 
-EOS = dataset.word2id["</s>"]
+EOS = torch.IntTensor{dataset.word2id["</s>"]}
 
 
 -- Model
@@ -20,8 +20,8 @@ model:add(nn.LookupTable(dataset.wordsCount, inputSize))
 model:add(nn.SplitTable(1,2))
 model:add(nn.Sequencer(nn.FastLSTM(inputSize, hiddenSize)))
 model:add(nn.Sequencer(nn.Dropout(dropout)))
-model:add(nn.Sequencer(nn.FastLSTM(hiddenSize, hiddenSize)))
-model:add(nn.Sequencer(nn.Dropout(dropout)))
+-- model:add(nn.Sequencer(nn.FastLSTM(hiddenSize, hiddenSize)))
+-- model:add(nn.Sequencer(nn.Dropout(dropout)))
 model:add(nn.Sequencer(nn.Linear(hiddenSize, dataset.wordsCount)))
 model:add(nn.JoinTable(1,2))
 model:add(nn.LogSoftMax())
@@ -34,7 +34,7 @@ model:remember('both')
 local criterion = nn.ClassNLLCriterion()
 local learningRate = 0.05
 local momentum = 0.9
-local epochCount = 1
+local epochCount = 5
 
 for epoch = 1, epochCount do
   print("-- Epoch " .. epoch)
@@ -44,20 +44,25 @@ for epoch = 1, epochCount do
     local targets = example[2]
 
     -- seq2seq paper recommends passing input in reverse order
-    for i = #inputs, 1, -1 do
+    for i = inputs:size(1), 1, -1 do
       local input = inputs[i]
-      model:forward(torch.Tensor{input})
+      model:forward(input)
     end
 
     local input = EOS
-    for i = 1, #targets + 1 do
-      local target = targets[i] or EOS
-
-      local output = model:forward(torch.Tensor{input})
-      local err = criterion:forward(output, torch.Tensor{target})
-
-      local gradOutput = criterion:backward(output, torch.Tensor{target})
-      model:backward(torch.Tensor{input}, gradOutput)
+    for i = 1, targets:size(1) + 1 do
+      local target
+      if i > targets:size(1) then
+        target = EOS
+      else
+        target = targets[i]
+      end
+
+      local output = model:forward(input)
+      local err = criterion:forward(output, target)
+
+      local gradOutput = criterion:backward(output, target)
+      model:backward(input, gradOutput)
 
       input = target
     end
```
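
Because `inputs` and `targets` are now N x 1 tensors, `inputs[i]` already yields the 1-element tensor the model expects for one timestep, so the old `torch.Tensor{input}` wrapping disappears. A tiny sketch of that slicing, with hypothetical values:

```lua
require 'torch'

local inputs = torch.IntTensor{{4}, {9}, {2}}  -- hypothetical N x 1 example

-- Reverse order, as in the training loop above.
for i = inputs:size(1), 1, -1 do
  local input = inputs[i]  -- a 1-element IntTensor: one timestep
  print(input[1])          -- prints 2, then 9, then 4
end
```

The explicit `if i > targets:size(1)` branch replaces the old `targets[i] or EOS` idiom for the same reason: indexing a tensor past its last row raises an error rather than returning `nil`, so the end-of-sequence fallback needs a bounds check.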
