5 dyncnn is a deep-learning algorithm for the prediction of
6 interacting object dynamics
8 Copyright (c) 2016 Idiap Research Institute, http://www.idiap.ch/
9 Written by Francois Fleuret <francois.fleuret@idiap.ch>
11 This file is part of dyncnn.
13 dyncnn is free software: you can redistribute it and/or modify it
14 under the terms of the GNU General Public License version 3 as
15 published by the Free Software Foundation.
17 dyncnn is distributed in the hope that it will be useful, but
18 WITHOUT ANY WARRANTY; without even the implied warranty of
19 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 General Public License for more details.
22 You should have received a copy of the GNU General Public License
23 along with dyncnn. If not, see <http://www.gnu.org/licenses/>.
35 ----------------------------------------------------------------------
-- Format-and-print helper: like C printf, with print()'s trailing newline.
-- f: format string for string.format; ...: the format arguments.
function printf(f, ...)
   -- Forward the varargs directly: the original unpack({...}) would
   -- silently truncate the argument list at the first nil.
   print(string.format(f, ...))
end
-- Same as printf, but wraps the message in the terminal color code c
-- and resets to colors.black afterward.
function printfc(c, f, ...)
   -- Print directly rather than routing the already-formatted string
   -- back through printf: a '%' produced by the formatting would
   -- otherwise be re-interpreted as a format directive.
   print(c .. string.format(f, ...) .. colors.black)
end
-- Runs shell command c through sys.execute and logs, in blue, both
-- the command and its captured output.
function logCommand(c)
   print(colors.blue .. '[' .. c .. '] -> [' .. sys.execute(c) .. ']' .. colors.black)
end
----------------------------------------------------------------------
-- Environment and command line arguments

local defaultNbThreads = 1
local defaultUseGPU = false

if os.getenv('TORCH_NB_THREADS') then
   -- The environment provides a string; torch.setnumthreads and the
   -- CmdLine option expect a number. Fall back to the default if the
   -- value does not parse.
   defaultNbThreads = tonumber(os.getenv('TORCH_NB_THREADS')) or defaultNbThreads
   print('Environment variable TORCH_NB_THREADS is set and equal to ' .. defaultNbThreads)
else
   print('Environment variable TORCH_NB_THREADS is not set')
end

if os.getenv('TORCH_USE_GPU') then
   -- Only the literal value 'yes' enables the GPU
   defaultUseGPU = os.getenv('TORCH_USE_GPU') == 'yes'
   print('Environment variable TORCH_USE_GPU is set and evaluated as ' .. tostring(defaultUseGPU))
else
   print('Environment variable TORCH_USE_GPU is not set.')
end

----------------------------------------------------------------------
-- Command-line definition, parsing, and logging setup. Fills the
-- global `params` used by the rest of the script.
local cmd = torch.CmdLine()

cmd:text('General setup')

cmd:option('-seed', 1, 'initial random seed')
cmd:option('-nbThreads', defaultNbThreads, 'how many threads (environment variable TORCH_NB_THREADS)')
cmd:option('-useGPU', defaultUseGPU, 'should we use cuda (environment variable TORCH_USE_GPU)')
cmd:option('-resultFreq', 100, 'at which epoch frequency should we save result images')
cmd:option('-exampleInternals', -1, 'should we save inner activation images')
cmd:option('-noLog', false, 'should we prevent logging')
cmd:option('-rundir', '', 'the directory for results')

cmd:option('-nbEpochs', 1000, 'nb of epochs for the heavy setting')
cmd:option('-learningRate', 0.1, 'learning rate')
cmd:option('-batchSize', 128, 'size of the mini-batches')
cmd:option('-filterSize', 5, 'convolution filter size')
cmd:option('-nbTrainSamples', 32768)
cmd:option('-nbValidationSamples', 1024)
cmd:option('-nbTestSamples', 1024)

cmd:text('Problem to solve')

cmd:option('-dataDir', './data/10p-mg', 'data directory')

cmd:text('Network structure')

cmd:option('-nbChannels', 16)
cmd:option('-nbBlocks', 8)

------------------------------

-- Prefix every log line with a timestamp
cmd:addTime('DYNCNN','%F %T')

params = cmd:parse(arg)

-- Default run directory: a name derived from the full parameter set,
-- so distinct configurations land in distinct directories
if params.rundir == '' then
   params.rundir = cmd:string('exp', params, { })
end

paths.mkdir(params.rundir)

if not params.noLog then
   -- Append to the log if there is one
   cmd:log(io.open(params.rundir .. '/log', 'a'), params)
end
----------------------------------------------------------------------
-- The experiment per se

-- targetDepth selects which frame depth the network must predict.
-- NOTE(review): -predictGrasp is not among the options visible here;
-- presumably declared in a part of the file not shown -- confirm.
if params.predictGrasp then
   params.targetDepth = 2
else
   params.targetDepth = 1
end

----------------------------------------------------------------------
-- Initializations

torch.setnumthreads(params.nbThreads)
torch.setdefaulttensortype('torch.FloatTensor')
torch.manualSeed(params.seed)
----------------------------------------------------------------------
-- Dealing with the CPU/GPU

-- mynn will take entries in that order: mynn, cudnn, cunn, nn
mynn = {}

setmetatable(mynn, {
   __index = function(table, key)
      -- cudnn/cunn are nil unless the GPU packages were required
      return (cudnn and cudnn[key]) or (cunn and cunn[key]) or nn[key]
   end
})

-- These are the tensors that can be kept on the CPU
mynn.SlowTensor = torch.Tensor

-- These are the tensors that should be moved to the GPU
mynn.FastTensor = torch.Tensor

if params.useGPU then
   -- NOTE(review): the require lines were reconstructed -- confirm
   -- against the original source
   require 'cutorch'
   require 'cunn'
   require 'cudnn'
   cudnn.benchmark = true
   mynn.FastTensor = torch.CudaTensor
end

----------------------------------------------------------------------
-- Loads nb samples, starting at 1-based index first, from the PNG
-- frames under params.dataDir. Each frame is a 2x2 grid of cells of
-- size data.height x data.width. Returns a table with fields
-- name, nbSamples, width, height, input (nb x 2 x H x W) and
-- target (nb x 1 x H x W).
function loadData(first, nb, name)
   print('Loading data `' .. name .. '\'.')

   local data = {}

   data.name = name
   data.nbSamples = nb
   -- NOTE(review): cell geometry reconstructed as 64x64 -- confirm
   -- against the data generator
   data.width = 64
   data.height = 64

   data.input = mynn.SlowTensor(data.nbSamples, 2, data.height, data.width)
   data.target = mynn.SlowTensor(data.nbSamples, 1, data.height, data.width)

   for i = 1, data.nbSamples do
      local n = i-1 + first-1
      -- Frames are stored one thousand per sub-directory
      local frame = image.load(string.format('%s/%03d/dyn_%06d.png',
                                             params.dataDir,
                                             math.floor(n/1000), n))

      -- Invert intensities (black-on-white PNG -> white-on-black)
      frame:mul(-1.0):add(1.0)
      -- Collapse the color channels into a single gray plane
      frame = frame:max(1):select(1, 1)

      -- Top-right cell (presumably the grasp channel -- see the
      -- encoder comment in createModel)
      data.input[i][1]:copy(frame:sub(0 * data.height + 1, 1 * data.height,
                                      1 * data.width + 1, 2 * data.width))
      -- Top-left cell (presumably the starting configuration)
      data.input[i][2]:copy(frame:sub(0 * data.height + 1, 1 * data.height,
                                      0 * data.width + 1, 1 * data.width))
      -- Bottom-right cell: the configuration to predict
      data.target[i][1]:copy(frame:sub(1 * data.height + 1, 2 * data.height,
                                       1 * data.width + 1, 2 * data.width))
   end

   return data
end
----------------------------------------------------------------------

-- Walks the module tree depth-first and appends to collection.outputs
-- the output tensor of every module whose torch.type appears in the
-- set `which` (or of every module when which is nil). collection.nb
-- tracks the number of collected tensors.
function collectAllOutputs(model, collection, which)
   if torch.type(model) == 'nn.Sequential' then
      for i = 1, #model.modules do
         collectAllOutputs(model.modules[i], collection, which)
      end
   elseif not which or which[torch.type(model)] then
      if torch.isTensor(model.output) then
         collection.nb = collection.nb + 1
         collection.outputs[collection.nb] = model.output
      end
   end
end
-- Saves to params.rundir an image showing the network input and the
-- activation maps of every ReLU layer for sample n of data.
function saveInternalsImage(model, data, n)
   -- Explicitely copy to keep input as a mynn.FastTensor
   local input = mynn.FastTensor(1, 2, data.height, data.width)
   input:copy(data.input:narrow(1, n, 1))

   local output = model:forward(input)

   local collection = {}
   collection.outputs = {}
   -- Slot 1 is the raw input itself
   collection.nb = 1
   collection.outputs[collection.nb] = input

   collectAllOutputs(model, collection,
                     {
                        ['nn.ReLU'] = true,
                        ['cunn.ReLU'] = true,
                        ['cudnn.ReLU'] = true,
                     }
   )

   -- Make sure the final model output is included even when the last
   -- layer is not a ReLU
   if collection.outputs[collection.nb] ~= model.output then
      collection.nb = collection.nb + 1
      collection.outputs[collection.nb] = model.output
   end

   local fileName = string.format('%s/internals_%s_%06d.png',
                                  params.rundir,
                                  data.name, n)

   print('Saving ' .. fileName)
   image.save(fileName, imageFromTensors(collection.outputs))
end
----------------------------------------------------------------------

-- Writes up to nbMax (default 50) composite result images for data
-- into params.rundir, plus a result_<name>_losses.dat file with the
-- per-sample MSE loss next to each image file name.
function saveResultImage(model, data, nbMax)
   local criterion = nn.MSECriterion()

   if params.useGPU then
      print('Moving the criterion to the GPU.')
      criterion:cuda()
   end

   local input = mynn.FastTensor(1, 2, data.height, data.width)
   local target = mynn.FastTensor(1, 1, data.height, data.width)

   local nbMax = nbMax or 50

   local nb = math.min(nbMax, data.nbSamples)

   printf('Write %d result images for `%s\'.', nb, data.name)

   local lossFile = io.open(params.rundir .. '/result_' .. data.name .. '_losses.dat', 'w')

   for n = 1, nb do

      -- Explicitely copy to keep input as a mynn.FastTensor
      input:copy(data.input:narrow(1, n, 1))
      target:copy(data.target:narrow(1, n, 1))

      local output = model:forward(input)
      local loss = criterion:forward(output, target)

      -- Bring the prediction back to the CPU for image composition
      output = mynn.SlowTensor(output:size()):copy(output)

      -- We use our magical img.lua to create the result images.
      -- (The original source also carried an alternate flat layout:
      --  { pad = 1, data.input[n][1] }, { pad = 1, data.input[n][2] },
      --  { pad = 1, data.target[n][1] }, { pad = 1, output[1][1] }.)
      local comp = {
         {
            vertical = true,
            { pad = 1, data.input[n][1] },
            { pad = 1, data.input[n][2] }
         },
         torch.Tensor(4, 4):fill(1.0),
         {
            vertical = true,
            { pad = 1, data.target[n][1] },
            { pad = 1, output[1][1] },
            -- Absolute prediction error as a third row
            { pad = 1, torch.csub(data.target[n][1], output[1][1]):abs() }
         }
      }

      local result = combineImages(1.0, comp)

      -- Invert back to black-on-white for viewing
      result:mul(-1.0):add(1.0)

      local fileName = string.format('result_%s_%06d.png', data.name, n)
      image.save(params.rundir .. '/' .. fileName, result)
      lossFile:write(string.format('%f %s\n', loss, fileName))
   end

   -- Fix: close the loss file so the last lines are flushed to disk
   lossFile:close()
end
----------------------------------------------------------------------

-- Builds the residual "tower": nbBlocks blocks, each a pair of padded
-- filterSize x filterSize convolutions with batch-normalization and
-- ReLU, summed with an identity shortcut (resnet style). With
-- nbBlocks == 0 the tower is a simple pass-through.
function createTower(filterSize, nbChannels, nbBlocks)

   local tower

   if nbBlocks == 0 then

      tower = nn.Identity()

   else

      tower = mynn.Sequential()

      for b = 1, nbBlocks do
         local block = mynn.Sequential()

         -- (filterSize - 1) / 2 padding keeps the spatial resolution
         block:add(mynn.SpatialConvolution(nbChannels,
                                           nbChannels,
                                           filterSize, filterSize,
                                           1, 1,
                                           (filterSize - 1) / 2, (filterSize - 1) / 2))
         block:add(mynn.SpatialBatchNormalization(nbChannels))
         block:add(mynn.ReLU(true))

         block:add(mynn.SpatialConvolution(nbChannels,
                                           nbChannels,
                                           filterSize, filterSize,
                                           1, 1,
                                           (filterSize - 1) / 2, (filterSize - 1) / 2))

         -- Residual connection: output = block(x) + x
         local parallel = mynn.ConcatTable()
         parallel:add(block):add(mynn.Identity())

         tower:add(parallel):add(mynn.CAddTable(true))

         tower:add(mynn.SpatialBatchNormalization(nbChannels))
         tower:add(mynn.ReLU(true))
      end

   end

   return tower

end
-- Builds the complete network: encoder convolution, residual tower,
-- decoder convolution. All convolutions are padded so the output
-- image has the same spatial size as the input.
-- imageWidth/imageHeight are currently unused (the network is fully
-- convolutional) but kept for interface compatibility.
function createModel(imageWidth, imageHeight,
                     filterSize, nbChannels, nbBlocks)

   local model = mynn.Sequential()

   -- Encode the two input channels (grasping image and starting
   -- configuration) into the internal number of channels
   model:add(mynn.SpatialConvolution(2,
                                     nbChannels,
                                     filterSize, filterSize,
                                     1, 1,
                                     (filterSize - 1) / 2, (filterSize - 1) / 2))

   model:add(mynn.SpatialBatchNormalization(nbChannels))
   model:add(mynn.ReLU(true))

   -- Add the resnet modules
   model:add(createTower(filterSize, nbChannels, nbBlocks))

   -- Decode down to a single channel, which is the final image
   model:add(mynn.SpatialConvolution(nbChannels,
                                     1,
                                     filterSize, filterSize,
                                     1, 1,
                                     (filterSize - 1) / 2, (filterSize - 1) / 2))

   return model

end
----------------------------------------------------------------------

-- Copies into batch.input/batch.target the samples starting at index
-- first from data, resizing the batch tensors when the trailing batch
-- is shorter than params.batchSize. When permutation is given, batch
-- slot k receives sample permutation[first + k - 1]; otherwise the
-- samples are taken in sequential order.
function fillBatch(data, first, batch, permutation)
   local actualBatchSize = math.min(params.batchSize, data.input:size(1) - first + 1)

   if actualBatchSize ~= batch.input:size(1) then
      local size = batch.input:size()
      size[1] = actualBatchSize
      batch.input:resize(size)
   end

   if actualBatchSize ~= batch.target:size(1) then
      local size = batch.target:size()
      size[1] = actualBatchSize
      batch.target:resize(size)
   end

   for k = 1, batch.input:size(1) do
      local i
      if permutation then
         i = permutation[first + k - 1]
      else
         -- Sequential order (used for validation/test passes)
         i = first + k - 1
      end
      batch.input[k] = data.input[i]
      batch.target[k] = data.target[i]
   end
end
-- Trains model with plain SGD and an MSE criterion on trainData,
-- measuring the validation loss after every epoch. The full training
-- state (model, epoch counter, RNG state) is checkpointed to
-- params.rundir after every epoch so an interrupted run resumes where
-- it stopped.
function trainModel(model, trainData, validationData)

   local criterion = nn.MSECriterion()
   local batchSize = params.batchSize

   local batch = {}
   batch.input = mynn.FastTensor(batchSize, 2, trainData.height, trainData.width)
   batch.target = mynn.FastTensor(batchSize, 1, trainData.height, trainData.width)

   -- Resume from a checkpointed epoch if the model carries one
   local startingEpoch = 1

   if model.epoch then
      startingEpoch = model.epoch + 1
   end

   if model.RNGState then
      torch.setRNGState(model.RNGState)
   end

   if params.useGPU then
      print('Moving the model and criterion to the GPU.')
      model:cuda()
      criterion:cuda()
   end

   print('Starting training.')

   local parameters, gradParameters = model:getParameters()
   printf('The model has %d parameters.', parameters:storage():size(1))

   local averageTrainLoss, averageValidationLoss
   local trainTime, validationTime

   ----------------------------------------------------------------------

   local sgdState = {
      learningRate = params.learningRate,
      momentum = 0,
      learningRateDecay = 0
   }

   for e = startingEpoch, params.nbEpochs do

      model:training()

      local permutation = torch.randperm(trainData.nbSamples)

      local accLoss = 0.0
      local nbBatches = 0
      local startTime = sys.clock()

      for b = 1, trainData.nbSamples, batchSize do

         fillBatch(trainData, b, batch, permutation)

         local opfunc = function(x)
            -- Surprisingly, copy() needs this check
            if x ~= parameters then
               parameters:copy(x)
            end

            local output = model:forward(batch.input)

            local loss = criterion:forward(output, batch.target)
            local dLossdOutput = criterion:backward(output, batch.target)

            gradParameters:zero()
            model:backward(batch.input, dLossdOutput)

            accLoss = accLoss + loss
            nbBatches = nbBatches + 1

            return loss, gradParameters
         end

         optim.sgd(opfunc, parameters, sgdState)

      end

      trainTime = sys.clock() - startTime
      averageTrainLoss = accLoss / nbBatches

      ----------------------------------------------------------------------
      -- Validation

      model:evaluate()

      local accLoss = 0.0
      local nbBatches = 0
      local startTime = sys.clock()

      for b = 1, validationData.nbSamples, batchSize do
         -- No permutation: walk the validation set in order
         fillBatch(validationData, b, batch)
         local output = model:forward(batch.input)
         accLoss = accLoss + criterion:forward(output, batch.target)
         nbBatches = nbBatches + 1
      end

      validationTime = sys.clock() - startTime
      averageValidationLoss = accLoss / nbBatches;

      ----------------------------------------------------------------------

      printf('Epoch train %0.2fs (%0.2fms / sample), validation %0.2fs (%0.2fms / sample).',
             trainTime,
             1000 * trainTime / trainData.nbSamples,
             validationTime,
             1000 * validationTime / validationData.nbSamples)

      printfc(colors.green, 'LOSS %d %f %f', e, averageTrainLoss, averageValidationLoss)

      ----------------------------------------------------------------------
      -- Save a persistent state so that we can restart from there

      model.RNGState = torch.getRNGState()
      model.epoch = e
      torch.save(params.rundir .. '/model_last.t7', model)

      ----------------------------------------------------------------------
      -- Save a duplicate of the persistent state from time to time

      if params.resultFreq > 0 and e%params.resultFreq == 0 then
         torch.save(string.format('%s/model_%04d.t7', params.rundir, e), model)
         saveResultImage(model, trainData)
         saveResultImage(model, validationData)
      end

   end

end
-- Loads the last checkpoint from params.rundir when one exists,
-- otherwise creates a fresh model; then trains it and returns it.
-- Note: `model` is deliberately global so the pcall closure can
-- assign it.
function createAndTrainModel(trainData, validationData)

   -- Load the current training state, or create a new model from
   -- scratch

   if pcall(function () model = torch.load(params.rundir .. '/model_last.t7') end) then

      printfc(colors.red,
              'Found a learning state with %d epochs finished, starting from there.',
              model.epoch)

      if params.exampleInternals > 0 then
         saveInternalsImage(model, validationData, params.exampleInternals)
         -- NOTE(review): reconstructed -- the internals-dump mode
         -- presumably stops here rather than re-training; confirm
         os.exit(0)
      end

   else

      model = createModel(trainData.width, trainData.height,
                          params.filterSize, params.nbChannels,
                          params.nbBlocks)

   end

   trainModel(model, trainData, validationData)

   return model

end
----------------------------------------------------------------------
-- Main

-- Log the environment for reproducibility.
-- NOTE(review): the loop form is reconstructed from the bare
-- 'git log -1 --format=%H' argument line -- confirm
for _, c in ipairs({
      'date',
      'uname -a',
      'git log -1 --format=%H'
}) do
   logCommand(c)
end

-- Train / validation / test splits are consecutive ranges of samples
local trainData = loadData(1,
                           params.nbTrainSamples, 'train')

local validationData = loadData(params.nbTrainSamples + 1,
                                params.nbValidationSamples, 'validation')

local model = createAndTrainModel(trainData, validationData)

----------------------------------------------------------------------
-- Test

local testData = loadData(params.nbTrainSamples + params.nbValidationSamples + 1,
                          params.nbTestSamples, 'test')

if params.useGPU then
   print('Moving the model and criterion to the GPU.')
   model:cuda()
end

saveResultImage(model, trainData)
saveResultImage(model, validationData)
saveResultImage(model, testData, 1024)