X-Git-Url: https://fleuret.org/cgi-bin/gitweb/gitweb.cgi?p=dyncnn.git;a=blobdiff_plain;f=dyncnn.lua;h=0bb780c717182ca04ebc6a389600baa49fe23720;hp=53625934e1cbfca2e779abf1b8ef245c91836feb;hb=HEAD;hpb=fe5dee151313b6abd8ffee2c5fc5593f326e663f diff --git a/dyncnn.lua b/dyncnn.lua index 5362593..0bb780c 100755 --- a/dyncnn.lua +++ b/dyncnn.lua @@ -28,64 +28,36 @@ require 'torch' require 'nn' require 'optim' require 'image' -require 'pl' -require 'img' - ----------------------------------------------------------------------- - -function printf(f, ...) - print(string.format(f, unpack({...}))) -end - -colors = sys.COLORS - -function printfc(c, f, ...) - printf(c .. string.format(f, unpack({...})) .. colors.black) -end - -function logCommand(c) - print(colors.blue .. '[' .. c .. '] -> [' .. sys.execute(c) .. ']' .. colors.black) -end - ----------------------------------------------------------------------- --- Environment and command line arguments - -local defaultNbThreads = 1 -local defaultUseGPU = false - -if os.getenv('TORCH_NB_THREADS') then - defaultNbThreads = os.getenv('TORCH_NB_THREADS') - print('Environment variable TORCH_NB_THREADS is set and equal to ' .. defaultNbThreads) -else - print('Environment variable TORCH_NB_THREADS is not set') -end - -if os.getenv('TORCH_USE_GPU') then - defaultUseGPU = os.getenv('TORCH_USE_GPU') == 'yes' - print('Environment variable TORCH_USE_GPU is set and evaluated as ' .. tostring(defaultUseGPU)) -else - print('Environment variable TORCH_USE_GPU is not set.') -end +require 'fftb' ---------------------------------------------------------------------- +-- Command line arguments local cmd = torch.CmdLine() -cmd:text('') cmd:text('General setup') cmd:option('-seed', 1, 'initial random seed') cmd:option('-nbThreads', defaultNbThreads, 'how many threads (environment variable TORCH_NB_THREADS)') cmd:option('-useGPU', defaultUseGPU, 'should we use cuda (environment variable TORCH_USE_GPU)') +cmd:option('-fastGPU', true, 'should we go as fast as possible, possibly non-deterministically') cmd:text('') cmd:text('Log') cmd:option('-resultFreq', 100, 'at which epoch frequency should we save result images') -cmd:option('-exampleInternals', -1, 'should we save inner activation images') +cmd:option('-exampleInternals', '', 'list of comma-separated indices for inner activation images') cmd:option('-noLog', false, 'should we prevent logging') cmd:option('-rundir', '', 'the directory for results') +cmd:option('-deltaImages', false, 'should we highlight the difference in result images') + +cmd:text('') +cmd:text('Network structure') + +cmd:option('-filterSize', 5) +cmd:option('-nbChannels', 16) +cmd:option('-nbBlocks', 8) cmd:text('') cmd:text('Training') @@ -93,7 +65,6 @@ cmd:text('Training') cmd:option('-nbEpochs', 1000, 'nb of epochs for the heavy setting') cmd:option('-learningRate', 0.1, 'learning rate') cmd:option('-batchSize', 128, 'size of the mini-batches') -cmd:option('-filterSize', 5, 'convolution filter size') cmd:option('-nbTrainSamples', 32768) cmd:option('-nbValidationSamples', 1024) cmd:option('-nbTestSamples', 1024) @@ -103,74 +74,21 @@ cmd:text('Problem to solve') cmd:option('-dataDir', './data/10p-mg', 'data directory') -cmd:text('') -cmd:text('Network structure') - -cmd:option('-nbChannels', 16) -cmd:option('-nbBlocks', 8) - ------------------------------- --- Log and stuff - cmd:addTime('DYNCNN','%F %T') params = cmd:parse(arg) -if params.rundir == '' then - params.rundir = cmd:string('exp', params, { }) -end - -paths.mkdir(params.rundir) - -if not params.noLog then - -- Append to the log if there is one - cmd:log(io.open(params.rundir .. '/log', 'a'), params) -end - ----------------------------------------------------------------------- --- The experiment per se - -if params.predictGrasp then - params.targetDepth = 2 -else - params.targetDepth = 1 -end - ----------------------------------------------------------------------- --- Initializations - -torch.setnumthreads(params.nbThreads) -torch.setdefaulttensortype('torch.FloatTensor') -torch.manualSeed(params.seed) - ---------------------------------------------------------------------- --- Dealing with the CPU/GPU - --- mynn will take entries in that order: mynn, cudnn, cunn, nn - -mynn = {} -setmetatable(mynn, - { - __index = function(table, key) - return (cudnn and cudnn[key]) or (cunn and cunn[key]) or nn[key] - end - } -) +fftbInit(cmd, params) --- These are the tensors that can be kept on the CPU -mynn.SlowTensor = torch.Tensor - --- These are the tensors that should be moved to the GPU -mynn.FastTensor = torch.Tensor - -if params.useGPU then - require 'cutorch' - require 'cunn' - require 'cudnn' - cudnn.benchmark = true - cudnn.fastest = true - mynn.FastTensor = torch.CudaTensor +for _, c in pairs({ + 'date', + 'uname -a', + 'git log -1 --format=%H' + }) +do + logCommand(c) end ---------------------------------------------------------------------- @@ -185,8 +103,8 @@ function loadData(first, nb, name) data.width = 64 data.height = 64 - data.input = mynn.SlowTensor(data.nbSamples, 2, data.height, data.width) - data.target = mynn.SlowTensor(data.nbSamples, 1, data.height, data.width) + data.input = ffnn.SlowTensor(data.nbSamples, 2, data.height, data.width) + data.target = ffnn.SlowTensor(data.nbSamples, 1, data.height, data.width) for i = 1, data.nbSamples do local n = i-1 + first-1 @@ -226,8 +144,8 @@ function collectAllOutputs(model, collection, which) end function saveInternalsImage(model, data, n) - -- Explicitely copy to keep input as a mynn.FastTensor - local input = mynn.FastTensor(1, 2, data.height, data.width) + -- Explicitely copy to keep input as a ffnn.FastTensor + local input = ffnn.FastTensor(1, 2, data.height, data.width) input:copy(data.input:narrow(1, n, 1)) local output = model:forward(input) @@ -260,6 +178,16 @@ end ---------------------------------------------------------------------- +function highlightImage(a, b) + if params.deltaImages then + local h = torch.csub(a, b):abs() + h:div(1/h:max()):mul(0.9):add(0.1) + return torch.cmul(a, h) + else + return a + end +end + function saveResultImage(model, data, nbMax) local criterion = nn.MSECriterion() @@ -268,8 +196,8 @@ function saveResultImage(model, data, nbMax) criterion:cuda() end - local input = mynn.FastTensor(1, 2, data.height, data.width) - local target = mynn.FastTensor(1, 1, data.height, data.width) + local input = ffnn.FastTensor(1, 2, data.height, data.width) + local target = ffnn.FastTensor(1, 1, data.height, data.width) local nbMax = nbMax or 50 @@ -283,51 +211,37 @@ function saveResultImage(model, data, nbMax) for n = 1, nb do - -- Explicitely copy to keep input as a mynn.FastTensor + -- Explicitely copy to keep input as a ffnn.FastTensor input:copy(data.input:narrow(1, n, 1)) target:copy(data.target:narrow(1, n, 1)) local output = model:forward(input) local loss = criterion:forward(output, target) - output = mynn.SlowTensor(output:size()):copy(output) + output = ffnn.SlowTensor(output:size()):copy(output) -- We use our magical img.lua to create the result images - local comp = { - { - { pad = 1, data.input[n][1] }, - { pad = 1, data.input[n][2] }, - { pad = 1, data.target[n][1] }, - { pad = 1, output[1][1] }, - } - } + local comp - --[[ - local comp = { + comp = { { vertical = true, { pad = 1, data.input[n][1] }, - { pad = 1, data.input[n][2] } - }, - torch.Tensor(4, 4):fill(1.0), - { - vertical = true, - { pad = 1, data.target[n][1] }, - { pad = 1, output[1][1] }, - { pad = 1, torch.csub(data.target[n][1], output[1][1]):abs() } + { pad = 1, data.input[n][2] }, + { pad = 1, highlightImage(data.target[n][1], data.input[n][1]) }, + { pad = 1, highlightImage(output[1][1], data.input[n][1]) }, } } - ]]-- -local result = combineImages(1.0, comp) + local result = combineImages(1.0, comp) -result:mul(-1.0):add(1.0) + result:mul(-1.0):add(1.0) -local fileName = string.format('result_%s_%06d.png', data.name, n) -image.save(params.rundir .. '/' .. fileName, result) -lossFile:write(string.format('%f %s\n', loss, fileName)) -end + local fileName = string.format('result_%s_%06d.png', data.name, n) + image.save(params.rundir .. '/' .. fileName, result) + lossFile:write(string.format('%f %s\n', loss, fileName)) + end end ---------------------------------------------------------------------- @@ -342,61 +256,60 @@ function createTower(filterSize, nbChannels, nbBlocks) else - tower = mynn.Sequential() + tower = ffnn.Sequential() for b = 1, nbBlocks do - local block = mynn.Sequential() + local block = ffnn.Sequential() - block:add(mynn.SpatialConvolution(nbChannels, + block:add(ffnn.SpatialConvolution(nbChannels, nbChannels, filterSize, filterSize, 1, 1, (filterSize - 1) / 2, (filterSize - 1) / 2)) - block:add(mynn.SpatialBatchNormalization(nbChannels)) - block:add(mynn.ReLU(true)) + block:add(ffnn.SpatialBatchNormalization(nbChannels)) + block:add(ffnn.ReLU(true)) - block:add(mynn.SpatialConvolution(nbChannels, + block:add(ffnn.SpatialConvolution(nbChannels, nbChannels, filterSize, filterSize, 1, 1, (filterSize - 1) / 2, (filterSize - 1) / 2)) - local parallel = mynn.ConcatTable() - parallel:add(block):add(mynn.Identity()) + local parallel = ffnn.ConcatTable() + parallel:add(block):add(ffnn.Identity()) - tower:add(parallel):add(mynn.CAddTable(true)) + tower:add(parallel):add(ffnn.CAddTable(true)) - tower:add(mynn.SpatialBatchNormalization(nbChannels)) - tower:add(mynn.ReLU(true)) + tower:add(ffnn.SpatialBatchNormalization(nbChannels)) + tower:add(ffnn.ReLU(true)) end end return tower - end function createModel(imageWidth, imageHeight, filterSize, nbChannels, nbBlocks) - local model = mynn.Sequential() + local model = ffnn.Sequential() -- Encode the two input channels (grasping image and starting -- configuration) into the internal number of channels - model:add(mynn.SpatialConvolution(2, + model:add(ffnn.SpatialConvolution(2, nbChannels, filterSize, filterSize, 1, 1, (filterSize - 1) / 2, (filterSize - 1) / 2)) - model:add(mynn.SpatialBatchNormalization(nbChannels)) - model:add(mynn.ReLU(true)) + model:add(ffnn.SpatialBatchNormalization(nbChannels)) + model:add(ffnn.ReLU(true)) -- Add the resnet modules model:add(createTower(filterSize, nbChannels, nbBlocks)) -- Decode down to a single channel, which is the final image - model:add(mynn.SpatialConvolution(nbChannels, + model:add(ffnn.SpatialConvolution(nbChannels, 1, filterSize, filterSize, 1, 1, @@ -407,42 +320,11 @@ end ---------------------------------------------------------------------- -function fillBatch(data, first, batch, permutation) - local actualBatchSize = math.min(params.batchSize, data.input:size(1) - first + 1) - - if actualBatchSize ~= batch.input:size(1) then - local size = batch.input:size() - size[1] = actualBatchSize - batch.input:resize(size) - end - - if actualBatchSize ~= batch.target:size(1) then - local size = batch.target:size() - size[1] = actualBatchSize - batch.target:resize(size) - end - - for k = 1, batch.input:size(1) do - local i - if permutation then - i = permutation[first + k - 1] - else - i = first + k - 1 - end - batch.input[k] = data.input[i] - batch.target[k] = data.target[i] - end -end - -function trainModel(model, trainData, validationData) +function trainModel(model, trainSet, validationSet) local criterion = nn.MSECriterion() local batchSize = params.batchSize - local batch = {} - batch.input = mynn.FastTensor(batchSize, 2, trainData.height, trainData.width) - batch.target = mynn.FastTensor(batchSize, 1, trainData.height, trainData.width) - local startingEpoch = 1 if model.epoch then @@ -450,6 +332,7 @@ function trainModel(model, trainData, validationData) end if model.RNGState then + printfc(colors.red, 'Using the RNG state from the loaded model.') torch.setRNGState(model.RNGState) end @@ -475,19 +358,21 @@ function trainModel(model, trainData, validationData) learningRateDecay = 0 } + local batch = {} + for e = startingEpoch, params.nbEpochs do model:training() - local permutation = torch.randperm(trainData.nbSamples) + local permutation = torch.randperm(trainSet.nbSamples) local accLoss = 0.0 local nbBatches = 0 local startTime = sys.clock() - for b = 1, trainData.nbSamples, batchSize do + for b = 1, trainSet.nbSamples, batchSize do - fillBatch(trainData, b, batch, permutation) + fillBatch(trainSet, b, batch, permutation) local opfunc = function(x) -- Surprisingly, copy() needs this check @@ -526,8 +411,8 @@ function trainModel(model, trainData, validationData) local nbBatches = 0 local startTime = sys.clock() - for b = 1, validationData.nbSamples, batchSize do - fillBatch(validationData, b, batch) + for b = 1, validationSet.nbSamples, batchSize do + fillBatch(validationSet, b, batch) local output = model:forward(batch.input) accLoss = accLoss + criterion:forward(output, batch.target) nbBatches = nbBatches + 1 @@ -537,13 +422,24 @@ function trainModel(model, trainData, validationData) averageValidationLoss = accLoss / nbBatches; end - printf('Epoch train %0.2fs (%0.2fms / sample), validation %0.2fs (%0.2fms / sample).', - trainTime, - 1000 * trainTime / trainData.nbSamples, - validationTime, - 1000 * validationTime / validationData.nbSamples) + ---------------------------------------------------------------------- - printfc(colors.green, 'LOSS %d %f %f', e, averageTrainLoss, averageValidationLoss) + printfc(colors.green, + + 'epoch %d acc_train_loss %f validation_loss %f [train %.02fs total %.02fms / sample, validation %.02fs total %.02fms / sample]', + + e, + + averageTrainLoss, + + averageValidationLoss, + + trainTime, + 1000 * trainTime / trainSet.nbSamples, + + validationTime, + 1000 * validationTime / validationSet.nbSamples + ) ---------------------------------------------------------------------- -- Save a persistent state so that we can restart from there @@ -558,75 +454,59 @@ function trainModel(model, trainData, validationData) if params.resultFreq > 0 and e%params.resultFreq == 0 then torch.save(string.format('%s/model_%04d.t7', params.rundir, e), model) - saveResultImage(model, trainData) - saveResultImage(model, validationData) + saveResultImage(model, trainSet) + saveResultImage(model, validationSet) end end end -function createAndTrainModel(trainData, validationData) - - -- Load the current training state, or create a new model from - -- scratch +---------------------------------------------------------------------- +-- main - if pcall(function () model = torch.load(params.rundir .. '/model_last.t7') end) then +local trainSet = loadData(1, + params.nbTrainSamples, 'train') - printfc(colors.red, - 'Found a learning state with %d epochs finished, starting from there.', - model.epoch) +local validationSet = loadData(params.nbTrainSamples + 1, + params.nbValidationSamples, 'validation') - if params.exampleInternals > 0 then - saveInternalsImage(model, validationData, params.exampleInternals) - os.exit(0) - end +local model - else +if pcall(function () model = torch.load(params.rundir .. '/model_last.t7') end) then - model = createModel(trainData.width, trainData.height, - params.filterSize, params.nbChannels, - params.nbBlocks) + printfc(colors.red, + 'Found a model with %d epochs completed, starting from there.', + model.epoch) + if params.exampleInternals ~= '' then + for _, i in ipairs(string.split(params.exampleInternals, ',')) do + saveInternalsImage(model, validationSet, tonumber(i)) + end + os.exit(0) end - trainModel(model, trainData, validationData) - - return model - -end +else ----------------------------------------------------------------------- --- main + model = createModel(trainSet.width, trainSet.height, + params.filterSize, params.nbChannels, + params.nbBlocks) -for _, c in pairs({ - 'date', - 'uname -a', - 'git log -1 --format=%H' - }) -do - logCommand(c) end -local trainData = loadData(1, - params.nbTrainSamples, 'train') - -local validationData = loadData(params.nbTrainSamples + 1, - params.nbValidationSamples, 'validation') - -local model = createAndTrainModel(trainData, validationData) +trainModel(model, trainSet, validationSet) ---------------------------------------------------------------------- -- Test -local testData = loadData(params.nbTrainSamples + params.nbValidationSamples + 1, - params.nbTestSamples, 'test') +local testSet = loadData(params.nbTrainSamples + params.nbValidationSamples + 1, + params.nbTestSamples, 'test') if params.useGPU then print('Moving the model and criterion to the GPU.') model:cuda() end -saveResultImage(model, trainData) -saveResultImage(model, validationData) -saveResultImage(model, testData, 1024) +saveResultImage(model, trainSet) +saveResultImage(model, validationSet) +saveResultImage(model, testSet, 1024)