8bfdde199bd1c858d30ee42fe3e0ebbe30eeb860
[pysvrt.git] / svrtset.py
1
2 #  svrt is the ``Synthetic Visual Reasoning Test'', an image
3 #  generator for evaluating classification performance of machine
4 #  learning systems, humans and primates.
5 #
6 #  Copyright (c) 2017 Idiap Research Institute, http://www.idiap.ch/
7 #  Written by Francois Fleuret <francois.fleuret@idiap.ch>
8 #
9 #  This file is part of svrt.
10 #
11 #  svrt is free software: you can redistribute it and/or modify it
12 #  under the terms of the GNU General Public License version 3 as
13 #  published by the Free Software Foundation.
14 #
15 #  svrt is distributed in the hope that it will be useful, but
16 #  WITHOUT ANY WARRANTY; without even the implied warranty of
17 #  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
18 #  General Public License for more details.
19 #
20 #  You should have received a copy of the GNU General Public License
21 #  along with svrt.  If not, see <http://www.gnu.org/licenses/>.
22
23 import torch
24 from math import sqrt
25 from torch import multiprocessing
26
27 from torch import Tensor
28 from torch.autograd import Variable
29
30 import svrt
31
32 ######################################################################
33
def generate_one_batch(s):
    """Generate one labelled batch of vignettes.

    `s` is a sequence unpacked as (problem_number, batch_size,
    random_seed). The svrt generator is seeded with random_seed, then
    batch_size labels are drawn with bernoulli_(0.5) and passed to
    svrt.generate_vignettes together with the problem number.

    Returns a two-element list [ input, target ]: the generated
    vignettes converted to float with a singleton second dimension
    inserted, and the LongTensor of labels. A list (not a tuple) is
    returned so that the caller can replace its elements in place.
    """
    problem_number, batch_size, random_seed = s

    # Seed first so that the vignettes depend only on the seed and labels
    svrt.seed(random_seed)

    labels = torch.LongTensor(batch_size).bernoulli_(0.5)
    vignettes = svrt.generate_vignettes(problem_number, labels)

    # N x H x W  ->  N x 1 x H x W, as floats
    nb = vignettes.size(0)
    height = vignettes.size(1)
    width = vignettes.size(2)
    vignettes = vignettes.float().view(nb, 1, height, width)

    return [ vignettes, labels ]
41
class VignetteSet:
    """Set of vignette batches generated once and kept in memory.

    All the batches are generated in the constructor, normalized in
    place with the mean and standard deviation computed over the whole
    set, and optionally moved to the GPU.

    Attributes set by the constructor: cuda, problem_number,
    batch_size, nb_samples, nb_batches, mean, std, and data (a list of
    [ input, target ] pairs, one per batch).
    """

    def __init__(self, problem_number, nb_samples, batch_size, cuda = False, logger = None):
        """Generate and normalize nb_samples vignettes.

        Args:
            problem_number: problem identifier passed to the generator.
            nb_samples: total number of vignettes; must be a positive
                multiple of batch_size.
            batch_size: number of vignettes per batch; must be positive.
            cuda: if true, every batch is moved to the GPU with .cuda().
            logger: optional callable invoked after each generated batch
                as logger(total_nb_samples, b * batch_size), where b is
                the index of the batch just generated.

        Raises:
            ValueError: if batch_size or nb_samples is not positive, or
                if nb_samples is not a multiple of batch_size.
        """

        # Check positivity first: batch_size == 0 would otherwise fail
        # in the modulo below with an unrelated ZeroDivisionError.
        if batch_size <= 0 or nb_samples <= 0:
            raise ValueError('nb_samples and batch_size must be positive')

        if nb_samples % batch_size > 0:
            raise ValueError('nb_samples must be a multiple of batch_size')

        self.cuda = cuda
        self.problem_number = problem_number

        self.batch_size = batch_size
        self.nb_samples = nb_samples
        self.nb_batches = self.nb_samples // self.batch_size

        # One random seed per batch. tolist() yields plain Python
        # integers, whereas indexing the tensor may yield a 0-dim tensor
        # depending on the PyTorch version.
        seeds = torch.LongTensor(self.nb_batches).random_().tolist()
        batch_args = [ [ problem_number, batch_size, seed ] for seed in seeds ]

        # The batches are generated sequentially. Mapping
        # generate_one_batch over batch_args with a multiprocessing.Pool
        # was tried and disabled because of unexplained behavior.
        self.data = []
        for b, args in enumerate(batch_args):
            self.data.append(generate_one_batch(args))
            if logger is not None:
                logger(self.nb_batches * self.batch_size, b * self.batch_size)

        # Accumulate the per-batch averages of the values and of their
        # squares as Python floats. All batches have the same number of
        # values, so the mean of the per-batch averages is the global one.
        acc = 0.0
        acc_sq = 0.0
        for batch in self.data:
            values = batch[0]
            nb_values = values.numel()
            acc += float(values.sum()) / nb_values
            acc_sq += float(values.pow(2).sum()) / nb_values

        self.mean = acc / self.nb_batches
        self.std = sqrt(acc_sq / self.nb_batches - self.mean * self.mean)

        for batch in self.data:
            batch[0].sub_(self.mean).div_(self.std)
            if cuda:
                batch[0] = batch[0].cuda()
                batch[1] = batch[1].cuda()

    def get_batch(self, b):
        """Return the [ input, target ] pair stored for batch index b."""
        return self.data[b]
89
90 ######################################################################
91
class CompressedVignetteSet:
    """Set of vignette batches kept in memory in compressed form.

    The constructor generates all the batches, stores the result of
    svrt.compress for each of them together with the targets, and
    computes the mean and standard deviation over the whole set.
    get_batch uncompresses and normalizes one batch on demand.

    Attributes set by the constructor: cuda, problem_number,
    batch_size, nb_samples, nb_batches, targets, input_storages, mean
    and std.
    """

    def __init__(self, problem_number, nb_samples, batch_size, cuda = False, logger = None):
        """Generate and compress nb_samples vignettes.

        Args:
            problem_number: problem identifier passed to the generator.
            nb_samples: total number of vignettes; must be a positive
                multiple of batch_size.
            batch_size: number of vignettes per batch; must be positive.
            cuda: if true, get_batch moves its result to the GPU.
            logger: optional callable invoked after each generated batch
                as logger(total_nb_samples, b * batch_size), where b is
                the index of the batch just generated.

        Raises:
            ValueError: if batch_size or nb_samples is not positive, or
                if nb_samples is not a multiple of batch_size.
        """

        # Check positivity first: batch_size == 0 would otherwise fail
        # in the modulo below with an unrelated ZeroDivisionError.
        if batch_size <= 0 or nb_samples <= 0:
            raise ValueError('nb_samples and batch_size must be positive')

        if nb_samples % batch_size > 0:
            raise ValueError('nb_samples must be a multiple of batch_size')

        self.cuda = cuda
        self.problem_number = problem_number

        self.batch_size = batch_size
        self.nb_samples = nb_samples
        self.nb_batches = self.nb_samples // self.batch_size

        self.targets = []
        self.input_storages = []

        # Per-batch averages of the values and of their squares,
        # accumulated as Python floats.
        acc = 0.0
        acc_sq = 0.0
        for b in range(0, self.nb_batches):
            target = torch.LongTensor(self.batch_size).bernoulli_(0.5)
            vignettes = svrt.generate_vignettes(problem_number, target)

            # Convert to float once for both statistics
            values = vignettes.float()
            nb_values = vignettes.numel()
            acc += float(values.sum()) / nb_values
            acc_sq += float(values.pow(2).sum()) / nb_values

            self.targets.append(target)
            # Only the compressed form of the raw storage is kept
            self.input_storages.append(svrt.compress(vignettes.storage()))

            if logger is not None:
                logger(self.nb_batches * self.batch_size, b * self.batch_size)

        self.mean = acc / self.nb_batches
        self.std = sqrt(acc_sq / self.nb_batches - self.mean * self.mean)

    def get_batch(self, b):
        """Uncompress, normalize and return batch b as (input, target).

        The input is rebuilt as a float tensor of size
        batch_size x 1 x 128 x 128, then centered and scaled with the
        mean and standard deviation computed by the constructor. Both
        tensors are moved to the GPU if the set was created with cuda.
        """
        raw = torch.ByteTensor(svrt.uncompress(self.input_storages[b]))

        # NOTE(review): the vignette size is hard-coded to 128x128 here;
        # presumably this is what svrt.generate_vignettes produces.
        input = raw.float().view(self.batch_size, 1, 128, 128)
        input.sub_(self.mean).div_(self.std)
        target = self.targets[b]

        if self.cuda:
            input = input.cuda()
            target = target.cuda()

        return input, target
133
134 ######################################################################