boosted_classifier.cc

   1 /*
   2  *  svrt is the ``Synthetic Visual Reasoning Test'', an image
   3  *  generator for evaluating classification performance of machine
   4  *  learning systems, humans and primates.
   5  *
   6  *  Copyright (c) 2009 Idiap Research Institute, http://www.idiap.ch/
   7  *  Written by Francois Fleuret <francois.fleuret@idiap.ch>
   8  *
   9  *  This file is part of svrt.
  10  *
  11  *  svrt is free software: you can redistribute it and/or modify it
  12  *  under the terms of the GNU General Public License version 3 as
  13  *  published by the Free Software Foundation.
  14  *
  15  *  svrt is distributed in the hope that it will be useful, but
  16  *  WITHOUT ANY WARRANTY; without even the implied warranty of
  17  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  18  *  General Public License for more details.
  19  *
  20  *  You should have received a copy of the GNU General Public License
  21  *  along with svrt.  If not, see <http://www.gnu.org/licenses/>.
  22  *
  23  */
  24
  25 #include "boosted_classifier.h"
  26 #include "classifier_reader.h"
  27 #include "fusion_sort.h"
  28 #include "tools.h"
  29
  30 inline scalar_t loss_derivative(int label, scalar_t response) {
  31   return - scalar_t(label * 2 - 1) * exp( - scalar_t(label * 2 - 1) * response );
  32 }
  33
  34 BoostedClassifier::BoostedClassifier() {
  35   _nb_stumps = 0;
  36   _stumps = 0;
  37 }
  38
  39 BoostedClassifier::BoostedClassifier(int nb_weak_learners) {
  40   _nb_stumps = nb_weak_learners;
  41   _stumps = new Stump[_nb_stumps];
  42 }
  43
  44 BoostedClassifier::~BoostedClassifier() {
  45   delete[] _stumps;
  46 }
  47
  48 const char *BoostedClassifier::name() {
  49   return "ADABOOST";
  50 }
  51
  52 void BoostedClassifier::chose_stump_from_sampling(int t, int **integral_images, scalar_t *derivatives, int nb_samples) {
  53   int *indexes = new int[nb_samples];
  54   int *sorted_indexes = new int[nb_samples];
  55   scalar_t *stump_counts = new scalar_t[nb_samples];
  56
  57   scalar_t max_loss_derivative = 0;
  58   Stump tmp;
  59   tmp.weight0 = -1;
  60   tmp.weight1 =  1;
  61   tmp.threshold = 0;
  62
  63   for(int k = 0; k < global.nb_optimization_weak_learners; k++) {
  64     tmp.randomize();
  65     scalar_t s = 0;
  66
  67     for(int n = 0; n < nb_samples; n++) {
  68       stump_counts[n] = tmp.count(integral_images[n]);
  69       indexes[n] = n;
  70       s += derivatives[n];
  71     }
  72
  73     indexed_fusion_sort(nb_samples, indexes, sorted_indexes, stump_counts);
  74
  75     for(int n = 0; n < nb_samples - 1; n++) {
  76       int i = sorted_indexes[n];
  77       int j = sorted_indexes[n + 1];
  78       s -= 2 * derivatives[i];
  79       if(stump_counts[j] > stump_counts[i]) {
  80         if(abs(s) > abs(max_loss_derivative)) {
  81           max_loss_derivative = s;
  82           _stumps[t] = tmp;
  83           _stumps[t].threshold = (stump_counts[i] + stump_counts[j])/2;
  84         }
  85       }
  86     }
  87   }
  88
  89   delete[] stump_counts;
  90   delete[] indexes;
  91   delete[] sorted_indexes;
  92 }
  93
  94 void BoostedClassifier::chose_stump(int t, int **integral_images, scalar_t *derivatives, int nb_samples) {
  95   if(global.nb_sampled_samples <= 0) {
  96     chose_stump_from_sampling(t, integral_images, derivatives, nb_samples);
  97   } else {
  98     int *sampled_indexes = new int[global.nb_sampled_samples];
  99     scalar_t *weights = new scalar_t[nb_samples];
 100     for(int s = 0; s < nb_samples; s++) {
 101       weights[s] = abs(derivatives[s]);
 102     }
 103     robust_sampling(nb_samples, weights, global.nb_sampled_samples, sampled_indexes);
 104     delete[] weights;
 105
 106     int **sampled_integral_images = new int *[global.nb_sampled_samples];
 107     scalar_t *sampled_derivatives = new scalar_t[global.nb_sampled_samples];
 108
 109     for(int s = 0; s < global.nb_sampled_samples; s++) {
 110       sampled_integral_images[s] = integral_images[sampled_indexes[s]];
 111       if(derivatives[sampled_indexes[s]] > 0) {
 112         sampled_derivatives[s] = 1.0;
 113       } else {
 114         sampled_derivatives[s] = -1.0;
 115       }
 116     }
 117
 118     chose_stump_from_sampling(t, sampled_integral_images, sampled_derivatives, global.nb_sampled_samples);
 119
 120     delete[] sampled_derivatives;
 121     delete[] sampled_integral_images;
 122     delete[] sampled_indexes;
 123   }
 124 }
 125
 126 void BoostedClassifier::train(int nb_vignettes, Vignette *vignettes, int *labels) {
 127   int **integral_images = new int *[nb_vignettes];
 128
 129   for(int n = 0; n < nb_vignettes; n++) {
 130     integral_images[n] = new int[(Vignette::width + 1) * (Vignette::height + 1)];
 131     compute_integral_image(&vignettes[n], integral_images[n]);
 132   }
 133
 134   scalar_t *responses = new scalar_t[nb_vignettes];
 135   scalar_t *derivatives = new scalar_t[nb_vignettes];
 136
 137   for(int n = 0; n < nb_vignettes; n++) {
 138     responses[n] = 0.0;
 139   }
 140
 141   global.bar.init(&cout, _nb_stumps);
 142   for(int t = 0; t < _nb_stumps; t++) {
 143
 144     for(int n = 0; n < nb_vignettes; n++) {
 145       derivatives[n] = loss_derivative(labels[n], responses[n]);
 146     }
 147
 148     chose_stump(t, integral_images, derivatives, nb_vignettes);
 149
 150     scalar_t num0 = 0, den0 = 0, num1 = 0, den1 = 0;
 151
 152     for(int n = 0; n < nb_vignettes; n++) {
 153       if(_stumps[t].response(integral_images[n]) > 0) {
 154         if(labels[n] == 1) {
 155           num1 += exp( - responses[n] );
 156         } else {
 157           den1 += exp(   responses[n] );
 158         }
 159       } else {
 160         if(labels[n] == 1) {
 161           num0 += exp( - responses[n] );
 162         } else {
 163           den0 += exp(   responses[n] );
 164         }
 165       }
 166     }
 167
 168     scalar_t weight_max = 5.0;
 169
 170     _stumps[t].weight0 = 0.5 * log(num0 / den0);
 171
 172     if(_stumps[t].weight0 < -weight_max)
 173       _stumps[t].weight0 = -weight_max;
 174     else if(_stumps[t].weight0 > weight_max)
 175       _stumps[t].weight0 = weight_max;
 176
 177     _stumps[t].weight1 = 0.5 * log(num1 / den1);
 178     if(_stumps[t].weight1 < -weight_max)
 179       _stumps[t].weight1 = -weight_max;
 180     else if(_stumps[t].weight1 > weight_max)
 181       _stumps[t].weight1 = weight_max;
 182
 183     for(int n = 0; n < nb_vignettes; n++) {
 184       responses[n] += _stumps[t].response(integral_images[n]);
 185     }
 186
 187     // cout << "ADABOOST_STEP " << t + 1 << " " << loss << endl;
 188     global.bar.refresh(&cout, t);
 189   }
 190   global.bar.finish(&cout);
 191
 192   scalar_t loss = 0;
 193   for(int n = 0; n < nb_vignettes; n++) {
 194     loss += exp( - scalar_t(labels[n] * 2 - 1) * responses[n]);
 195   }
 196
 197   cout << "Final loss is " << loss << endl;
 198
 199   delete[] derivatives;
 200   delete[] responses;
 201
 202   for(int n = 0; n < nb_vignettes; n++) {
 203     delete[] integral_images[n];
 204   }
 205
 206   delete[] integral_images;
 207 }
 208
 209 scalar_t BoostedClassifier::classify(Vignette *vignette) {
 210   int integral_image[(Vignette::width + 1) * (Vignette::height + 1)];
 211   compute_integral_image(vignette, integral_image);
 212   scalar_t result = 0;
 213   for(int n = 0; n < _nb_stumps; n++) {
 214     result += _stumps[n].response(integral_image);
 215   }
 216   return result;
 217 }
 218
 219 void BoostedClassifier::read(istream *in) {
 220   delete[] _stumps;
 221   read_var(in, &_nb_stumps);
 222   cout << "Reading " << _nb_stumps << " stumps." << endl;
 223   _stumps = new Stump[_nb_stumps];
 224   in->read((char *) _stumps, sizeof(Stump) * _nb_stumps);
 225 }
 226
 227 void BoostedClassifier::write(ostream *out) {
 228   int t;
 229   t = CT_BOOSTED;
 230   write_var(out, &t);
 231   write_var(out, &_nb_stumps);
 232   out->write((char *) _stumps, sizeof(Stump) * _nb_stumps);
 233 }
 234
 235 scalar_t BoostedClassifier::partial_sum(int first, int nb, Vignette *vignette) {
 236   int integral_image[(Vignette::width + 1) * (Vignette::height + 1)];
 237   compute_integral_image(vignette, integral_image);
 238   scalar_t result = 0;
 239   for(int n = first; n < first + nb; n++) {
 240     result += _stumps[n].response(integral_image);
 241   }
 242   return result;
 243 }