/*
 *  mlp-mnist is an implementation of a multi-layer neural network.
 *
 *  Copyright (c) 2008 Idiap Research Institute, http://www.idiap.ch/
 *  Written by Francois Fleuret <francois.fleuret@idiap.ch>
 *
 *  This file is part of mlp-mnist.
 *
 *  mlp-mnist is free software: you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License version 3 as
 *  published by the Free Software Foundation.
 *
 *  mlp-mnist is distributed in the hope that it will be useful, but
 *  WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 *  General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with mlp-mnist. If not, see <http://www.gnu.org/licenses/>.
 */
inline scalar_t normal_sample() {
  // Box-Muller transform: turns two independent uniform samples into
  // a standard normal sample. Using 1 - drand48() keeps the argument
  // of log() strictly positive, since drand48() can return 0.
  scalar_t a = drand48();
  scalar_t b = 1 - drand48();
  return cos(2 * M_PI * a) * sqrt(-2 * log(b));
}
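// A minimal sketch of how this can be used, e.g. drawing a weight with
// standard deviation stdd (presumably what init_random_weights below does):
//
//   scalar_t w = stdd * normal_sample();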
class MultiLayerPerceptron {
public:
  static const scalar_t output_amplitude;

protected:
  int _nb_layers;
  int *_layer_sizes;
  int _nb_activations, _nb_weights;

  // We can 'freeze' certain layers and let the learning only change
  // the weights of the others
  bool *_frozen_layers;

  // Tell us where the layers begin
  int *_weights_index, *_activations_index;

  scalar_t *_weights;
  scalar_t *_activations, *_pre_sigma_activations;

public:
  MultiLayerPerceptron(const MultiLayerPerceptron &mlp);
  MultiLayerPerceptron(int nb_layers, int *layer_sizes);
  MultiLayerPerceptron(istream &is);
  ~MultiLayerPerceptron();

  void save(ostream &os);
  inline int nb_layers() { return _nb_layers; }
  inline int layer_size(int l) { return _layer_sizes[l]; }
  inline int nb_weights() { return _nb_weights; }
  inline void freeze(int l, bool f) { _frozen_layers[l] = f; }

  scalar_t sigma(scalar_t x) { return 2 / (1 + exp(- x)) - 1; }
  scalar_t dsigma(scalar_t x) { scalar_t e = exp(- x); return 2 * e / sq(1 + e); }
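  // Note that sigma(x) = tanh(x/2), so the activations live in (-1, 1),
  // and dsigma is simply its derivative, 2 e^-x / (1 + e^-x)^2.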
  // Initialize all the weights with a normal distribution of the given
  // standard deviation
  void init_random_weights(scalar_t stdd);
  // Compute the gradient based on a single sample
  void compute_gradient_1s(ImageSet *is, int p, scalar_t *gradient_1s);

  // Compute the gradient based on all the samples from the set
  void compute_gradient(ImageSet *is, scalar_t *gradient);
  // Compute the same gradient numerically (to check the one above)
  void compute_numerical_gradient(ImageSet *is, scalar_t *gradient);
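  // Such a check is typically a central finite difference; a sketch of
  // the idea (an assumption about the method, not this exact body): for
  // each weight w_i,
  //
  //   gradient[i] ~ (E(w + eps e_i) - E(w - eps e_i)) / (2 eps)
  //
  // with E the quadratic error below and e_i the i-th basis vector.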
  void print_gradient(ostream &os, scalar_t *gradient);
  // Move all weights to origin + lambda * gradient
  void move_on_line(scalar_t *origin, scalar_t *gradient, scalar_t lambda);
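  // That is, a sketch of the stated contract: for every weight index i,
  //
  //   _weights[i] = origin[i] + lambda * gradient[i];
  //
  // which is exactly the primitive a line search along a direction needs.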
  // The 'basic' gradient just goes through all the samples and adds dt
  // times the gradient computed on each one
  void one_step_basic_gradient(ImageSet *is, scalar_t dt);
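  // In other words, per-sample updates of the form (a sketch; the sign
  // convention is assumed here so that the step decreases the error):
  //
  //   compute_gradient_1s(is, p, g);
  //   for(int i = 0; i < _nb_weights; i++) _weights[i] -= dt * g[i];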
  // The global gradient uses a conjugate gradient to minimize the
  // global quadratic error
  void one_step_global_gradient(ImageSet *is, scalar_t *xi, scalar_t *g, scalar_t *h);
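  // The xi / g / h arrays look like the usual conjugate-gradient
  // bookkeeping (new gradient, previous gradient and conjugate
  // direction, as in Numerical Recipes' frprmn); that reading is an
  // assumption based on the names, e.g. the Polak-Ribiere coefficient
  //
  //   beta = (xi . (xi - g)) / (g . g)
  //
  // mixing xi with beta * h to get the next search direction.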
  // Performs gradient descent until the error on the validation set
  // increases (early stopping)
  void train(ImageSet *training_set, ImageSet *validation_set);
  // Compute the activations of the network for one sample. The input
  // layer has to be as large as the number of pixels in the images.
  void compute_activations_1s(ImageSet *is, int p);
  // Compute the activations of the network on all the samples. The
  // responses array has to be as large as the number of samples in is
  // times the dimension of the output layer
  void test(ImageSet *is, scalar_t *responses);
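  // The flat layout is presumably row-major, i.e. the response of
  // sample n on output unit d lands at responses[n * output_dim + d];
  // the exact indexing is an assumption, the size requirement is not.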
  // Compute the quadratic error
  scalar_t error(ImageSet *is);

  // Compute the classification error
  scalar_t classification_error(ImageSet *is);
};
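// A minimal usage sketch (the layer sizes and the ImageSet variables are
// illustrative assumptions, not part of this header's contract):
//
//   int layer_sizes[] = { nb_pixels, 200, 10 };
//   MultiLayerPerceptron mlp(3, layer_sizes);
//   mlp.init_random_weights(0.1);
//   mlp.train(training_set, validation_set);
//   cout << mlp.classification_error(test_set) << "\n";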