neural.h

   1 /*
   2  *  mlp-mnist is an implementation of a multi-layer neural network.
   3  *
   4  *  Copyright (c) 2006 École Polytechnique Fédérale de Lausanne,
   5  *  http://www.epfl.ch
   6  *
   7  *  Written by Francois Fleuret <francois@fleuret.org>
   8  *
   9  *  This file is part of mlp-mnist.
  10  *
  11  *  mlp-mnist is free software: you can redistribute it and/or modify
  12  *  it under the terms of the GNU General Public License version 3 as
  13  *  published by the Free Software Foundation.
  14  *
  15  *  mlp-mnist is distributed in the hope that it will be useful, but
  16  *  WITHOUT ANY WARRANTY; without even the implied warranty of
  17  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  18  *  General Public License for more details.
  19  *
  20  *  You should have received a copy of the GNU General Public License
  21  *  along with mlp-mnist.  If not, see <http://www.gnu.org/licenses/>.
  22  *
  23  */
  24
  25 #ifndef NEURAL_H
  26 #define NEURAL_H
  27
  28 #include <cmath>
  29 #include <stdlib.h>
  30
  31 #include "misc.h"
  32 #include "images.h"
  33
  34 inline scalar_t normal_sample() {
  35   scalar_t a = drand48();
  36   scalar_t b = drand48();
  37   return cos(2 * M_PI * a) * sqrt(-2 * log(b));
  38 }
  39
  40 class MultiLayerPerceptron {
  41 protected:
  42   static const scalar_t output_amplitude;
  43
  44   int _nb_layers;
  45   int *_layer_sizes;
  46   int _nb_activations, _nb_weights;
  47
  48   // We can 'freeze' certain layers and let the learning only change
  49   // the others
  50   bool *_frozen_layers;
  51
  52   // Tell us where the layers begin
  53   int *_weights_index, *_activations_index;
  54
  55   scalar_t *_activations, *_pre_sigma_activations;
  56   scalar_t *_weights;
  57
  58 public:
  59   MultiLayerPerceptron(const MultiLayerPerceptron &mlp);
  60   MultiLayerPerceptron(int nb_layers, int *layer_sizes);
  61   MultiLayerPerceptron(istream &is);
  62   ~MultiLayerPerceptron();
  63
  64   void save(ostream &os);
  65
  66   void save_data();
  67
  68   inline int nb_layers() { return _nb_layers; }
  69   inline int layer_size(int l) { return _layer_sizes[l]; }
  70   inline int nb_weights() { return _nb_weights; }
  71   inline void freeze(int l, bool f) { _frozen_layers[l] = f; }
  72   scalar_t sigma(scalar_t x) { return 2 / (1 + exp(- x)) - 1; }
  73   scalar_t dsigma(scalar_t x) { scalar_t e = exp(- x); return 2 * e / sq(1 + e); }
  74
  75   // Init all the weights with a normal distribution of given standard
  76   // deviation
  77   void init_random_weights(scalar_t stdd);
  78
  79   // Compute the gradient based on one single sample
  80   void compute_gradient_1s(ImageSet *is, int p, scalar_t *gradient_1s);
  81   // Compute the gradient based on all samples from the set
  82   void compute_gradient(ImageSet *is, scalar_t *gradient);
  83
  84   // Compute the same gradient numerically (to check the one above)
  85   void compute_numerical_gradient(ImageSet *is, scalar_t *gradient);
  86
  87   // Print the gradient
  88   void print_gradient(ostream &os, scalar_t *gradient);
  89
  90   // Move all weights to origin + lambda * gradient
  91   void move_on_line(scalar_t *origin, scalar_t *gradient, scalar_t lambda);
  92
  93   // The 'basic' gradient just goes through all samples and add dt
  94   // time the gradient on each one
  95   void one_step_basic_gradient(ImageSet *is, scalar_t dt);
  96
  97   // The global gradient uses a conjugate gradient to minmize the
  98   // global quadratic error
  99   void one_step_global_gradient(ImageSet *is, scalar_t *xi, scalar_t *g, scalar_t *h);
 100
 101   // Performs gradient descent until the test error as increased
 102   // during 5 steps
 103   void train(ImageSet *training_set, ImageSet *validation_set);
 104
 105   // Compute the activation of the network from one sample. The input
 106   // layer has to be as large as the number of pixels in the images.
 107   void compute_activations_1s(ImageSet *is, int p);
 108
 109   // Compute the activation of the network on all samples. The
 110   // responses array has to be as large as the number of samples in is
 111   // time the dimension of the output layer
 112   void test(ImageSet *is, scalar_t *responses);
 113
 114   // Compute the quadratic error
 115   scalar_t error(ImageSet *is);
 116   // Compute the classification error
 117   scalar_t classification_error(ImageSet *is);
 118 };
 119
 120 #endif