/*
 *  mlp-mnist is an implementation of a multi-layer neural network.
 *
 *  Copyright (c) 2008 Idiap Research Institute, http://www.idiap.ch/
 *  Written by Francois Fleuret <francois.fleuret@idiap.ch>
 *
 *  This file is part of mlp-mnist.
 *
 *  mlp-mnist is free software: you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License version 3 as
 *  published by the Free Software Foundation.
 *
 *  mlp-mnist is distributed in the hope that it will be useful, but
 *  WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 *  General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with mlp-mnist. If not, see <http://www.gnu.org/licenses/>.
 */
inline scalar_t normal_sample() {
  // Box-Muller transform: turns two independent uniform samples into
  // a standard normal sample. Using 1 - drand48() keeps the argument
  // of log() strictly positive, since drand48() can return 0.
  scalar_t a = drand48();
  scalar_t b = 1 - drand48();
  return cos(2 * M_PI * a) * sqrt(-2 * log(b));
}
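// A minimal sketch of how this can be used, e.g. drawing a weight with
// standard deviation stdd (presumably what init_random_weights below does):
//
//   scalar_t w = stdd * normal_sample();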
class MultiLayerPerceptron {
public:
  static const scalar_t output_amplitude;

protected:
  int _nb_layers;
  int *_layer_sizes;
  int _nb_activations, _nb_weights;

  // We can 'freeze' certain layers and let the learning only change
  // the weights of the others
  bool *_frozen_layers;

  // Tell us where the layers begin
  int *_weights_index, *_activations_index;

  scalar_t *_weights;
  scalar_t *_activations, *_pre_sigma_activations;

public:
  MultiLayerPerceptron(const MultiLayerPerceptron &mlp);
  MultiLayerPerceptron(int nb_layers, int *layer_sizes);
  MultiLayerPerceptron(istream &is);
  ~MultiLayerPerceptron();

  void save(ostream &os);
  inline int nb_layers() { return _nb_layers; }
  inline int layer_size(int l) { return _layer_sizes[l]; }
  inline int nb_weights() { return _nb_weights; }
  inline void freeze(int l, bool f) { _frozen_layers[l] = f; }

  scalar_t sigma(scalar_t x) { return 2 / (1 + exp(- x)) - 1; }
  scalar_t dsigma(scalar_t x) { scalar_t e = exp(- x); return 2 * e / sq(1 + e); }
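  // Note that sigma(x) = tanh(x/2), so the activations live in (-1, 1),
  // and dsigma is simply its derivative, 2 e^-x / (1 + e^-x)^2.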
  // Initialize all the weights with a normal distribution of the given
  // standard deviation
  void init_random_weights(scalar_t stdd);
  // Compute the gradient based on a single sample
  void compute_gradient_1s(ImageSet *is, int p, scalar_t *gradient_1s);

  // Compute the gradient based on all the samples from the set
  void compute_gradient(ImageSet *is, scalar_t *gradient);
  // Compute the same gradient numerically (to check the one above)
  void compute_numerical_gradient(ImageSet *is, scalar_t *gradient);
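  // Such a check is typically a central finite difference; a sketch of
  // the idea (an assumption about the method, not this exact body): for
  // each weight w_i,
  //
  //   gradient[i] ~ (E(w + eps e_i) - E(w - eps e_i)) / (2 eps)
  //
  // with E the quadratic error below and e_i the i-th basis vector.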
  void print_gradient(ostream &os, scalar_t *gradient);
  // Move all weights to origin + lambda * gradient
  void move_on_line(scalar_t *origin, scalar_t *gradient, scalar_t lambda);
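  // That is, a sketch of the stated contract: for every weight index i,
  //
  //   _weights[i] = origin[i] + lambda * gradient[i];
  //
  // which is exactly the primitive a line search along a direction needs.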
  // The 'basic' gradient just goes through all the samples and adds dt
  // times the gradient computed on each one
  void one_step_basic_gradient(ImageSet *is, scalar_t dt);
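  // In other words, per-sample updates of the form (a sketch; the sign
  // convention is assumed here so that the step decreases the error):
  //
  //   compute_gradient_1s(is, p, g);
  //   for(int i = 0; i < _nb_weights; i++) _weights[i] -= dt * g[i];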
  // The global gradient uses a conjugate gradient to minimize the
  // global quadratic error
  void one_step_global_gradient(ImageSet *is, scalar_t *xi, scalar_t *g, scalar_t *h);
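  // The xi / g / h arrays look like the usual conjugate-gradient
  // bookkeeping (new gradient, previous gradient and conjugate
  // direction, as in Numerical Recipes' frprmn); that reading is an
  // assumption based on the names, e.g. the Polak-Ribiere coefficient
  //
  //   beta = (xi . (xi - g)) / (g . g)
  //
  // mixing xi with beta * h to get the next search direction.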
  // Performs gradient descent until the error on the validation set
  // increases (early stopping)
  void train(ImageSet *training_set, ImageSet *validation_set);
  // Compute the activations of the network for one sample. The input
  // layer has to be as large as the number of pixels in the images.
  void compute_activations_1s(ImageSet *is, int p);
  // Compute the activations of the network on all the samples. The
  // responses array has to be as large as the number of samples in is
  // times the dimension of the output layer
  void test(ImageSet *is, scalar_t *responses);
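  // The flat layout is presumably row-major, i.e. the response of
  // sample n on output unit d lands at responses[n * output_dim + d];
  // the exact indexing is an assumption, the size requirement is not.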
  // Compute the quadratic error
  scalar_t error(ImageSet *is);

  // Compute the classification error
  scalar_t classification_error(ImageSet *is);
};
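// A minimal usage sketch (the layer sizes and the ImageSet variables are
// illustrative assumptions, not part of this header's contract):
//
//   int layer_sizes[] = { nb_pixels, 200, 10 };
//   MultiLayerPerceptron mlp(3, layer_sizes);
//   mlp.init_random_weights(0.1);
//   mlp.train(training_set, validation_set);
//   cout << mlp.classification_error(test_set) << "\n";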