Renamed ann.c into mlp.c.

[mlp.git] / mlp.cc
diff --git a/mlp.cc b/mlp.cc

new file mode 100644 (file)

index 0000000..758b624
--- /dev/null
+++ b/mlp.cc
@@ -0,0 +1,322 @@
+/*
+ *  mlp-mnist is an implementation of a multi-layer neural network.
+ *
+ *  Copyright (c) 2006 École Polytechnique Fédérale de Lausanne,
+ *  http://www.epfl.ch
+ *
+ *  Written by Francois Fleuret <francois@fleuret.org>
+ *
+ *  This file is part of mlp-mnist.
+ *
+ *  mlp-mnist is free software: you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License version 3 as
+ *  published by the Free Software Foundation.
+ *
+ *  mlp-mnist is distributed in the hope that it will be useful, but
+ *  WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ *  General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with mlp-mnist.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ */
+
+// LeCun et al. 1998:
+
+// 2-layer NN, 300 hidden units, mean square error  4.70%
+// 2-layer NN, 1000 hidden units                    4.50%
+// 3-layer NN, 300+100 hidden units                 3.05%
+// 3-layer NN, 500+150 hidden units                 2.95%
+
+/*********************************************************************
+
+   This program, trained on 20,000 examples (+ 20,000 for the
+   stopping criterion) and tested on the 10,000 examples of the MNIST
+   test set, with 100 hidden neurons, basic network: 3.48% error.
+
+   TRAINING
+
+    ./ann --nb-training-examples 20000 --nb-validation-examples 20000 \
+        --mlp-structure 784,200,10 \
+        --data-files ${DATA_DIR}/train-images-idx3-ubyte ${DATA_DIR}/train-labels-idx1-ubyte \
+        --save-mlp simple.mlp
+
+   TEST
+
+    ./ann --load-mlp simple.mlp \
+        --data-files ${DATA_DIR}/t10k-images-idx3-ubyte ${DATA_DIR}/t10k-labels-idx1-ubyte \
+        --nb-test-examples 10000
+
+*********************************************************************/
+
+#include <iostream>
+#include <fstream>
+#include <cmath>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+using namespace std;
+
+#include "images.h"
+#include "neural.h"
+
+#define SMALL_BUFFER_SIZE 1024
+
+//////////////////////////////////////////////////////////////////////
+// Global Variables
+//////////////////////////////////////////////////////////////////////
+
+// Value of --nb-experiment; main passes it to srand48() as the seed.
+int nb_experiment = 0;
+// Value of --nb-training-examples; number of pictures main samples
+// into the training set (0 means no training).
+int nb_training_examples = 0;
+// Value of --nb-validation-examples; number of pictures main samples
+// into the validation set.
+int nb_validation_examples = 0;
+// Value of --nb-test-examples; number of pictures main samples into
+// the test set (0 means no test).
+int nb_test_examples = 0;
+// Set by --save-data; when true and a network exists, main calls its
+// save_data() method.
+bool save_data = false;
+
+// The two arguments of --data-files, handed to
+// ImageSet::load_mnist_format() by main.
+char images_filename[SMALL_BUFFER_SIZE] = "\0";
+char labels_filename[SMALL_BUFFER_SIZE] = "\0";
+// Argument of --load-mlp; empty when no network has to be read.
+char opt_load_filename[SMALL_BUFFER_SIZE] = "\0";
+// Argument of --save-mlp; empty when the network must not be written.
+char opt_save_filename[SMALL_BUFFER_SIZE] = "\0";
+// Raw argument of --mlp-structure (comma-separated layer sizes), split
+// into integers by main with next_word(); empty when not given.
+char opt_layer_sizes[SMALL_BUFFER_SIZE] = "\0";
+
+char *next_word(char *buffer, char *r, int buffer_size) {
+  char *s;
+  s = buffer;
+  if(r != NULL)
+    {
+      if(*r == '"') {
+        r++;
+        while((*r != '"') && (*r != '\0') &&
+              (s<buffer+buffer_size-1))
+          *s++ = *r++;
+        if(*r == '"') r++;
+      } else {
+        while((*r != '\r') && (*r != '\n') && (*r != '\0') &&
+              (*r != '\t') && (*r != ' ') && (*r != ',') &&
+              (s<buffer+buffer_size-1))
+          *s++ = *r++;
+      }
+
+      while((*r == ' ') || (*r == '\t') || (*r == ',')) r++;
+      if((*r == '\0') || (*r=='\r') || (*r=='\n')) r = NULL;
+    }
+  *s = '\0';
+  return r;
+}
+
+//////////////////////////////////////////////////////////////////////
+// Simple routine to check we have enough parameters
+//////////////////////////////////////////////////////////////////////
+
+// Terminates the program unless the option argv[n_opt] is followed by
+// at least n arguments on the command line.
+//
+//   argc, argv  the command line, as received by main
+//   n_opt       index in argv of the option being parsed
+//   n           number of arguments this option requires
+//   help        description of the expected arguments, printed in the
+//               error message
+//
+// Returns normally when argv[n_opt+1] ... argv[n_opt+n] all exist;
+// otherwise writes an error message to cerr and calls exit(1).
+void check_opt(int argc, char **argv, int n_opt, int n, const char *help) {
+  const int last_needed = n_opt + n;
+  if(last_needed < argc) return;
+
+  cerr << "Missing argument for " << argv[n_opt] << ".\n"
+       << "Expecting " << help << ".\n";
+  exit(1);
+}
+
+// Writes the program banner (name, author, revision id, blank line)
+// to cout, then terminates the process with exit status e.
+void print_help_and_exit(int e) {
+  cout << "ANN. Written by François Fleuret.\n"
+       << "$Id: ann.cc,v 1.1 2005-12-13 17:19:11 fleuret Exp $\n"
+       << "\n";
+  exit(e);
+}
+
+int main(int argc, char **argv) {
+
+  if(argc == 1) print_help_and_exit(1);
+
+  nice(10);
+
+  // Parsing the command line parameters ///////////////////////////////
+
+  int i = 1;
+
+  while(i < argc) {
+
+    if(argc == 1 || strcmp(argv[i], "--help") == 0) print_help_and_exit(0);
+
+    else if(strcmp(argv[i], "--data-files") == 0) {
+      check_opt(argc, argv, i, 2, "<string: pixel filename> <string: label filename>");
+      strncpy(images_filename, argv[i+1], SMALL_BUFFER_SIZE);
+      strncpy(labels_filename, argv[i+2], SMALL_BUFFER_SIZE);
+      i += 3;
+    }
+
+    else if(strcmp(argv[i], "--load-mlp") == 0) {
+      check_opt(argc, argv, i, 1, "<string: mlp filename>");
+      strncpy(opt_load_filename, argv[i+1], SMALL_BUFFER_SIZE);
+      i += 2;
+    }
+
+    else if(strcmp(argv[i], "--mlp-structure") == 0) {
+      check_opt(argc, argv, i, 1, "<int: input layer size>,<int: first hidden layer size>,[...,]<int: output layer size>");
+      strncpy(opt_layer_sizes, argv[i+1], SMALL_BUFFER_SIZE);
+      i += 2;
+    }
+
+    else if(strcmp(argv[i], "--save-mlp") == 0) {
+      check_opt(argc, argv, i, 1, "<string: mlp filename>");
+      strncpy(opt_save_filename, argv[i+1], SMALL_BUFFER_SIZE);
+      i += 2;
+    }
+
+    else if(strcmp(argv[i], "--nb-experiment") == 0) {
+      check_opt(argc, argv, i, 1, "<int: number of the experiment>");
+      nb_experiment = atoi(argv[i+1]);
+      i += 2;
+    }
+
+    else if(strcmp(argv[i], "--nb-training-examples") == 0) {
+      check_opt(argc, argv, i, 1, "<int: number of examples for the training>");
+      nb_training_examples = atoi(argv[i+1]);
+      i += 2;
+    }
+
+    else if(strcmp(argv[i], "--nb-validation-examples") == 0) {
+      check_opt(argc, argv, i, 1, "<int: number of examples for the validation>");
+      nb_validation_examples = atoi(argv[i+1]);
+      i += 2;
+    }
+
+    else if(strcmp(argv[i], "--nb-test-examples") == 0) {
+      check_opt(argc, argv, i, 1, "<int: number of examples for the test>");
+      nb_test_examples = atoi(argv[i+1]);
+      i += 2;
+    }
+
+    else if(strcmp(argv[i], "--save-data") == 0) {
+      save_data = true;
+      i++;
+    }
+
+    else {
+      cerr << "Unknown option " << argv[i] << "\n";
+      print_help_and_exit(1);
+    }
+  }
+
+  ImageSet image_set;
+  cout << "Loading the data file ..."; cout.flush();
+  image_set.load_mnist_format(images_filename, labels_filename);
+  cout << " done.\n"; cout.flush();
+
+  cout << "Database contains " << image_set.nb_pics()
+       << " images of resolution " << image_set.width() << "x" << image_set.height()
+       << " divided into " << image_set.nb_obj() << " objects.\n";
+
+  srand48(nb_experiment);
+
+  int nb_layers = 0;
+  int *layer_sizes = 0;
+
+  if(opt_layer_sizes[0]) {
+    char *s = opt_layer_sizes;
+    char token[SMALL_BUFFER_SIZE];
+    while(s) { s = next_word(token, s, SMALL_BUFFER_SIZE); nb_layers++; }
+
+    if(nb_layers < 2) {
+      cerr << "Need at least two layers.\n";
+      exit(1);
+    }
+
+    layer_sizes = new int[nb_layers];
+    s = opt_layer_sizes;
+    int n = 0;
+    while(s) { s = next_word(token, s, SMALL_BUFFER_SIZE); layer_sizes[n++] = atoi(token); }
+  }
+
+  // Loading or creating a perceptron from scratch /////////////////////
+
+  MultiLayerPerceptron *mlp = 0;
+
+  if(opt_load_filename[0]) {
+
+    ifstream stream(opt_load_filename);
+    if(stream.fail()) {
+      cerr << "Can not read " << opt_load_filename << ".\n";
+      exit(1);
+    }
+
+    cout << "Loading network " << opt_load_filename << " ... "; cout.flush();
+    mlp = new MultiLayerPerceptron(stream);
+    cout << "done (layers of sizes";
+    for(int l = 0; l < mlp->nb_layers(); l++) cout << " " << mlp->layer_size(l);
+    cout << ")\n"; cout.flush();
+
+  } else if(nb_layers > 0) {
+
+    if(layer_sizes[0] != image_set.width() * image_set.height() ||
+       layer_sizes[nb_layers-1] != image_set.nb_obj()) {
+      cerr << "For this data set, the input layer has to be of size " << image_set.width() * image_set.height() << ",\n";
+      cerr << "and the output has to be of size " << image_set.nb_obj() << ".\n";
+      exit(1);
+    }
+
+    cout << "Creating a new network (layers of sizes";
+    for(int i = 0; i < nb_layers; i++) cout << " " << layer_sizes[i];
+    cout << ").\n";
+
+    mlp = new MultiLayerPerceptron(nb_layers, layer_sizes);
+    mlp->init_random_weights(1e-1);
+  }
+
+  // Training the perceptron ///////////////////////////////////////////
+
+  ImageSet training_set, validation_set, test_set;
+
+  if(nb_training_examples > 0)
+    training_set.sample_among_unused_pictures(image_set, nb_training_examples);
+
+  if(nb_validation_examples > 0)
+    validation_set.sample_among_unused_pictures(image_set, nb_validation_examples);
+
+  if(save_data && mlp) mlp->save_data();
+
+  if(nb_training_examples > 0) {
+    if(validation_set.nb_pics() == 0) {
+      cerr << "We need validation pictures for training.\n";
+      exit(1);
+    }
+    cout << "Training the network with " << nb_training_examples << " training and " << nb_validation_examples << " validation examples.\n"; cout.flush();
+    mlp->train(&training_set, &validation_set);
+  }
+
+  // Saving the perceptron /////////////////////////////////////////////
+
+  if(opt_save_filename[0]) {
+    if(!mlp) {
+      cerr << "No perceptron to save.\n";
+      exit(1);
+    }
+
+    ofstream stream(opt_save_filename);
+    if(stream.fail()) {
+      cerr << "Can not write " << opt_save_filename << ".\n";
+      exit(1);
+    }
+
+    cout << "Saving network " << opt_save_filename << " ... "; cout.flush();
+    mlp->save(stream);
+    cout << "done.\n"; cout.flush();
+  }
+
+  // Testing the perceptron ////////////////////////////////////////////
+
+  if(nb_test_examples > 0) {
+    test_set.sample_among_unused_pictures(image_set, nb_test_examples);
+    cout << "Error rate " << mlp->error(&test_set) << " (" << mlp->classification_error(&test_set)*100 << "%)\n";
+
+    // This is to test the analytical gradient
+    //     scalar_t gradient[mlp->nb_weights()], numerical_gradient[mlp->nb_weights()];
+    //     mlp->compute_gradient(&test_set, gradient);
+    //     mlp->compute_numerical_gradient(&test_set, numerical_gradient);
+    //     for(int i = 0; i < mlp->nb_weights(); i++) cout << "TEST " << gradient[i] << " " << numerical_gradient[i] << "\n";
+  }
+
+  // Flushing the log //////////////////////////////////////////////////
+
+  delete[] layer_sizes;
+}