kmean -> kmeans.

[clueless-kmeans.git] / clueless-kmean.cc
diff --git a/clueless-kmean.cc b/clueless-kmean.cc

deleted file mode 100644 (file)

index 557f0d8..0000000
--- a/clueless-kmean.cc
+++ /dev/null
@@ -1,130 +0,0 @@
-/*
- *  clueless-kmean is a variant of k-mean which enforces balanced
- *  distribution of classes in every cluster
- *
- *  Copyright (c) 2013 Idiap Research Institute, http://www.idiap.ch/
- *  Written by Francois Fleuret <francois.fleuret@idiap.ch>
- *
- *  This file is part of clueless-kmean.
- *
- *  clueless-kmean is free software: you can redistribute it and/or
- *  modify it under the terms of the GNU General Public License
- *  version 3 as published by the Free Software Foundation.
- *
- *  clueless-kmean is distributed in the hope that it will be useful,
- *  but WITHOUT ANY WARRANTY; without even the implied warranty of
- *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- *  General Public License for more details.
- *
- *  You should have received a copy of the GNU General Public License
- *  along with clueless-kmean.  If not, see <http://www.gnu.org/licenses/>.
- *
- */
-
-#include <iostream>
-#include <fstream>
-#include <stdio.h>
-#include <stdlib.h>
-#include <float.h>
-#include <glpk.h>
-
-using namespace std;
-
-#include "misc.h"
-#include "arrays.h"
-#include "sample_set.h"
-#include "clusterer.h"
-
-void generate_toy_problem(SampleSet *sample_set) { // Fills *sample_set with a random two-class toy data set in two dimensions
-  int dim = 2; // each point has two coordinates
-  int nb_points = 1000; // total number of samples generated
-
-  sample_set->resize(dim, nb_points); // NOTE(review): presumably sizes points[] and labels[] for these values -- confirm in sample_set.h
-  sample_set->nb_classes = 2;
-
-  for(int n = 0; n < nb_points; n++) { // NOTE(review): no srand48() call is visible in this file, so the draws are presumably identical on every run -- confirm
-    sample_set->labels[n] = int(drand48() * 2); // drand48() is in [0,1), so the label is 0 or 1
-    if(sample_set->labels[n] == 0) { // class 0: wide band in the lower half
-      sample_set->points[n][0] = (2 * drand48()  - 1) * 0.8; // x in [-0.8, 0.8)
-      sample_set->points[n][1] = - 0.6 + (2 * drand48()  - 1) * 0.4; // y in [-1.0, -0.2)
-    } else { // class 1: narrower band in the upper half
-      sample_set->points[n][0] = (2 * drand48()  - 1) * 0.4; // x in [-0.4, 0.4)
-      sample_set->points[n][1] =   0.6 + (2 * drand48()  - 1) * 0.4; // y in [0.2, 1.0)
-    }
-  }
-}
-
-int main(int argc, char **argv) { // Generates the toy data set, clusters it in the mode named by argv[1], and writes points.dat, associated_clusters.dat and clusters.dat
-  SampleSet sample_set;
-  Clusterer clusterer;
-  int nb_clusters = 3; // number of clusters passed to train()
-
-  generate_toy_problem(&sample_set);
-
-  { // scoped so the stream is destroyed (and the file closed) at the closing brace
-    ofstream out("points.dat"); // one line per sample: label, then its coordinates, space-separated
-    for(int n = 0; n < sample_set.nb_points; n++) {
-      out << sample_set.labels[n];
-      for(int d = 0; d < sample_set.dim; d++) {
-        out << " " << sample_set.points[n][d];
-      }
-      out << endl;
-    }
-  }
-
-  int *associated_clusters = new int[sample_set.nb_points]; // one int per sample, handed to train() and released by the delete[] at the end of main
-
-  glp_term_out(0); // NOTE(review): 0 is presumably GLP_OFF, i.e. GLPK terminal output is disabled -- confirm against glpk.h
-
-  int mode; // assigned from argv[1] below; every path that does not assign it exits
-
-  if(argc == 2) { // exactly one command-line argument is required
-    if(strcmp(argv[1], "standard") == 0) { // NOTE(review): strcmp is used but <string.h> is not included directly here; presumably it arrives through another header -- confirm
-      mode = Clusterer::STANDARD_LP_ASSOCIATION;
-    } else if(strcmp(argv[1], "clueless") == 0) {
-      mode = Clusterer::UNINFORMATIVE_LP_ASSOCIATION;
-    } else {
-      cerr << "Unknown association mode " << argv[1] << endl;
-      exit(EXIT_FAILURE); // exits without reaching the delete[] and glp_free_env() at the end of main
-    }
-  } else {
-    cerr << "Usage: " << argv[0] << " standard|clueless" << endl;
-    exit(EXIT_FAILURE); // same early exit as above
-  }
-
-  clusterer.train(mode, // NOTE(review): presumably fills associated_clusters and the clusterer's _cluster_means / _cluster_var read below -- confirm in clusterer.h
-                  nb_clusters,
-                  sample_set.dim,
-                  sample_set.nb_points, sample_set.points,
-                  sample_set.nb_classes, sample_set.labels,
-                  associated_clusters);
-
-  {
-    ofstream out("associated_clusters.dat"); // one line per sample: associated_clusters[n], then its coordinates
-    for(int n = 0; n < sample_set.nb_points; n++) {
-      out << associated_clusters[n];
-      for(int d = 0; d < sample_set.dim; d++) {
-        out << " " << sample_set.points[n][d];
-      }
-      out << endl;
-    }
-  }
-
-  {
-    ofstream out("clusters.dat"); // one line per cluster: index, _cluster_means per dimension, then 2*sqrt(_cluster_var) per dimension
-    for(int k = 0 ; k < clusterer._nb_clusters; k++) {
-      out << k;
-      for(int d = 0; d < sample_set.dim; d++) {
-        out << " " << clusterer._cluster_means[k][d];
-      }
-      for(int d = 0; d < sample_set.dim; d++) {
-        out << " " << 2 * sqrt(clusterer._cluster_var[k][d]); // NOTE(review): presumably two standard deviations, if _cluster_var holds per-dimension variances -- confirm; <math.h> is not included directly here either
-      }
-      out << endl;
-    }
-  }
-
-  delete[] associated_clusters;
-
-  glp_free_env(); // I do not want valgrind to complain
-}