--- /dev/null
+/*
+ * clueless-kmean is a variant of k-mean which enforces balanced
+ * distribution of classes in every cluster
+ *
+ * Copyright (c) 2013 Idiap Research Institute, http://www.idiap.ch/
+ * Written by Francois Fleuret <francois.fleuret@idiap.ch>
+ *
+ * This file is part of clueless-kmean.
+ *
+ * clueless-kmean is free software: you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * version 3 as published by the Free Software Foundation.
+ *
+ * clueless-kmean is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with clueless-kmean. If not, see <http://www.gnu.org/licenses/>.
+ *
+ */
+
+#include <float.h>
+#include <math.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include <fstream>
+#include <iostream>
+
+#include <glpk.h>
+
+using namespace std;
+
+#include "misc.h"
+#include "arrays.h"
+#include "sample_set.h"
+#include "clusterer.h"
+
+// Fills sample_set with a synthetic two-class problem in the plane.
+//
+// The set is resized to 1000 points of dimension 2 and nb_classes is
+// set to 2. Every point receives a label in {0, 1} drawn with
+// drand48(), then its two coordinates:
+//   label 0: x in [-0.8, 0.8), y in [-1.0, -0.2)
+//   label 1: x in [-0.4, 0.4), y in [ 0.2,  1.0)
+// The random generator is not seeded here.
+void generate_toy_problem(SampleSet *sample_set) {
+  int dimension = 2;
+  int total_points = 1000;
+  const double y_half_height = 0.4;
+
+  sample_set->resize(dimension, total_points);
+  sample_set->nb_classes = 2;
+
+  for(int i = 0; i < total_points; i++) {
+    // Draw order matters for reproducibility: label first, then x,
+    // then y.
+    const int label = int(drand48() * 2);
+
+    // Class 0 is the wide box at the bottom, class 1 the narrow box
+    // at the top.
+    const double x_half_width = (label == 0) ? 0.8 : 0.4;
+    const double y_center = (label == 0) ? -0.6 : 0.6;
+
+    sample_set->labels[i] = label;
+    sample_set->points[i][0] = (2 * drand48() - 1) * x_half_width;
+    sample_set->points[i][1] = y_center + (2 * drand48() - 1) * y_half_height;
+  }
+}
+
+// Opens filename for writing on out. If the stream cannot be opened,
+// prints a message on stderr and terminates the program with
+// EXIT_FAILURE, so that results are never silently dropped.
+static void open_for_writing_or_die(ofstream &out, const char *filename) {
+  out.open(filename);
+  if(out.fail()) {
+    cerr << "Can not open " << filename << " for writing." << endl;
+    exit(EXIT_FAILURE);
+  }
+}
+
+// Entry point.
+//
+// Usage: <program> standard|clueless
+//
+// Generates the toy problem, runs Clusterer::train with 3 clusters in
+// the association mode selected on the command line, and writes three
+// text files in the current directory, one record per line with
+// space-separated fields:
+//   points.dat              label, then the point coordinates
+//   associated_clusters.dat entry of the associated_clusters array
+//                           passed to Clusterer::train, then the
+//                           point coordinates
+//   clusters.dat            cluster index, then _cluster_means per
+//                           dimension, then 2 * sqrt(_cluster_var)
+//                           per dimension
+//
+// Exits with EXIT_FAILURE on a bad command line or if an output file
+// cannot be opened.
+int main(int argc, char **argv) {
+  // Validate the command line first, so that a usage error has no
+  // side effect (no file written, nothing allocated).
+  const char *program_name =
+    (argc > 0 && argv[0]) ? argv[0] : "clueless-kmean";
+
+  if(argc != 2) {
+    cerr << "Usage: " << program_name << " standard|clueless" << endl;
+    exit(EXIT_FAILURE);
+  }
+
+  int mode;
+
+  if(strcmp(argv[1], "standard") == 0) {
+    mode = Clusterer::STANDARD_LP_ASSOCIATION;
+  } else if(strcmp(argv[1], "clueless") == 0) {
+    mode = Clusterer::UNINFORMATIVE_LP_ASSOCIATION;
+  } else {
+    cerr << "Unknown association mode " << argv[1] << endl;
+    exit(EXIT_FAILURE);
+  }
+
+  SampleSet sample_set;
+  Clusterer clusterer;
+  int nb_clusters = 3;
+
+  // Turn off GLPK terminal output.
+  glp_term_out(0);
+
+  generate_toy_problem(&sample_set);
+
+  {
+    ofstream out;
+    open_for_writing_or_die(out, "points.dat");
+    for(int n = 0; n < sample_set.nb_points; n++) {
+      out << sample_set.labels[n];
+      for(int d = 0; d < sample_set.dim; d++) {
+        out << " " << sample_set.points[n][d];
+      }
+      out << '\n';
+    }
+  }
+
+  int *associated_clusters = new int[sample_set.nb_points];
+
+  clusterer.train(mode,
+                  nb_clusters,
+                  sample_set.dim,
+                  sample_set.nb_points, sample_set.points,
+                  sample_set.nb_classes, sample_set.labels,
+                  associated_clusters);
+
+  {
+    ofstream out;
+    open_for_writing_or_die(out, "associated_clusters.dat");
+    for(int n = 0; n < sample_set.nb_points; n++) {
+      out << associated_clusters[n];
+      for(int d = 0; d < sample_set.dim; d++) {
+        out << " " << sample_set.points[n][d];
+      }
+      out << '\n';
+    }
+  }
+
+  {
+    ofstream out;
+    open_for_writing_or_die(out, "clusters.dat");
+    for(int k = 0; k < clusterer._nb_clusters; k++) {
+      out << k;
+      for(int d = 0; d < sample_set.dim; d++) {
+        out << " " << clusterer._cluster_means[k][d];
+      }
+      // Two standard deviations per dimension.
+      for(int d = 0; d < sample_set.dim; d++) {
+        out << " " << 2 * sqrt(clusterer._cluster_var[k][d]);
+      }
+      out << '\n';
+    }
+  }
+
+  delete[] associated_clusters;
+
+  glp_free_env(); // I do not want valgrind to complain
+
+  return 0;
+}