X-Git-Url: https://fleuret.org/cgi-bin/gitweb/gitweb.cgi?a=blobdiff_plain;f=clueless-kmean.cc;fp=clueless-kmean.cc;h=0000000000000000000000000000000000000000;hb=8f8e8f2fb669aa421c245eada82095fb3fdcadc9;hp=557f0d8d2a161fd789aac9d2d2d28c57fe20629d;hpb=056eef1e23b3f6e5218b7bc3800f6412a3f97bfc;p=clueless-kmeans.git diff --git a/clueless-kmean.cc b/clueless-kmean.cc deleted file mode 100644 index 557f0d8..0000000 --- a/clueless-kmean.cc +++ /dev/null @@ -1,130 +0,0 @@ -/* - * clueless-kmean is a variant of k-mean which enforces balanced - * distribution of classes in every cluster - * - * Copyright (c) 2013 Idiap Research Institute, http://www.idiap.ch/ - * Written by Francois Fleuret - * - * This file is part of clueless-kmean. - * - * clueless-kmean is free software: you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * version 3 as published by the Free Software Foundation. - * - * clueless-kmean is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with selector. If not, see . - * - */ - -#include -#include -#include -#include -#include -#include - -using namespace std; - -#include "misc.h" -#include "arrays.h" -#include "sample_set.h" -#include "clusterer.h" - -void generate_toy_problem(SampleSet *sample_set) { - int dim = 2; - int nb_points = 1000; - - sample_set->resize(dim, nb_points); - sample_set->nb_classes = 2; - - for(int n = 0; n < nb_points; n++) { - sample_set->labels[n] = int(drand48() * 2); - if(sample_set->labels[n] == 0) { - sample_set->points[n][0] = (2 * drand48() - 1) * 0.8; - sample_set->points[n][1] = - 0.6 + (2 * drand48() - 1) * 0.4; - } else { - sample_set->points[n][0] = (2 * drand48() - 1) * 0.4; - sample_set->points[n][1] = 0.6 + (2 * drand48() - 1) * 0.4; - } - } -} - -int main(int argc, char **argv) { - SampleSet sample_set; - Clusterer clusterer; - int nb_clusters = 3; - - generate_toy_problem(&sample_set); - - { - ofstream out("points.dat"); - for(int n = 0; n < sample_set.nb_points; n++) { - out << sample_set.labels[n]; - for(int d = 0; d < sample_set.dim; d++) { - out << " " << sample_set.points[n][d]; - } - out << endl; - } - } - - int *associated_clusters = new int[sample_set.nb_points]; - - glp_term_out(0); - - int mode; - - if(argc == 2) { - if(strcmp(argv[1], "standard") == 0) { - mode = Clusterer::STANDARD_LP_ASSOCIATION; - } else if(strcmp(argv[1], "clueless") == 0) { - mode = Clusterer::UNINFORMATIVE_LP_ASSOCIATION; - } else { - cerr << "Unknown association mode " << argv[1] << endl; - exit(EXIT_FAILURE); - } - } else { - cerr << "Usage: " << argv[0] << " standard|clueless" << endl; - exit(EXIT_FAILURE); - } - - clusterer.train(mode, - nb_clusters, - sample_set.dim, - sample_set.nb_points, sample_set.points, - sample_set.nb_classes, sample_set.labels, - associated_clusters); - - { - ofstream out("associated_clusters.dat"); - for(int n = 0; n < sample_set.nb_points; n++) { - out << associated_clusters[n]; - for(int d = 0; d < sample_set.dim; d++) { - out << " " << sample_set.points[n][d]; - } - out << endl; - } - } - - { - ofstream out("clusters.dat"); - for(int k = 0 ; k < clusterer._nb_clusters; k++) { - out << k; - for(int d = 0; d < sample_set.dim; d++) { - out << " " << clusterer._cluster_means[k][d]; - } - for(int d = 0; d < sample_set.dim; d++) { - out << " " << 2 * sqrt(clusterer._cluster_var[k][d]); - } - out << endl; - } - } - - delete[] associated_clusters; - - glp_free_env(); // I do not want valgrind to complain -}