2 * clueless-kmeans is a variant of k-means which enforces balanced
3 * distribution of classes in every cluster
5 * Copyright (c) 2013 Idiap Research Institute, http://www.idiap.ch/
6 * Written by Francois Fleuret <francois.fleuret@idiap.ch>
8 * This file is part of clueless-kmeans.
10 * clueless-kmeans is free software: you can redistribute it and/or
11 * modify it under the terms of the GNU General Public License
12 * version 3 as published by the Free Software Foundation.
14 * clueless-kmeans is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * General Public License for more details.
19 * You should have received a copy of the GNU General Public License
20 * along with clueless-kmeans. If not, see <http://www.gnu.org/licenses/>.
35 #include "sample_set.h"
36 #include "clusterer.h"
// Fills *sample_set with a synthetic two-class, randomly generated point set.
//
// The set is resized to (dim, nb_points) and nb_classes is set to 2. Every
// point then receives a label computed as int(drand48() * 2) and two
// coordinates of the form centre + (2 * drand48() - 1) * half_width.
//
// NOTE(review): `dim` and `nb_points` are defined on lines that are not
// visible in this excerpt; only coordinates 0 and 1 are written here.
38 void generate_toy_problem(SampleSet *sample_set) {
42 sample_set->resize(dim, nb_points);
43 sample_set->nb_classes = 2;
45 for(int n = 0; n < nb_points; n++) {
// Random label: truncation of a value drawn by drand48() and scaled by 2.
46 sample_set->labels[n] = int(drand48() * 2);
47 if(sample_set->labels[n] == 0) {
// Label 0: coordinate 0 is centred on 0 with half-width 0.8,
// coordinate 1 is centred on -0.6 with half-width 0.4.
48 sample_set->points[n][0] = (2 * drand48() - 1) * 0.8;
49 sample_set->points[n][1] = - 0.6 + (2 * drand48() - 1) * 0.4;
// NOTE(review): presumably the `else` branch (the separating line is not
// visible here): coordinate 0 is centred on 0 with half-width 0.4,
// coordinate 1 is centred on +0.6 with half-width 0.4.
51 sample_set->points[n][0] = (2 * drand48() - 1) * 0.4;
52 sample_set->points[n][1] = 0.6 + (2 * drand48() - 1) * 0.4;
// Program entry point: builds the toy problem, selects an association mode
// from argv[1], runs a clustering call and dumps the results to text files.
//
// Files written (one record per line is presumably intended; the line
// terminators are on lines not visible in this excerpt):
//   points.dat              label followed by the point coordinates
//   associated_clusters.dat associated cluster followed by the coordinates
//   clusters.dat            per cluster: means, then 2 * sqrt(variance)
//
// NOTE(review): the declarations of `sample_set`, `clusterer` and `mode`,
// and the check that argv[1] exists, are not visible in this excerpt.
57 int main(int argc, char **argv) {
62 generate_toy_problem(&sample_set);
// Dump the generated samples: label first, then each coordinate prefixed
// by a single space.
65 ofstream out("points.dat");
66 for(int n = 0; n < sample_set.nb_points; n++) {
67 out << sample_set.labels[n];
68 for(int d = 0; d < sample_set.dim; d++) {
69 out << " " << sample_set.points[n][d];
// One int per sample; passed as the last argument of the call below and
// released with delete[] at the end of main.
75 int *associated_clusters = new int[sample_set.nb_points];
// Map the command-line keyword in argv[1] to a Clusterer association mode.
82 if(strcmp(argv[1], "standard") == 0) {
83 mode = Clusterer::STANDARD_LP_ASSOCIATION;
84 } else if(strcmp(argv[1], "clueless") == 0) {
85 mode = Clusterer::UNINFORMATIVE_LP_ASSOCIATION;
86 } else if(strcmp(argv[1], "clueless-absolute") == 0) {
87 mode = Clusterer::UNINFORMATIVE_LP_ASSOCIATION_ABSOLUTE;
// Unrecognised keyword: report it on stderr.
89 cerr << "Unknown association mode " << argv[1] << endl;
// Usage message listing the accepted keywords.
// NOTE(review): presumably printed when the argument is missing -- the
// guarding condition is not visible here.
93 cerr << "Usage: " << argv[0] << " standard|clueless|clueless-absolute" << endl;
// Trailing arguments of a call whose callee and leading arguments are on a
// line not visible here (presumably the clusterer's training routine --
// confirm): the samples, their labels, and the output association array.
100 sample_set.nb_points, sample_set.points,
101 sample_set.nb_classes, sample_set.labels,
102 associated_clusters);
// Dump each sample's associated cluster followed by its coordinates.
105 ofstream out("associated_clusters.dat");
106 for(int n = 0; n < sample_set.nb_points; n++) {
107 out << associated_clusters[n];
108 for(int d = 0; d < sample_set.dim; d++) {
109 out << " " << sample_set.points[n][d];
// Dump the cluster parameters: for every cluster, the per-dimension means
// followed by twice the square root of the per-dimension variances.
116 ofstream out("clusters.dat");
117 for(int k = 0 ; k < clusterer._nb_clusters; k++) {
119 for(int d = 0; d < sample_set.dim; d++) {
120 out << " " << clusterer._cluster_means[k][d];
122 for(int d = 0; d < sample_set.dim; d++) {
123 out << " " << 2 * sqrt(clusterer._cluster_var[k][d]);
// Release the association array allocated with new[] above.
129 delete[] associated_clusters;
131 glp_free_env(); // I do not want valgrind to complain