2 * clueless-kmean is a variant of k-mean which enforces balanced
3 * distribution of classes in every cluster
5 * Copyright (c) 2013 Idiap Research Institute, http://www.idiap.ch/
6 * Written by Francois Fleuret <francois.fleuret@idiap.ch>
8 * This file is part of clueless-kmean.
10 * clueless-kmean is free software: you can redistribute it and/or
11 * modify it under the terms of the GNU General Public License
12 * version 3 as published by the Free Software Foundation.
14 * clueless-kmean is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * General Public License for more details.
19 * You should have received a copy of the GNU General Public License
20 * along with clueless-kmean. If not, see <http://www.gnu.org/licenses/>.
35 #include "sample_set.h"
36 #include "clusterer.h"
// Fills sample_set with a synthetic two-class data set: every point gets a
// random label in {0, 1} and its coordinates 0 and 1 are drawn uniformly in
// an axis-aligned box.
// NOTE(review): dim and nb_points are defined on lines not visible in this
// excerpt -- confirm their values before relying on the sizes.
38 void generate_toy_problem(SampleSet *sample_set) {
42 sample_set->resize(dim, nb_points);
43 sample_set->nb_classes = 2;
45 for(int n = 0; n < nb_points; n++) {
// drand48() lies in [0, 1), so the truncated product is either 0 or 1.
46 sample_set->labels[n] = int(drand48() * 2);
47 if(sample_set->labels[n] == 0) {
// Label 0: x uniform in [-0.8, 0.8), y uniform in [-1.0, -0.2).
48 sample_set->points[n][0] = (2 * drand48() - 1) * 0.8;
49 sample_set->points[n][1] = - 0.6 + (2 * drand48() - 1) * 0.4;
// x uniform in [-0.4, 0.4), y uniform in [0.2, 1.0).
// NOTE(review): presumably the else branch (label 1); the branching line
// itself is not visible in this excerpt.
51 sample_set->points[n][0] = (2 * drand48() - 1) * 0.4;
52 sample_set->points[n][1] = 0.6 + (2 * drand48() - 1) * 0.4;
// Program entry point: builds the toy sample set, trains the clusterer on
// it, and dumps the points, the per-point cluster assignments and the
// per-cluster statistics to three text files.
// NOTE(review): the declarations of sample_set, clusterer and nb_clusters are
// on lines not visible in this excerpt.
57 int main(int argc, char **argv) {
62 generate_toy_problem(&sample_set);
// Dump the raw data: for each point, its label followed by its coordinates,
// each coordinate preceded by a single space.
65 ofstream out("points.dat");
66 for(int n = 0; n < sample_set.nb_points; n++) {
67 out << sample_set.labels[n];
68 for(int d = 0; d < sample_set.dim; d++) {
69 out << " " << sample_set.points[n][d];
// One int slot per point; released by the delete[] at the end of main.
// NOTE(review): presumably filled by clusterer.train() -- the trailing
// arguments of that call are not visible here.
75 int *associated_clusters = new int[sample_set.nb_points];
79 clusterer.train(nb_clusters, sample_set.dim,
80 sample_set.nb_points, sample_set.points,
81 sample_set.nb_classes, sample_set.labels,
// Dump the assignments: for each point, its associated_clusters entry
// followed by its coordinates.
85 ofstream out("associated_clusters.dat");
86 for(int n = 0; n < sample_set.nb_points; n++) {
87 out << associated_clusters[n];
88 for(int d = 0; d < sample_set.dim; d++) {
89 out << " " << sample_set.points[n][d];
// Dump the clusters: for each cluster, its mean along every dimension, then
// 2 * sqrt(_cluster_var) along every dimension.
// NOTE(review): if _cluster_var holds variances, the second group is two
// standard deviations per axis -- confirm against the clusterer.
96 ofstream out("clusters.dat");
97 for(int k = 0 ; k < clusterer._nb_clusters; k++) {
99 for(int d = 0; d < sample_set.dim; d++) {
100 out << " " << clusterer._cluster_means[k][d];
102 for(int d = 0; d < sample_set.dim; d++) {
103 out << " " << 2 * sqrt(clusterer._cluster_var[k][d]);
109 delete[] associated_clusters;