+++ /dev/null
-/*
- * clueless-kmean is a variant of k-mean which enforces balanced
- * distribution of classes in every cluster
- *
- * Copyright (c) 2013 Idiap Research Institute, http://www.idiap.ch/
- * Written by Francois Fleuret <francois.fleuret@idiap.ch>
- *
- * This file is part of clueless-kmean.
- *
- * clueless-kmean is free software: you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * version 3 as published by the Free Software Foundation.
- *
- * clueless-kmean is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with clueless-kmean. If not, see <http://www.gnu.org/licenses/>.
- *
- */
-
-#include <iostream>
-#include <fstream>
-#include <stdio.h>
-#include <stdlib.h>
-#include <float.h>
-#include <glpk.h>
-
-using namespace std;
-
-#include "misc.h"
-#include "arrays.h"
-#include "sample_set.h"
-#include "clusterer.h"
-
-// Fills sample_set with a synthetic two-class, two-dimensional problem
-// of 1000 points. Each point gets a label drawn with drand48(); the
-// label selects the horizontal half-width and the vertical centre of
-// the box from which the point's coordinates are then drawn.
-void generate_toy_problem(SampleSet *sample_set) {
-  const int nb_dims = 2;
-  const int nb_samples = 1000;
-
-  sample_set->resize(nb_dims, nb_samples);
-  sample_set->nb_classes = 2;
-
-  for(int i = 0; i < nb_samples; i++) {
-    // The order of the drand48() calls (label, then x, then y) is kept
-    // so that the generated sequence is unchanged.
-    sample_set->labels[i] = int(drand48() * 2);
-
-    const bool is_first_class = (sample_set->labels[i] == 0);
-    const double x_half_width = is_first_class ? 0.8 : 0.4;
-    const double y_center = is_first_class ? - 0.6 : 0.6;
-
-    sample_set->points[i][0] = (2 * drand48() - 1) * x_half_width;
-    sample_set->points[i][1] = y_center + (2 * drand48() - 1) * 0.4;
-  }
-}
-
-// Entry point. Expects exactly one argument, "standard" or "clueless",
-// which selects the association mode passed to Clusterer::train.
-//
-// Generates the toy sample set, writes it to points.dat, trains the
-// clusterer with 3 clusters, then writes the per-point cluster
-// assignment to associated_clusters.dat and the per-cluster statistics
-// to clusters.dat.
-//
-// Exits with EXIT_FAILURE on a missing or unknown mode argument.
-int main(int argc, char **argv) {
-  // Validate the command line before doing anything else, so that a
-  // bad invocation does not leave a points.dat behind.
-  if(argc != 2) {
-    cerr << "Usage: " << argv[0] << " standard|clueless" << endl;
-    exit(EXIT_FAILURE);
-  }
-
-  int mode;
-
-  if(strcmp(argv[1], "standard") == 0) {
-    mode = Clusterer::STANDARD_LP_ASSOCIATION;
-  } else if(strcmp(argv[1], "clueless") == 0) {
-    mode = Clusterer::UNINFORMATIVE_LP_ASSOCIATION;
-  } else {
-    cerr << "Unknown association mode " << argv[1] << endl;
-    exit(EXIT_FAILURE);
-  }
-
-  SampleSet sample_set;
-  Clusterer clusterer;
-  int nb_clusters = 3;
-
-  generate_toy_problem(&sample_set);
-
-  // One line per point: label followed by the point's coordinates.
-  {
-    ofstream out("points.dat");
-    for(int n = 0; n < sample_set.nb_points; n++) {
-      out << sample_set.labels[n];
-      for(int d = 0; d < sample_set.dim; d++) {
-        out << " " << sample_set.points[n][d];
-      }
-      out << '\n';
-    }
-  }
-
-  int *associated_clusters = new int[sample_set.nb_points];
-
-  glp_term_out(0); // Silence GLPK's terminal output
-
-  clusterer.train(mode,
-                  nb_clusters,
-                  sample_set.dim,
-                  sample_set.nb_points, sample_set.points,
-                  sample_set.nb_classes, sample_set.labels,
-                  associated_clusters);
-
-  // One line per point: associated cluster followed by the point's
-  // coordinates.
-  {
-    ofstream out("associated_clusters.dat");
-    for(int n = 0; n < sample_set.nb_points; n++) {
-      out << associated_clusters[n];
-      for(int d = 0; d < sample_set.dim; d++) {
-        out << " " << sample_set.points[n][d];
-      }
-      out << '\n';
-    }
-  }
-
-  // One line per cluster: index, the entries of its mean, then twice
-  // the square root of each entry of its variance.
-  {
-    ofstream out("clusters.dat");
-    for(int k = 0 ; k < clusterer._nb_clusters; k++) {
-      out << k;
-      for(int d = 0; d < sample_set.dim; d++) {
-        out << " " << clusterer._cluster_means[k][d];
-      }
-      for(int d = 0; d < sample_set.dim; d++) {
-        out << " " << 2 * sqrt(clusterer._cluster_var[k][d]);
-      }
-      out << '\n';
-    }
-  }
-
-  delete[] associated_clusters;
-
-  glp_free_env(); // I do not want valgrind to complain
-
-  return EXIT_SUCCESS;
-}