2 * clueless-kmeans is a variant of k-means which enforces balanced
3 * distribution of classes in every cluster
5 * Copyright (c) 2013 Idiap Research Institute, http://www.idiap.ch/
6 * Written by Francois Fleuret <francois.fleuret@idiap.ch>
8 * This file is part of clueless-kmeans.
10 * clueless-kmeans is free software: you can redistribute it and/or
11 * modify it under the terms of the GNU General Public License
12 * version 3 as published by the Free Software Foundation.
14 * clueless-kmeans is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * General Public License for more details.
19 * You should have received a copy of the GNU General Public License
20 * along with clueless-kmeans. If not, see <http://www.gnu.org/licenses/>.
36 // Same, implemented as an LP problem for sanity check
37 STANDARD_LP_ASSOCIATION,
38 // Criterion forcing to have the same distribution of classes in
// every cluster
40 UNINFORMATIVE_LP_ASSOCIATION,
41 // Criterion forcing to have the same number of samples of each
42 // class in all clusters
43 UNINFORMATIVE_LP_ASSOCIATION_ABSOLUTE
// Tuning constants. Where they are consumed is not visible in this
// header excerpt; the per-constant notes below are inferred from the
// names and should be confirmed against the implementation.
// Presumably the cap on the number of training iterations.
46 const static int max_nb_iterations = 10;
// Presumably a ratio threshold between the costs of successive
// iterations, used as a stopping test -- TODO confirm.
47 const static scalar_t min_iteration_improvement = 0.999;
// Presumably a lower bound applied to cluster variances -- TODO confirm.
48 const static scalar_t min_cluster_variance = 0.01f;
// Cluster parameters, stored as pointer-to-pointer arrays of scalar_t.
// Presumably indexed [cluster][dimension] and holding the per-cluster
// means and variances -- TODO confirm layout against the allocation code.
53 scalar_t **_cluster_means, **_cluster_var;
// Takes a sample x and an integer k and returns a scalar_t; presumably
// the distance between x and the centroid of cluster k. Whether it is
// squared and/or variance-normalized is not visible here -- confirm.
55 scalar_t distance_to_centroid(scalar_t *x, int k);
// Presumably sets the initial cluster parameters from the nb_points
// samples in points.
// NOTE(review): the initialization strategy is not visible in this
// header -- see the implementation.
57 void initialize_clusters(int nb_points, scalar_t **points);
59 // Standard hard k-means association
61 scalar_t baseline_cluster_association(int nb_points, scalar_t **points,
62 int nb_classes, int *labels,
65 // Standard k-means association implemented as an LP optimization
67 scalar_t baseline_lp_cluster_association(int nb_points, scalar_t **points,
68 int nb_classes, int *labels,
71 // Association under the constraint that each cluster gets the same
72 // class proportions as the overall training set
// NOTE(review): absolute_proportion presumably selects the variant that
// equalizes the number of samples of each class across clusters (cf.
// UNINFORMATIVE_LP_ASSOCIATION_ABSOLUTE) instead of the proportions --
// confirm against the implementation.
74 scalar_t uninformative_lp_cluster_association(int nb_points, scalar_t **points,
75 int nb_classes, int *labels,
77 int absolute_proportion);
// Presumably recomputes the cluster parameters (_cluster_means and
// _cluster_var) from the nb_points samples in points and the
// association weights in gamma -- TODO confirm; the layout of gamma is
// not visible in this header.
79 void update_clusters(int nb_points, scalar_t **points, scalar_t **gamma);
86 int nb_clusters, int dim,
87 int nb_points, scalar_t **points,
88 int nb_classes, int *labels,
89 // This last array returns, for each sample, the cluster it was
90 // associated with. It can be null.
91 int *cluster_associations);
// Takes a single sample and returns an int; presumably the identifier
// of the cluster the sample is associated with -- TODO confirm the
// assignment rule against the implementation.
93 int cluster(scalar_t *point);