2 * clueless-kmean is a variant of k-mean which enforces balanced
3 * distribution of classes in every cluster
5 * Copyright (c) 2013 Idiap Research Institute, http://www.idiap.ch/
6 * Written by Francois Fleuret <francois.fleuret@idiap.ch>
8 * This file is part of clueless-kmean.
10 * clueless-kmean is free software: you can redistribute it and/or
11 * modify it under the terms of the GNU General Public License
12 * version 3 as published by the Free Software Foundation.
14 * clueless-kmean is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * General Public License for more details.
19 * You should have received a copy of the GNU General Public License
20 * along with clueless-kmean. If not, see <http://www.gnu.org/licenses/>.
35 STANDARD_LP_ASSOCIATION,
36 UNINFORMATIVE_LP_ASSOCIATION
// Iteration-control constants (values 10 and 0.999).
// NOTE(review): the code consuming them is not visible here. From the names,
// max_nb_iterations presumably caps the number of training iterations and
// min_iteration_improvement presumably is the cost-ratio threshold used to
// decide when to stop -- confirm against the training loop.
// NOTE(review): if scalar_t is a floating-point type and these are class
// members, the in-class initializer of the second constant relies on a
// compiler extension (standard C++11 requires constexpr) -- confirm.
39 const static int max_nb_iterations = 10;
40 const static scalar_t min_iteration_improvement = 0.999;
// Two scalar_t pointer-to-pointer tables holding the cluster parameters.
// Presumably the per-cluster means and variances, indexed
// [cluster][dimension] -- TODO confirm layout and who allocates/frees them.
44 scalar_t **_cluster_means, **_cluster_var;
// Takes a point x and an integer k and returns a scalar_t.
// Presumably the distance between x and the centroid of cluster k; whether
// it is squared and/or normalized by the cluster variance is not visible
// here -- confirm in the implementation.
46 scalar_t distance_to_centroid(scalar_t *x, int k);
// Takes the number of points and the array of points; returns nothing.
// Presumably sets the initial cluster parameters from the data; the
// initialization strategy is not visible here -- confirm.
48 void initialize_clusters(int nb_points, scalar_t **points);
50 // Standard hard k-mean association
52 scalar_t baseline_cluster_association(int nb_points, scalar_t **points,
53 int nb_classes, int *labels,
56 // Standard k-mean association implemented as an LP optimization
58 scalar_t baseline_lp_cluster_association(int nb_points, scalar_t **points,
59 int nb_classes, int *labels,
62 // Association under the constraint that each cluster gets the same
63 // class proportions as the overall training set
65 scalar_t uninformative_lp_cluster_association(int nb_points, scalar_t **points,
66 int nb_classes, int *labels,
// Takes the number of points, the array of points and a table gamma;
// returns nothing. Presumably re-estimates the cluster parameters from the
// points weighted by the associations in gamma (assumed to be indexed
// [point][cluster] -- TODO confirm against the association routines).
69 void update_clusters(int nb_points, scalar_t **points, scalar_t **gamma);
76 int nb_clusters, int dim,
77 int nb_points, scalar_t **points,
78 int nb_classes, int *labels,
79 // On return, this last array holds, for each sample, the cluster
80 // it was associated with. It may be null.
81 int *cluster_associations);
// Takes a single point and returns an int.
// Presumably the index of the cluster the point is assigned to -- confirm
// the assignment rule in the implementation.
83 int cluster(scalar_t *point);