2 * clueless-kmeans is a variant of k-means which enforces balanced
3 * distribution of classes in every cluster
5 * Copyright (c) 2013 Idiap Research Institute, http://www.idiap.ch/
6 * Written by Francois Fleuret <francois.fleuret@idiap.ch>
8 * This file is part of clueless-kmeans.
10 * clueless-kmeans is free software: you can redistribute it and/or
11 * modify it under the terms of the GNU General Public License
12 * version 3 as published by the Free Software Foundation.
14 * clueless-kmeans is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * General Public License for more details.
19 * You should have received a copy of the GNU General Public License
20 * along with clueless-kmeans. If not, see <http://www.gnu.org/licenses/>.
35 STANDARD_LP_ASSOCIATION,
36 UNINFORMATIVE_LP_ASSOCIATION
39 const static int max_nb_iterations = 10;
40 const static scalar_t min_iteration_improvement = 0.999;
41 const static scalar_t min_cluster_variance = 0.01f;
46 scalar_t **_cluster_means, **_cluster_var;
48 scalar_t distance_to_centroid(scalar_t *x, int k);
50 void initialize_clusters(int nb_points, scalar_t **points);
52 // Standard hard k-means association
54 scalar_t baseline_cluster_association(int nb_points, scalar_t **points,
55 int nb_classes, int *labels,
58 // Standard k-means association implemented as an LP optimization
60 scalar_t baseline_lp_cluster_association(int nb_points, scalar_t **points,
61 int nb_classes, int *labels,
64 // Association under the constraint that each cluster gets the same
65 // class proportions as the overall training set
67 scalar_t uninformative_lp_cluster_association(int nb_points, scalar_t **points,
68 int nb_classes, int *labels,
71 void update_clusters(int nb_points, scalar_t **points, scalar_t **gamma);
78 int nb_clusters, int dim,
79 int nb_points, scalar_t **points,
80 int nb_classes, int *labels,
81 // This last array returns for each sample to what
82 // cluster it was associated. It can be null.
83 int *cluster_associations);
85 int cluster(scalar_t *point);