2 * clueless-kmeans is a variant of k-means which enforces balanced
3 * distribution of classes in every cluster
5 * Copyright (c) 2013 Idiap Research Institute, http://www.idiap.ch/
6 * Written by Francois Fleuret <francois.fleuret@idiap.ch>
8 * This file is part of clueless-kmeans.
10 * clueless-kmeans is free software: you can redistribute it and/or
11 * modify it under the terms of the GNU General Public License
12 * version 3 as published by the Free Software Foundation.
14 * clueless-kmeans is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * General Public License for more details.
19 * You should have received a copy of the GNU General Public License
20 * along with selector. If not, see <http://www.gnu.org/licenses/>.
30 char *next_word(char *buffer, char *r, int buffer_size) {
35 while((*r == ' ') || (*r == '\t') || (*r == ',')) r++;
38 while((*r != '"') && (*r != '\0') &&
39 (s<buffer+buffer_size-1))
43 while((*r != '\r') && (*r != '\n') && (*r != '\0') &&
44 (*r != '\t') && (*r != ' ') && (*r != ',')) {
45 if(s == buffer + buffer_size) {
46 cerr << "Buffer overflow in next_word." << endl;
53 while((*r == ' ') || (*r == '\t') || (*r == ',')) r++;
54 if((*r == '\0') || (*r=='\r') || (*r=='\n')) r = 0;
61 scalar_t discrete_entropy(int *n, int nb) {
62 scalar_t s = 0, t = 0;
63 for(int k = 0; k < nb; k++) if(n[k] > 0) {
64 s += n[k] * log(scalar_t(n[k]));
67 return (log(t) - s/scalar_t(t))/log(2.0);
70 void random_permutation(int *val, int nb) {
71 for(int k = 0; k < nb; k++) val[k] = k;
73 for(int k = 0; k < nb - 1; k++) {
74 i = int(drand48() * (nb - k)) + k;
81 void tag_subset(bool *val, int nb_total, int nb_to_tag) {
82 ASSERT(nb_to_tag <= nb_total);
84 random_permutation(index, nb_total);
85 for(int n = 0; n < nb_total; n++) val[n] = false;
86 for(int n = 0; n < nb_to_tag; n++) val[index[n]] = true;
89 int compare_couple(const void *a, const void *b) {
90 if(((Couple *) a)->value < ((Couple *) b)->value) return -1;
91 else if(((Couple *) a)->value > ((Couple *) b)->value) return 1;