misc.cc

   1 /*
   2  *  clueless-kmeans is a variant of k-means which enforces balanced
   3  *  distribution of classes in every cluster
   4  *
   5  *  Copyright (c) 2013 Idiap Research Institute, http://www.idiap.ch/
   6  *  Written by Francois Fleuret <francois.fleuret@idiap.ch>
   7  *
   8  *  This file is part of clueless-kmeans.
   9  *
  10  *  clueless-kmeans is free software: you can redistribute it and/or
  11  *  modify it under the terms of the GNU General Public License
  12  *  version 3 as published by the Free Software Foundation.
  13  *
  14  *  clueless-kmeans is distributed in the hope that it will be useful,
  15  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
  16  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  17  *  General Public License for more details.
  18  *
  19  *  You should have received a copy of the GNU General Public License
  20  *  along with selector.  If not, see <http://www.gnu.org/licenses/>.
  21  *
  22  */
  23
  24 #include <fstream>
  25
  26 using namespace std;
  27
  28 #include "misc.h"
  29
  30 char *next_word(char *buffer, char *r, int buffer_size) {
  31   char *s;
  32   s = buffer;
  33
  34   if(r != 0) {
  35     while((*r == ' ') || (*r == '\t') || (*r == ',')) r++;
  36     if(*r == '"') {
  37       r++;
  38       while((*r != '"') && (*r != '\0') &&
  39             (s<buffer+buffer_size-1))
  40         *s++ = *r++;
  41       if(*r == '"') r++;
  42     } else {
  43       while((*r != '\r') && (*r != '\n') && (*r != '\0') &&
  44             (*r != '\t') && (*r != ' ') && (*r != ',')) {
  45         if(s == buffer + buffer_size) {
  46           cerr << "Buffer overflow in next_word." << endl;
  47           exit(1);
  48         }
  49         *s++ = *r++;
  50       }
  51     }
  52
  53     while((*r == ' ') || (*r == '\t') || (*r == ',')) r++;
  54     if((*r == '\0') || (*r=='\r') || (*r=='\n')) r = 0;
  55   }
  56   *s = '\0';
  57
  58   return r;
  59 }
  60
  61 scalar_t discrete_entropy(int *n, int nb) {
  62   scalar_t s = 0, t = 0;
  63   for(int k = 0; k < nb; k++) if(n[k] > 0) {
  64     s += n[k] * log(scalar_t(n[k]));
  65     t += n[k];
  66   }
  67   return (log(t) - s/scalar_t(t))/log(2.0);
  68 }
  69
  70 void random_permutation(int *val, int nb) {
  71   for(int k = 0; k < nb; k++) val[k] = k;
  72   int i, t;
  73   for(int k = 0; k < nb - 1; k++) {
  74     i = int(drand48() * (nb - k)) + k;
  75     t = val[i];
  76     val[i] = val[k];
  77     val[k] = t;
  78   }
  79 }
  80
  81 void tag_subset(bool *val, int nb_total, int nb_to_tag) {
  82   ASSERT(nb_to_tag <= nb_total);
  83   int index[nb_total];
  84   random_permutation(index, nb_total);
  85   for(int n = 0; n < nb_total; n++) val[n] = false;
  86   for(int n = 0; n < nb_to_tag; n++) val[index[n]] = true;
  87 }
  88
  89 int compare_couple(const void *a, const void *b) {
  90   if(((Couple *) a)->value < ((Couple *) b)->value) return -1;
  91   else if(((Couple *) a)->value > ((Couple *) b)->value) return 1;
  92   else return 0;
  93 }