Cosmetics.

author Francois Fleuret <francois@fleuret.org>

Wed, 27 Mar 2013 20:11:48 +0000 (21:11 +0100)

committer Francois Fleuret <francois@fleuret.org>

Wed, 27 Mar 2013 20:11:48 +0000 (21:11 +0100)
author Francois Fleuret <francois@fleuret.org>
Wed, 27 Mar 2013 20:11:48 +0000 (21:11 +0100)
committer Francois Fleuret <francois@fleuret.org>
Wed, 27 Mar 2013 20:11:48 +0000 (21:11 +0100)
diff --git a/clusterer.cc b/clusterer.cc

index 3c33f3c..04a9af4 100644 (file)
--- a/clusterer.cc
+++ b/clusterer.cc
@@ -35,6 +35,8 @@ Clusterer::~Clusterer() {
  }
  
  scalar_t Clusterer::distance_to_centroid(scalar_t *x, int k) {
+  // We take the variance into account, plus the normalization term.
+  // This is between k-means and EM with a diagonal covariance.
    scalar_t dist = 0;
    for(int d = 0; d < _dim; d++) {
      dist += sq(_cluster_means[k][d] - x[d]) / (2 * _cluster_var[k][d]);
@@ -185,7 +187,8 @@ scalar_t Clusterer::uninformative_lp_cluster_association(int nb_points, scalar_t
    // association coefficients is equal to 1.0
  
    for(int n = 1; n <= nb_points; n++) {
-    glp_set_row_bnds(lp, n, GLP_FX, 1.0, 1.0);
+    int row = n;
+    glp_set_row_bnds(lp, row, GLP_FX, 1.0, 1.0);
    }
  
    // (B) For each pair cluster/class, the sum of the association
@@ -207,24 +210,17 @@ scalar_t Clusterer::uninformative_lp_cluster_association(int nb_points, scalar_t
  
    for(int k = 1; k <= _nb_clusters; k++) {
      for(int n = 1; n <= nb_points; n++) {
-      int r = n + nb_points * (k - 1);
-
-      // scalar_t dist = 0;
-
-      // for(int d = 0; d < _dim; d++) {
-        // dist += sq(_cluster_means[k-1][d] - points[n-1][d]) / (2 * _cluster_var[k-1][d]);
-        // dist += 0.5 * log(_cluster_var[k-1][d]);
-      // }
+      int col = n + nb_points * (k - 1);
  
        // The LP weight on this association coefficient for the global
        // loss is the normalized distance of that sample to the
        // centroid of that cluster
  
-      glp_set_obj_coef(lp, r, distance_to_centroid(points[n-1], k-1));
+      glp_set_obj_coef(lp, col, distance_to_centroid(points[n-1], k-1));
  
-      // And this association coefficient is in [0,1]
+      // And each association coefficient is in [0,1]
  
-      glp_set_col_bnds(lp, r, GLP_DB, 0.0, 1.0);
+      glp_set_col_bnds(lp, col, GLP_DB, 0.0, 1.0);
      }
    }
  
@@ -311,11 +307,13 @@ void Clusterer::update_clusters(int nb_points, scalar_t **points, scalar_t **gam
  
      for(int d = 0; d < _dim; d++) {
        if(sum_gamma >= 2) {
-        _cluster_var[k][d] = (_cluster_var[k][d] - sq(_cluster_means[k][d]) / sum_gamma) / (sum_gamma - 1);
+        _cluster_var[k][d] =
+          (_cluster_var[k][d] - sq(_cluster_means[k][d]) / sum_gamma) / (sum_gamma - 1);
+        _cluster_var[k][d] = max(scalar_t(min_cluster_variance), _cluster_var[k][d]);
        } else {
          _cluster_var[k][d] = 1;
        }
-      _cluster_var[k][d] = max(0.01, _cluster_var[k][d]);
+
        _cluster_means[k][d] /= sum_gamma;
      }
    }
diff --git a/clusterer.h b/clusterer.h

index f73d6c7..ad0c58f 100644 (file)
--- a/clusterer.h
+++ b/clusterer.h
@@ -38,6 +38,7 @@ public:
  
    const static int max_nb_iterations = 10;
    const static scalar_t min_iteration_improvement = 0.999;
+  const static scalar_t min_cluster_variance = 0.01f;
  
    int _nb_clusters;
    int _dim;
author	Francois Fleuret <francois@fleuret.org>
	Wed, 27 Mar 2013 20:11:48 +0000 (21:11 +0100)
committer	Francois Fleuret <francois@fleuret.org>
	Wed, 27 Mar 2013 20:11:48 +0000 (21:11 +0100)
clusterer.cc		patch \| blob \| history
clusterer.h		patch \| blob \| history