Added distance_to_centroid.

author Francois Fleuret <francois@fleuret.org>

Wed, 27 Mar 2013 12:04:56 +0000 (13:04 +0100)

committer Francois Fleuret <francois@fleuret.org>

Wed, 27 Mar 2013 12:04:56 +0000 (13:04 +0100)
author Francois Fleuret <francois@fleuret.org>
Wed, 27 Mar 2013 12:04:56 +0000 (13:04 +0100)
committer Francois Fleuret <francois@fleuret.org>
Wed, 27 Mar 2013 12:04:56 +0000 (13:04 +0100)
diff --git a/clusterer.cc b/clusterer.cc

index 9c5e7cb..3c33f3c 100644 (file)
--- a/clusterer.cc
+++ b/clusterer.cc
@@ -34,6 +34,31 @@ Clusterer::~Clusterer() {
    deallocate_array<scalar_t>(_cluster_var);
  }
  
+scalar_t Clusterer::distance_to_centroid(scalar_t *x, int k) { // cost of point x with respect to cluster k, summed over the _dim coordinates
+  scalar_t dist = 0; // running total returned to the caller
+  for(int d = 0; d < _dim; d++) {
+    dist += sq(_cluster_means[k][d] - x[d]) / (2 * _cluster_var[k][d]); // deviation from the cluster mean (presumably squared by sq()), divided by twice the variance
+    dist += 0.5 * log(_cluster_var[k][d]); // half the log-variance; this term is negative when the variance is below 1, so the total is not a metric distance
+    ASSERT(!isnan(dist) && !isinf(dist)); // checked after every coordinate, so a zero or negative variance is caught at the dimension where it occurs
+  }
+  return dist;
+}
+
+void Clusterer::initialize_clusters(int nb_points, scalar_t **points) { // seeds every cluster from a distinct, randomly drawn input point
+  int *used = new int[nb_points]; // used[l] is non-zero once point l has been taken as a seed
+  for(int k = 0; k < nb_points; k++) { used[k] = 0; }
+  for(int k = 0; k < _nb_clusters; k++) {
+    int l;
+    do { l = int(drand48() * nb_points); } while(used[l]); // redraw until an unused index comes up; NOTE(review): cannot terminate if nb_points < _nb_clusters
+    for(int d = 0; d < _dim; d++) {
+      _cluster_means[k][d] = points[l][d]; // the mean starts exactly at the chosen point
+      _cluster_var[k][d] = 1.0; // every dimension starts with variance 1
+    }
+    used[l] = 1; // prevents this point from seeding another cluster
+  }
+  delete[] used;
+}
+
  scalar_t Clusterer::baseline_cluster_association(int nb_points, scalar_t **points,
                                                   int nb_classes, int *labels,
                                                   scalar_t **gamma)  {
@@ -43,14 +68,7 @@ scalar_t Clusterer::baseline_cluster_association(int nb_points, scalar_t **point
    for(int n = 0; n < nb_points; n++) {
      scalar_t lowest_dist = 0;
      for(int k = 0; k < _nb_clusters; k++) {
-      scalar_t dist = 0;
-
-      for(int d = 0; d < _dim; d++) {
-        dist += sq(_cluster_means[k][d] - points[n][d]) / (2 * _cluster_var[k][d]);
-        dist += 0.5 * log(_cluster_var[k][d]);
-        ASSERT(!isnan(dist) && !isinf(dist));
-      }
-
+      scalar_t dist = distance_to_centroid(points[n], k);
        if(k == 0 || dist <= lowest_dist) {
          lowest_dist = dist;
          associated_clusters[n] = k;
@@ -96,15 +114,7 @@ scalar_t Clusterer::baseline_lp_cluster_association(int nb_points, scalar_t **po
    for(int k = 1; k <= _nb_clusters; k++) {
      for(int n = 1; n <= nb_points; n++) {
        int i = n + nb_points * (k - 1);
-
-      scalar_t dist = 0;
-
-      for(int d = 0; d < _dim; d++) {
-        dist += sq(_cluster_means[k-1][d] - points[n-1][d]) / (2 * _cluster_var[k-1][d]);
-        dist += 0.5 * log(_cluster_var[k-1][d]);
-      }
-
-      glp_set_obj_coef(lp, i, dist);
+      glp_set_obj_coef(lp, i, distance_to_centroid(points[n-1], k-1));
        glp_set_col_bnds(lp, i, GLP_DB, 0.0, 1.0);
      }
    }
@@ -167,18 +177,18 @@ scalar_t Clusterer::uninformative_lp_cluster_association(int nb_points, scalar_t
    glp_set_prob_name(lp, "uninformative_lp_cluster_association");
    glp_set_obj_dir(lp, GLP_MIN);
  
-  // We have one constraint per points and one per cluster/class
+  // We have one constraint per point and one per cluster/class
  
    glp_add_rows(lp, nb_points + _nb_clusters * nb_classes);
  
    // (A) For each point, the constraint is that the sum of its
-  // association coefficients will be equal to 1.0
+  // association coefficients is equal to 1.0
  
    for(int n = 1; n <= nb_points; n++) {
      glp_set_row_bnds(lp, n, GLP_FX, 1.0, 1.0);
    }
  
-  // (B) For each cluster and each class, the sum of the association
+  // (B) For each cluster/class pair, the sum of the association
    // coefficient to this cluster for this class is equal to the number
    // of sample of that class, divided by the number of clusters
  
@@ -199,17 +209,18 @@ scalar_t Clusterer::uninformative_lp_cluster_association(int nb_points, scalar_t
      for(int n = 1; n <= nb_points; n++) {
        int r = n + nb_points * (k - 1);
  
-      scalar_t dist = 0;
+      // scalar_t dist = 0;
  
-      for(int d = 0; d < _dim; d++) {
-        dist += sq(_cluster_means[k-1][d] - points[n-1][d]) / (2 * _cluster_var[k-1][d]);
-        dist += 0.5 * log(_cluster_var[k-1][d]);
-      }
+      // for(int d = 0; d < _dim; d++) {
+        // dist += sq(_cluster_means[k-1][d] - points[n-1][d]) / (2 * _cluster_var[k-1][d]);
+        // dist += 0.5 * log(_cluster_var[k-1][d]);
+      // }
  
-      // The LP weight on this association coefficient is the distance
-      // (normalized) of that sample to the centroid of that cluster
+      // The LP weight on this association coefficient for the global
+      // loss is the normalized distance of that sample to the
+      // centroid of that cluster
  
-      glp_set_obj_coef(lp, r, dist);
+      glp_set_obj_coef(lp, r, distance_to_centroid(points[n-1], k-1));
  
        // And this association coefficient is in [0,1]
  
@@ -310,21 +321,6 @@ void Clusterer::update_clusters(int nb_points, scalar_t **points, scalar_t **gam
    }
  }
  
-void Clusterer::initialize_clusters(int nb_points, scalar_t **points) {
-  int *used = new int[nb_points];
-  for(int k = 0; k < nb_points; k++) { used[k] = 0; }
-  for(int k = 0; k < _nb_clusters; k++) {
-    int l;
-    do { l = int(drand48() * nb_points); } while(used[l]);
-    for(int d = 0; d < _dim; d++) {
-      _cluster_means[k][d] = points[l][d];
-      _cluster_var[k][d] = 1.0;
-    }
-    used[l] = 1;
-  }
-  delete[] used;
-}
-
  void Clusterer::train(int mode,
                        int nb_clusters, int dim,
                        int nb_points, scalar_t **points,
diff --git a/clusterer.h b/clusterer.h

index b45f8c3..065dd3f 100644 (file)
--- a/clusterer.h
+++ b/clusterer.h
@@ -39,6 +39,8 @@ public:
    int _dim;
    scalar_t **_cluster_means, **_cluster_var;
  
+  scalar_t distance_to_centroid(scalar_t *x, int k); // cost of point x with respect to cluster k; defined in clusterer.cc
+
    void initialize_clusters(int nb_points, scalar_t **points);
  
    // Does the standard hard k-mean association
author	Francois Fleuret <francois@fleuret.org>
	Wed, 27 Mar 2013 12:04:56 +0000 (13:04 +0100)
committer	Francois Fleuret <francois@fleuret.org>
	Wed, 27 Mar 2013 12:04:56 +0000 (13:04 +0100)
clusterer.cc		patch \| blob \| history
clusterer.h		patch \| blob \| history