}
scalar_t Clusterer::distance_to_centroid(scalar_t *x, int k) {
+ // We take the variance into account, plus the normalization term. This
+ // is between k-means and EM with a diagonal covariance
scalar_t dist = 0;
for(int d = 0; d < _dim; d++) {
dist += sq(_cluster_means[k][d] - x[d]) / (2 * _cluster_var[k][d]);
// association coefficients is equal to 1.0
for(int n = 1; n <= nb_points; n++) {
- glp_set_row_bnds(lp, n, GLP_FX, 1.0, 1.0);
+ int row = n;
+ glp_set_row_bnds(lp, row, GLP_FX, 1.0, 1.0);
}
// (B) For each pair cluster/class, the sum of the association
for(int k = 1; k <= _nb_clusters; k++) {
for(int n = 1; n <= nb_points; n++) {
- int r = n + nb_points * (k - 1);
-
- // scalar_t dist = 0;
-
- // for(int d = 0; d < _dim; d++) {
- // dist += sq(_cluster_means[k-1][d] - points[n-1][d]) / (2 * _cluster_var[k-1][d]);
- // dist += 0.5 * log(_cluster_var[k-1][d]);
- // }
+ int col = n + nb_points * (k - 1);
// The LP weight on this association coefficient for the global
// loss is the normalized distance of that sample to the
// centroid of that cluster
- glp_set_obj_coef(lp, r, distance_to_centroid(points[n-1], k-1));
+ glp_set_obj_coef(lp, col, distance_to_centroid(points[n-1], k-1));
- // And this association coefficient is in [0,1]
+ // And all the association coefficients are in [0,1]
- glp_set_col_bnds(lp, r, GLP_DB, 0.0, 1.0);
+ glp_set_col_bnds(lp, col, GLP_DB, 0.0, 1.0);
}
}
for(int d = 0; d < _dim; d++) {
if(sum_gamma >= 2) {
- _cluster_var[k][d] = (_cluster_var[k][d] - sq(_cluster_means[k][d]) / sum_gamma) / (sum_gamma - 1);
+ _cluster_var[k][d] =
+ (_cluster_var[k][d] - sq(_cluster_means[k][d]) / sum_gamma) / (sum_gamma - 1);
+ _cluster_var[k][d] = max(scalar_t(min_cluster_variance), _cluster_var[k][d]);
} else {
_cluster_var[k][d] = 1;
}
- _cluster_var[k][d] = max(0.01, _cluster_var[k][d]);
+
_cluster_means[k][d] /= sum_gamma;
}
}