projects
/
clueless-kmeans.git
/ commitdiff
commit
grep
author
committer
pickaxe
?
search:
re
summary
|
shortlog
|
log
|
commit
| commitdiff |
tree
raw
|
patch
|
inline
| side by side (parent:
d0c772d
)
Added comments in the main method.
author
Francois Fleuret
<francois@fleuret.org>
Wed, 27 Mar 2013 11:48:50 +0000
(12:48 +0100)
committer
Francois Fleuret
<francois@fleuret.org>
Wed, 27 Mar 2013 11:48:50 +0000
(12:48 +0100)
clusterer.cc
patch
|
blob
|
history
diff --git
a/clusterer.cc
b/clusterer.cc
index
47d9ac3
..
9c5e7cb
100644
(file)
--- a/
clusterer.cc
+++ b/
clusterer.cc
@@
-167,12
+167,21
@@
scalar_t Clusterer::uninformative_lp_cluster_association(int nb_points, scalar_t
glp_set_prob_name(lp, "uninformative_lp_cluster_association");
glp_set_obj_dir(lp, GLP_MIN);
glp_set_prob_name(lp, "uninformative_lp_cluster_association");
glp_set_obj_dir(lp, GLP_MIN);
+ // We have one constraint per point and one per cluster/class pair
+
glp_add_rows(lp, nb_points + _nb_clusters * nb_classes);
glp_add_rows(lp, nb_points + _nb_clusters * nb_classes);
+ // (A) For each point, the constraint is that the sum of its
+ // association coefficients will be equal to 1.0
+
for(int n = 1; n <= nb_points; n++) {
glp_set_row_bnds(lp, n, GLP_FX, 1.0, 1.0);
}
for(int n = 1; n <= nb_points; n++) {
glp_set_row_bnds(lp, n, GLP_FX, 1.0, 1.0);
}
+ // (B) For each cluster and each class, the sum of the association
+ // coefficients to this cluster for this class is equal to the number
+ // of samples of that class, divided by the number of clusters
+
for(int k = 1; k <= _nb_clusters; k++) {
for(int c = 1; c <= nb_classes; c++) {
int row = nb_points + (k - 1) * nb_classes + c;
for(int k = 1; k <= _nb_clusters; k++) {
for(int c = 1; c <= nb_classes; c++) {
int row = nb_points + (k - 1) * nb_classes + c;
@@
-181,6
+190,9
@@
scalar_t Clusterer::uninformative_lp_cluster_association(int nb_points, scalar_t
}
}
}
}
+ // Each one of the constraints above involves a linear combination of
+ // all the association coefficients
+
glp_add_cols(lp, nb_points * _nb_clusters);
for(int k = 1; k <= _nb_clusters; k++) {
glp_add_cols(lp, nb_points * _nb_clusters);
for(int k = 1; k <= _nb_clusters; k++) {
@@
-194,13
+206,24
@@
scalar_t Clusterer::uninformative_lp_cluster_association(int nb_points, scalar_t
dist += 0.5 * log(_cluster_var[k-1][d]);
}
dist += 0.5 * log(_cluster_var[k-1][d]);
}
+ // The LP weight on this association coefficient is the distance
+ // (normalized) of that sample to the centroid of that cluster
+
glp_set_obj_coef(lp, r, dist);
glp_set_obj_coef(lp, r, dist);
+
+ // And this association coefficient is in [0,1]
+
glp_set_col_bnds(lp, r, GLP_DB, 0.0, 1.0);
}
}
int l = 1;
glp_set_col_bnds(lp, r, GLP_DB, 0.0, 1.0);
}
}
int l = 1;
+ // We build the matrix of the LP coefficients
+
+ // The sums of the association coefficients per point for the
+ // constraints (A) above.
+
for(int n = 1; n <= nb_points; n++) {
for(int k = 1; k <= _nb_clusters; k++) {
int row = n;
for(int n = 1; n <= nb_points; n++) {
for(int k = 1; k <= _nb_clusters; k++) {
int row = n;
@@
-211,6
+234,9
@@
scalar_t Clusterer::uninformative_lp_cluster_association(int nb_points, scalar_t
}
}
}
}
+ // And the sums of coefficients for each pair class/cluster for
+ // constraint (B) above.
+
for(int k = 1; k <= _nb_clusters; k++) {
for(int c = 1; c <= nb_classes; c++) {
int row = nb_points + (k - 1) * nb_classes + c;
for(int k = 1; k <= _nb_clusters; k++) {
for(int c = 1; c <= nb_classes; c++) {
int row = nb_points + (k - 1) * nb_classes + c;
@@
-229,8
+255,12
@@
scalar_t Clusterer::uninformative_lp_cluster_association(int nb_points, scalar_t
glp_load_matrix(lp, nb_coeffs, ia, ja, ar);
glp_load_matrix(lp, nb_coeffs, ia, ja, ar);
+ // Now a miracle occurs: the LP is solved with the simplex method
+
glp_simplex(lp, NULL);
glp_simplex(lp, NULL);
+ // We retrieve the result
+
scalar_t total_dist = glp_get_obj_val(lp);
for(int k = 1; k <= _nb_clusters; k++) {
scalar_t total_dist = glp_get_obj_val(lp);
for(int k = 1; k <= _nb_clusters; k++) {