for(int c = 1; c <= nb_classes; c++) {
int row = nb_points + (k - 1) * nb_classes + c;
scalar_t tau = nb_samples_per_class[c-1] / scalar_t(_nb_clusters);
- // cout << "tau " << k << " " << c << " " << tau << endl;
glp_set_row_bnds(lp, row, GLP_FX, tau, tau);
}
}
}
}
- // { // ******************************* START ***************************
-// #warning Test code added on 2013 Feb 07 20:32:05
- // // for(int n = 0; n < nb_points; n++) {
- // // scalar_t sum = 0;
- // // for(int k = 0; k < _nb_clusters; k++) {
- // // ASSERT(gamma[n][k] >= 0 && gamma[n][k] <= 1);
- // // sum += gamma[n][k];
- // // }
- // // cout << sum << endl;
- // // }
-
- // scalar_t *sum_gamma = new scalar_t[nb_classes];
-
- // for(int k = 0; k < _nb_clusters; k++) {
- // for(int c = 0; c < nb_classes; c++) { sum_gamma[c] = 0.0; }
- // for(int n = 0; n < nb_points; n++) {
- // sum_gamma[labels[n]] += gamma[n][k];
- // }
- // cout << "CLUSTER" << k;
- // for(int c = 0; c < nb_classes; c++) {
- // cout << " " << sum_gamma[c];
- // }
- // cout << endl;
- // }
-
- // delete sum_gamma;
-
- // } // ******************************** END ****************************
-
delete[] nb_samples_per_class;
delete[] ia;
delete[] ja;
return total_dist;
}
-void Clusterer::baseline_update_clusters(int nb_points, scalar_t **points, scalar_t **gamma) {
+void Clusterer::update_clusters(int nb_points, scalar_t **points, scalar_t **gamma) {
for(int k = 0; k < _nb_clusters; k++) {
for(int d = 0; d < _dim; d++) {
delete[] used;
}
-void Clusterer::train(int nb_clusters, int dim,
+void Clusterer::train(int mode,
+ int nb_clusters, int dim,
int nb_points, scalar_t **points,
int nb_classes, int *labels,
int *cluster_associations) {
do {
pred_total_distance = total_distance;
+
+ switch(mode) {
+
+ case STANDARD_ASSOCIATION:
+ total_distance =
+ baseline_cluster_association(nb_points, points, nb_classes, labels, gammas);
+ break;
+
+ case STANDARD_LP_ASSOCIATION:
+ total_distance =
+ baseline_lp_cluster_association(nb_points, points, nb_classes, labels, gammas);
+ break;
+
+ case UNINFORMATIVE_LP_ASSOCIATION:
total_distance =
- // baseline_cluster_association(nb_points, points, nb_classes, labels, gammas);
- // baseline_lp_cluster_association(nb_points, points, nb_classes, labels, gammas);
uninformative_lp_cluster_association(nb_points, points, nb_classes, labels, gammas);
+ break;
+
+ default:
+ cerr << "Unknown sample-cluster association mode." << endl;
+ abort();
+ }
+
cout << "TRAIN " << nb_rounds << " " << total_distance << endl;
- baseline_update_clusters(nb_points, points, gammas);
+ update_clusters(nb_points, points, gammas);
nb_rounds++;
} while(total_distance < min_iteration_improvement * pred_total_distance &&
nb_rounds < max_nb_iterations);
- {
- cout << "TOTAL_NB_SAMPLES";
- for(int c = 0; c < nb_classes; c++) {
- int nb_samples = 0;
- for(int n = 0; n < nb_points; n++) {
- if(labels[n] == c) {
- nb_samples++;
- }
- }
- cout << " " << nb_samples;
- }
- cout << endl;
-
- for(int k = 0; k < _nb_clusters; k++) {
- cout << "CLUSTER_GAMMA_SUM " << k << " :";
- for(int c = 0; c < nb_classes; c++) {
- scalar_t sum = 0.0;
- for(int n = 0; n < nb_points; n++) {
- if(labels[n] == c) {
- sum += gammas[n][k];
- }
+ if(cluster_associations) {
+ for(int n = 0; n < nb_points; n++) {
+ for(int k = 0; k < _nb_clusters; k++) {
+ if(k == 0 || gammas[n][k] > gammas[n][cluster_associations[n]]) {
+ cluster_associations[n] = k;
}
- cout << " " << sum;
- }
- cout << endl;
- }
- }
-
- for(int n = 0; n < nb_points; n++) {
- for(int k = 0; k < _nb_clusters; k++) {
- if(k == 0 || gammas[n][k] > gammas[n][cluster_associations[n]]) {
- cluster_associations[n] = k;
}
}
}
class Clusterer {
public:
+
+ enum { STANDARD_ASSOCIATION, STANDARD_LP_ASSOCIATION, UNINFORMATIVE_LP_ASSOCIATION };
+
const static int max_nb_iterations = 10;
const static scalar_t min_iteration_improvement = 0.999;
void initialize_clusters(int nb_points, scalar_t **points);
+ // Does the standard hard k-means association
+
scalar_t baseline_cluster_association(int nb_points, scalar_t **points,
int nb_classes, int *labels,
scalar_t **gamma);
+ // Does the same with an LP formulation, as a sanity check
+
scalar_t baseline_lp_cluster_association(int nb_points, scalar_t **points,
int nb_classes, int *labels,
scalar_t **gamma);
+ // Does the association under the constraint that each cluster gets
+ // associated samples with the same class proportions as the overall
+ // training set
+
scalar_t uninformative_lp_cluster_association(int nb_points, scalar_t **points,
int nb_classes, int *labels,
scalar_t **gamma);
- void baseline_update_clusters(int nb_points, scalar_t **points, scalar_t **gamma);
+ void update_clusters(int nb_points, scalar_t **points, scalar_t **gamma);
public:
Clusterer();
~Clusterer();
- void train(int nb_clusters, int dim,
+
+ void train(int mode,
+ int nb_clusters, int dim,
int nb_points, scalar_t **points,
int nb_classes, int *labels,
+ // On return, this last array holds for each sample the index
+ // of the cluster with the largest association weight for that
+ // sample. It can be null, in which case nothing is written.
int *cluster_associations);
int cluster(scalar_t *point);