diff --git a/loss_machine.cc b/loss_machine.cc
new file mode 100644
index 0000000..6ff78d5
--- /dev/null
+++ b/loss_machine.cc
@@ -0,0 +1,421 @@

///////////////////////////////////////////////////////////////////////////
// This program is free software: you can redistribute it and/or modify //
// it under the terms of the version 3 of the GNU General Public License //
// as published by the Free Software Foundation.                         //
//                                                                       //
// This program is distributed in the hope that it will be useful, but   //
// WITHOUT ANY WARRANTY; without even the implied warranty of            //
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU      //
// General Public License for more details.                              //
//                                                                       //
// You should have received a copy of the GNU General Public License     //
// along with this program. If not, see <http://www.gnu.org/licenses/>.  //
//                                                                       //
// Written by Francois Fleuret, (C) IDIAP                                //
// Contact <francois.fleuret@idiap.ch> for comments & bug reports        //
///////////////////////////////////////////////////////////////////////////

#include "tools.h"
#include "loss_machine.h"

LossMachine::LossMachine(int loss_type) {
  _loss_type = loss_type;
}

void LossMachine::get_loss_derivatives(SampleSet *samples,
                                       scalar_t *responses,
                                       scalar_t *derivatives) {

  switch(_loss_type) {

  case LOSS_EXPONENTIAL:
    {
      for(int n = 0; n < samples->nb_samples(); n++) {
        derivatives[n] =
          - samples->label(n) * exp( - samples->label(n) * responses[n]);
      }
    }
    break;

  case LOSS_EV_REGULARIZED:
    {
      scalar_t sum_pos = 0, sum_sq_pos = 0, nb_pos = 0, m_pos, v_pos;
      scalar_t sum_neg = 0, sum_sq_neg = 0, nb_neg = 0, m_neg, v_neg;

      // Accumulate per-class sums to get the empirical mean and
      // variance of the responses on each class.
      for(int n = 0; n < samples->nb_samples(); n++) {
        if(samples->label(n) > 0) {
          sum_pos += responses[n];
          sum_sq_pos += sq(responses[n]);
          nb_pos += 1.0;
        } else if(samples->label(n) < 0) {
          sum_neg += responses[n];
          sum_sq_neg += sq(responses[n]);
          nb_neg += 1.0;
        }
      }

      m_pos = sum_pos / nb_pos;
      v_pos = sum_sq_pos/(nb_pos - 1) - sq(sum_pos)/(nb_pos * (nb_pos - 1));

      scalar_t loss_pos = nb_pos * exp(v_pos/2 - m_pos);

      m_neg = sum_neg / nb_neg;
      v_neg = sum_sq_neg/(nb_neg - 1) - sq(sum_neg)/(nb_neg * (nb_neg - 1));

      scalar_t loss_neg = nb_neg * exp(v_neg/2 + m_neg);

      // Derivative of the per-class loss term with respect to each
      // individual response.
      for(int n = 0; n < samples->nb_samples(); n++) {
        if(samples->label(n) > 0) {
          derivatives[n] =
            ( - 1/nb_pos + (responses[n] - m_pos)/(nb_pos - 1)) * loss_pos;
        } else if(samples->label(n) < 0) {
          derivatives[n] =
            ( 1/nb_neg + (responses[n] - m_neg)/(nb_neg - 1)) * loss_neg;
        }
      }
    }
    break;

  case LOSS_HINGE:
    {
      for(int n = 0; n < samples->nb_samples(); n++) {
        if(samples->label(n) != 0 && samples->label(n) * responses[n] < 1)
          derivatives[n] = 1;
        else
          derivatives[n] = 0;
      }
    }
    break;

  case LOSS_LOGISTIC:
    {
      for(int n = 0; n < samples->nb_samples(); n++) {
        if(samples->label(n) == 0)
          derivatives[n] = 0.0;
        else
          derivatives[n] = samples->label(n) * 1/(1 + exp(samples->label(n) * responses[n]));
      }
    }
    break;

  default:
    cerr << "Unknown loss type in LossMachine::get_loss_derivatives."
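// Illustrative sketch, not part of the original file: how calling code
// might obtain the per-sample derivatives. The variables `samples` and
// `responses`, and the use of the result to weight the next round of
// training, are hypothetical assumptions, not taken from this repository.
//
//   LossMachine loss_machine(LOSS_EXPONENTIAL);
//   scalar_t *derivatives = new scalar_t[samples->nb_samples()];
//   loss_machine.get_loss_derivatives(samples, responses, derivatives);
//   // derivatives[n] now holds, for LOSS_EXPONENTIAL,
//   // -label(n) * exp(-label(n) * responses[n])
//   delete[] derivatives;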
         << endl;
    exit(1);
  }

}

scalar_t LossMachine::loss(SampleSet *samples, scalar_t *responses) {
  scalar_t l = 0;

  switch(_loss_type) {

  case LOSS_EXPONENTIAL:
    {
      for(int n = 0; n < samples->nb_samples(); n++) {
        l += exp( - samples->label(n) * responses[n]);
        ASSERT(!isinf(l));
      }
    }
    break;

  case LOSS_EV_REGULARIZED:
    {
      scalar_t sum_pos = 0, sum_sq_pos = 0, nb_pos = 0, m_pos, v_pos;
      scalar_t sum_neg = 0, sum_sq_neg = 0, nb_neg = 0, m_neg, v_neg;

      for(int n = 0; n < samples->nb_samples(); n++) {
        if(samples->label(n) > 0) {
          sum_pos += responses[n];
          sum_sq_pos += sq(responses[n]);
          nb_pos += 1.0;
        } else if(samples->label(n) < 0) {
          sum_neg += responses[n];
          sum_sq_neg += sq(responses[n]);
          nb_neg += 1.0;
        }
      }

      l = 0;

      if(nb_pos > 0) {
        m_pos = sum_pos / nb_pos;
        v_pos = sum_sq_pos/(nb_pos - 1) - sq(sum_pos)/(nb_pos * (nb_pos - 1));
        l += nb_pos * exp(v_pos/2 - m_pos);
      }

      if(nb_neg > 0) {
        m_neg = sum_neg / nb_neg;
        v_neg = sum_sq_neg/(nb_neg - 1) - sq(sum_neg)/(nb_neg * (nb_neg - 1));
        l += nb_neg * exp(v_neg/2 + m_neg);
      }

    }
    break;

  case LOSS_HINGE:
    {
      for(int n = 0; n < samples->nb_samples(); n++) {
        if(samples->label(n) != 0) {
          if(samples->label(n) * responses[n] < 1)
            l += (1 - samples->label(n) * responses[n]);
        }
      }
    }
    break;

  case LOSS_LOGISTIC:
    {
      for(int n = 0; n < samples->nb_samples(); n++) {
        if(samples->label(n) != 0) {
          // Numerically safe evaluation of log(1 + exp(u)): for u > 20
          // the result is u up to machine precision, and for u < -20
          // the contribution is negligible.
          scalar_t u = - samples->label(n) * responses[n];
          if(u > 20) {
            l += u;
          } else if(u > -20) {
            l += log(1 + exp(u));
          }
        }
      }
    }
    break;

  default:
    cerr << "Unknown loss type in LossMachine::loss." << endl;
    exit(1);
  }

  return l;
}
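// A reading of the EV-regularized loss above, added for clarity: with m
// and v the empirical mean and (unbiased) variance of the responses over
// one class, the positive class contributes nb_pos * exp(v_pos/2 - m_pos)
// and the negative class nb_neg * exp(v_neg/2 + m_neg). Since
// E[exp(-y r)] = exp(v/2 - y m) when r is Gaussian with mean m and
// variance v, each term is the class size times the expected exponential
// loss under a Gaussian approximation of that class's response
// distribution.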
scalar_t LossMachine::optimal_weight(SampleSet *sample_set,
                                     scalar_t *weak_learner_responses,
                                     scalar_t *current_responses) {

  switch(_loss_type) {

  case LOSS_EXPONENTIAL:
    {
      // Closed-form optimum: num accumulates the weights of the samples
      // the weak learner classifies correctly, den those it gets wrong.
      scalar_t num = 0, den = 0, z;
      for(int n = 0; n < sample_set->nb_samples(); n++) {
        z = sample_set->label(n) * weak_learner_responses[n];
        if(z > 0) {
          num += exp( - sample_set->label(n) * current_responses[n]);
        } else if(z < 0) {
          den += exp( - sample_set->label(n) * current_responses[n]);
        }
      }

      return 0.5 * log(num / den);
    }
    break;

  case LOSS_EV_REGULARIZED:
    {

      scalar_t u = 0, du = -0.1;
      scalar_t *responses = new scalar_t[sample_set->nb_samples()];

      scalar_t l, prev_l = -1;

      const scalar_t minimum_delta_for_optimization = 1e-5;

      scalar_t shift = 0;

      {
        // Compute, from the current responses, a shift that keeps the
        // exponentials below in a numerically safe range.
        scalar_t sum_pos = 0, sum_sq_pos = 0, nb_pos = 0, m_pos, v_pos;
        scalar_t sum_neg = 0, sum_sq_neg = 0, nb_neg = 0, m_neg, v_neg;

        for(int n = 0; n < sample_set->nb_samples(); n++) {
          if(sample_set->label(n) > 0) {
            sum_pos += current_responses[n];
            sum_sq_pos += sq(current_responses[n]);
            nb_pos += 1.0;
          } else if(sample_set->label(n) < 0) {
            sum_neg += current_responses[n];
            sum_sq_neg += sq(current_responses[n]);
            nb_neg += 1.0;
          }
        }

        if(nb_pos > 0) {
          m_pos = sum_pos / nb_pos;
          v_pos = sum_sq_pos/(nb_pos - 1) - sq(sum_pos)/(nb_pos * (nb_pos - 1));
          shift = max(shift, v_pos/2 - m_pos);
        }

        if(nb_neg > 0) {
          m_neg = sum_neg / nb_neg;
          v_neg = sum_sq_neg/(nb_neg - 1) - sq(sum_neg)/(nb_neg * (nb_neg - 1));
          shift = max(shift, v_neg/2 + m_neg);
        }

//         (*global.log_stream) << "nb_pos = " << nb_pos << " nb_neg = " << nb_neg << endl;

      }

      // Crude line search on u: walk in steps of du, shrinking and
      // reversing the step whenever the loss increases.
      int nb = 0;

      while(nb < 100 && abs(du) > minimum_delta_for_optimization) {
        nb++;

//         (*global.log_stream) << "l = " << l << " u = " << u << " du = " << du << endl;

        u += du;
        for(int s = 0; s < sample_set->nb_samples(); s++) {
          responses[s] = current_responses[s] + u * weak_learner_responses[s];
        }

        {
          scalar_t sum_pos = 0, sum_sq_pos = 0, nb_pos = 0, m_pos, v_pos;
          scalar_t sum_neg = 0, sum_sq_neg = 0, nb_neg = 0, m_neg, v_neg;

          for(int n = 0; n < sample_set->nb_samples(); n++) {
            if(sample_set->label(n) > 0) {
              sum_pos += responses[n];
              sum_sq_pos += sq(responses[n]);
              nb_pos += 1.0;
            } else if(sample_set->label(n) < 0) {
              sum_neg += responses[n];
              sum_sq_neg += sq(responses[n]);
              nb_neg += 1.0;
            }
          }

          l = 0;

          if(nb_pos > 0) {
            m_pos = sum_pos / nb_pos;
            v_pos = sum_sq_pos/(nb_pos - 1) - sq(sum_pos)/(nb_pos * (nb_pos - 1));
            l += nb_pos * exp(v_pos/2 - m_pos - shift);
          }

          if(nb_neg > 0) {
            m_neg = sum_neg / nb_neg;
            v_neg = sum_sq_neg/(nb_neg - 1) - sq(sum_neg)/(nb_neg * (nb_neg - 1));
            l += nb_neg * exp(v_neg/2 + m_neg - shift);
          }

        }

        if(l > prev_l) du = du * -0.25;
        prev_l = l;
      }

      delete[] responses;

      return u;
    }

  case LOSS_HINGE:
  case LOSS_LOGISTIC:
    {

      scalar_t u = 0, du = -0.1;
      scalar_t *responses = new scalar_t[sample_set->nb_samples()];

      scalar_t l, prev_l = -1;

      const scalar_t minimum_delta_for_optimization = 1e-5;

      // Same step-shrinking line search as above, with the loss
      // evaluated through LossMachine::loss.
      int n = 0;
      while(n < 100 && abs(du) > minimum_delta_for_optimization) {
        n++;
        u += du;
        for(int s = 0; s < sample_set->nb_samples(); s++) {
          responses[s] = current_responses[s] + u * weak_learner_responses[s];
        }
        l = loss(sample_set, responses);
        if(l > prev_l) du = du * -0.25;
        prev_l = l;
      }

      (*global.log_stream) << "END l = " << l << " du = " << du << endl;

      delete[] responses;

      return u;
    }

  default:
    cerr << "Unknown loss type in LossMachine::optimal_weight." << endl;
    exit(1);
  }

}

void LossMachine::subsample(int nb, scalar_t *labels, scalar_t *responses,
                            int nb_to_sample, int *sample_nb_occurences,
                            scalar_t *sample_responses,
                            int allow_duplicates) {

  switch(_loss_type) {

  case LOSS_EXPONENTIAL:
    {
      scalar_t *weights = new scalar_t[nb];

      // The sampling weight of a sample is its current exponential loss.
      for(int n = 0; n < nb; n++) {
        if(labels[n] == 0) {
          weights[n] = 0;
        } else {
          weights[n] = exp( - labels[n] * responses[n]);
        }
        sample_nb_occurences[n] = 0;
        sample_responses[n] = 0.0;
      }

      scalar_t total_weight;
      int nb_sampled = 0, sum_sample_nb_occurences = 0;

      int *sampled_indexes = new int[nb_to_sample];

      (*global.log_stream) << "Sampling " << nb_to_sample << " samples." << endl;

      // Draw until nb_to_sample samples have been kept (distinct ones
      // when duplicates are not allowed).
      do {
        total_weight = robust_sampling(nb,
                                       weights,
                                       nb_to_sample,
                                       sampled_indexes);

        for(int k = 0; nb_sampled < nb_to_sample && k < nb_to_sample; k++) {
          int i = sampled_indexes[k];
          if(allow_duplicates || sample_nb_occurences[i] == 0) nb_sampled++;
          sample_nb_occurences[i]++;
          sum_sample_nb_occurences++;
        }
      } while(nb_sampled < nb_to_sample);

      (*global.log_stream) << "nb_sampled = " << nb_sampled << " nb_to_sample = " << nb_to_sample << endl;

      (*global.log_stream) << "Done." << endl;

      delete[] sampled_indexes;

      // Set the responses of the kept samples so that each occurrence
      // carries an equal share of the total weight mass.
      scalar_t unit_weight = log(total_weight / scalar_t(sum_sample_nb_occurences));

      for(int n = 0; n < nb; n++) {
        if(sample_nb_occurences[n] > 0) {
          if(allow_duplicates) {
            sample_responses[n] = - labels[n] * unit_weight;
          } else {
            sample_responses[n] = - labels[n] * (unit_weight + log(scalar_t(sample_nb_occurences[n])));
            sample_nb_occurences[n] = 1;
          }
        }
      }

      delete[] weights;

    }
    break;

  default:
    cerr << "Unknown loss type in LossMachine::subsample." << endl;
    exit(1);
  }

}
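// Illustrative sketch, not part of the original file: one boosting round
// built on this interface. `sample_set`, `weak_responses` and `responses`
// are hypothetical; the additive update is the one that optimal_weight's
// return value is computed for, i.e. the u minimizing
// loss(current + u * weak).
//
//   LossMachine loss_machine(LOSS_EXPONENTIAL);
//   scalar_t rho = loss_machine.optimal_weight(sample_set,
//                                              weak_responses,
//                                              responses);
//   for(int n = 0; n < sample_set->nb_samples(); n++)
//     responses[n] += rho * weak_responses[n];
//   (*global.log_stream) << "loss = "
//                        << loss_machine.loss(sample_set, responses)
//                        << endl;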