///////////////////////////////////////////////////////////////////////////
// This program is free software: you can redistribute it and/or modify  //
// it under the terms of the version 3 of the GNU General Public License //
// as published by the Free Software Foundation.                         //
//                                                                       //
// This program is distributed in the hope that it will be useful, but   //
// WITHOUT ANY WARRANTY; without even the implied warranty of            //
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU      //
// General Public License for more details.                              //
//                                                                       //
// You should have received a copy of the GNU General Public License     //
// along with this program. If not, see <http://www.gnu.org/licenses/>.  //
//                                                                       //
// Written by Francois Fleuret                                           //
// (C) Idiap Research Institute                                          //
//                                                                       //
// Contact <francois.fleuret@idiap.ch> for comments & bug reports        //
///////////////////////////////////////////////////////////////////////////
#include "loss_machine.h"
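
// A LossMachine gathers everything that depends on the training loss: it
// evaluates the loss of a set of responses on a SampleSet, computes
// per-sample derivatives of that loss, computes the coefficient of a new
// weak learner (in closed form for the exponential loss, by line search
// otherwise), and re-samples the training examples according to their
// current weights.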
LossMachine::LossMachine(int loss_type) {
  _loss_type = loss_type;
}
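
// Typical boosting iteration (a sketch only, assuming the SampleSet
// accessors used below and arrays allocated by the caller):
//
//   LossMachine loss_machine(LOSS_EXPONENTIAL);
//   scalar_t rho = loss_machine.optimal_weight(samples,
//                                              weak_learner_responses,
//                                              current_responses);
//   for(int n = 0; n < samples->nb_samples(); n++)
//     current_responses[n] += rho * weak_learner_responses[n];
//   loss_machine.get_loss_derivatives(samples, current_responses, derivatives);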
void LossMachine::get_loss_derivatives(SampleSet *samples,
                                       scalar_t *responses,
                                       scalar_t *derivatives) {
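
  // Fills derivatives[n] with a per-sample derivative term of the loss
  // with respect to responses[n]. For the exponential loss
  // sum_n exp(-y_n f_n), this term is -y_n exp(-y_n f_n), as computed below.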
  case LOSS_EXPONENTIAL:
    {
      for(int n = 0; n < samples->nb_samples(); n++) {
        derivatives[n] =
          - samples->label(n) * exp( - samples->label(n) * responses[n]);
  case LOSS_EV_REGULARIZED:
    {
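      // Expectation-variance regularized loss: with m and v the empirical
      // mean and (unbiased) variance of the responses over the positive and
      // negative samples,
      //
      //   loss = nb_pos * exp(v_pos/2 - m_pos) + nb_neg * exp(v_neg/2 + m_neg)
      //
      // The derivatives below are obtained by differentiating this
      // expression with respect to each individual response.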
      scalar_t sum_pos = 0, sum_sq_pos = 0, nb_pos = 0, m_pos, v_pos;
      scalar_t sum_neg = 0, sum_sq_neg = 0, nb_neg = 0, m_neg, v_neg;
      for(int n = 0; n < samples->nb_samples(); n++) {
        if(samples->label(n) > 0) {
          sum_pos += responses[n];
          sum_sq_pos += sq(responses[n]);
          nb_pos++;
        }
        else if(samples->label(n) < 0) {
          sum_neg += responses[n];
          sum_sq_neg += sq(responses[n]);
          nb_neg++;
        }
      }
      m_pos = sum_pos / nb_pos;
      v_pos = sum_sq_pos/(nb_pos - 1) - sq(sum_pos)/(nb_pos * (nb_pos - 1));

      scalar_t loss_pos = nb_pos * exp(v_pos/2 - m_pos);

      m_neg = sum_neg / nb_neg;
      v_neg = sum_sq_neg/(nb_neg - 1) - sq(sum_neg)/(nb_neg * (nb_neg - 1));

      scalar_t loss_neg = nb_neg * exp(v_neg/2 + m_neg);
      for(int n = 0; n < samples->nb_samples(); n++) {
        if(samples->label(n) > 0) {
          derivatives[n] =
            ( - 1/nb_pos + (responses[n] - m_pos)/(nb_pos - 1)) * loss_pos;
        } else if(samples->label(n) < 0) {
          derivatives[n] =
            ( 1/nb_neg + (responses[n] - m_neg)/(nb_neg - 1)) * loss_neg;
        }
      }
      for(int n = 0; n < samples->nb_samples(); n++) {
        if(samples->label(n) != 0 && samples->label(n) * responses[n] < 1)
      for(int n = 0; n < samples->nb_samples(); n++) {
        if(samples->label(n) == 0)
          derivatives[n] = 0.0;
        else
          derivatives[n] = samples->label(n) * 1/(1 + exp(samples->label(n) * responses[n]));
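      // i.e. the magnitude of the term for each labelled sample is
      // 1/(1 + exp(y_n f_n)), which shrinks as the sample becomes more
      // safely classified.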
    cerr << "Unknown loss type in LossMachine::get_loss_derivatives."
scalar_t LossMachine::loss(SampleSet *samples, scalar_t *responses) {
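
  // Total loss of the given responses on the sample set. Depending on
  // _loss_type this is the exponential loss sum_n exp(-y_n f_n), the
  // expectation-variance regularized loss described above, a hinge-type
  // penalty sum_n max(0, 1 - y_n f_n) over the labelled samples, or the
  // logistic loss sum_n log(1 + exp(-y_n f_n)).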
  case LOSS_EXPONENTIAL:
    {
      for(int n = 0; n < samples->nb_samples(); n++) {
        l += exp( - samples->label(n) * responses[n]);
  case LOSS_EV_REGULARIZED:
    {
      scalar_t sum_pos = 0, sum_sq_pos = 0, nb_pos = 0, m_pos, v_pos;
      scalar_t sum_neg = 0, sum_sq_neg = 0, nb_neg = 0, m_neg, v_neg;
      for(int n = 0; n < samples->nb_samples(); n++) {
        if(samples->label(n) > 0) {
          sum_pos += responses[n];
          sum_sq_pos += sq(responses[n]);
          nb_pos++;
        } else if(samples->label(n) < 0) {
          sum_neg += responses[n];
          sum_sq_neg += sq(responses[n]);
          nb_neg++;
        }
      }
      m_pos = sum_pos / nb_pos;
      v_pos = sum_sq_pos/(nb_pos - 1) - sq(sum_pos)/(nb_pos * (nb_pos - 1));
      l += nb_pos * exp(v_pos/2 - m_pos);

      m_neg = sum_neg / nb_neg;
      v_neg = sum_sq_neg/(nb_neg - 1) - sq(sum_neg)/(nb_neg * (nb_neg - 1));
      l += nb_neg * exp(v_neg/2 + m_neg);

      for(int n = 0; n < samples->nb_samples(); n++) {
        if(samples->label(n) != 0) {
          if(samples->label(n) * responses[n] < 1)
            l += (1 - samples->label(n) * responses[n]);

      for(int n = 0; n < samples->nb_samples(); n++) {
        if(samples->label(n) != 0) {
          scalar_t u = - samples->label(n) * responses[n];
          l += log(1 + exp(u));

    cerr << "Unknown loss type in LossMachine::loss." << endl;
scalar_t LossMachine::optimal_weight(SampleSet *sample_set,
                                     scalar_t *weak_learner_responses,
                                     scalar_t *current_responses) {
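
  // Returns the coefficient rho that should multiply the weak learner's
  // responses when they are added to the current responses. For the
  // exponential loss the minimizer has the closed form 0.5 * log(num / den),
  // with num and den the total weights of the samples the weak learner gets
  // right and wrong; the other losses are handled with a simple
  // adaptive-step line search on the loss.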
  case LOSS_EXPONENTIAL:
    {
      scalar_t num = 0, den = 0, z;
      for(int n = 0; n < sample_set->nb_samples(); n++) {
        z = sample_set->label(n) * weak_learner_responses[n];
        if(z > 0) {
          num += exp( - sample_set->label(n) * current_responses[n]);
        } else if(z < 0) {
          den += exp( - sample_set->label(n) * current_responses[n]);
        }
      }
      return 0.5 * log(num / den);
  case LOSS_EV_REGULARIZED:
    {
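      // Line search on the weight u, starting from u = 0 with step du. The
      // exponentials are evaluated with their largest exponent subtracted
      // (shift) to avoid overflow, and du is reversed and shrunk by a factor
      // of four whenever the loss increases.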
      scalar_t u = 0, du = -0.1;
      scalar_t *responses = new scalar_t[sample_set->nb_samples()];

      scalar_t l, prev_l = -1;

      const scalar_t minimum_delta_for_optimization = 1e-5;

      scalar_t sum_pos = 0, sum_sq_pos = 0, nb_pos = 0, m_pos, v_pos;
      scalar_t sum_neg = 0, sum_sq_neg = 0, nb_neg = 0, m_neg, v_neg;
      for(int n = 0; n < sample_set->nb_samples(); n++) {
        if(sample_set->label(n) > 0) {
          sum_pos += responses[n];
          sum_sq_pos += sq(responses[n]);
          nb_pos++;
        } else if(sample_set->label(n) < 0) {
          sum_neg += responses[n];
          sum_sq_neg += sq(responses[n]);
          nb_neg++;
        }
      }
      m_pos = sum_pos / nb_pos;
      v_pos = sum_sq_pos/(nb_pos - 1) - sq(sum_pos)/(nb_pos * (nb_pos - 1));
      shift = max(shift, v_pos/2 - m_pos);

      m_neg = sum_neg / nb_neg;
      v_neg = sum_sq_neg/(nb_neg - 1) - sq(sum_neg)/(nb_neg * (nb_neg - 1));
      shift = max(shift, v_neg/2 + m_neg);

      // (*global.log_stream) << "nb_pos = " << nb_pos << " nb_neg = " << nb_neg << endl;
      while(nb < 100 && abs(du) > minimum_delta_for_optimization) {

        // (*global.log_stream) << "l = " << l << " u = " << u << " du = " << du << endl;

        for(int s = 0; s < sample_set->nb_samples(); s++) {
          responses[s] = current_responses[s] + u * weak_learner_responses[s];
        }

        scalar_t sum_pos = 0, sum_sq_pos = 0, nb_pos = 0, m_pos, v_pos;
        scalar_t sum_neg = 0, sum_sq_neg = 0, nb_neg = 0, m_neg, v_neg;
        for(int n = 0; n < sample_set->nb_samples(); n++) {
          if(sample_set->label(n) > 0) {
            sum_pos += responses[n];
            sum_sq_pos += sq(responses[n]);
            nb_pos++;
          } else if(sample_set->label(n) < 0) {
            sum_neg += responses[n];
            sum_sq_neg += sq(responses[n]);
            nb_neg++;
          }
        }
        m_pos = sum_pos / nb_pos;
        v_pos = sum_sq_pos/(nb_pos - 1) - sq(sum_pos)/(nb_pos * (nb_pos - 1));
        l += nb_pos * exp(v_pos/2 - m_pos - shift);

        m_neg = sum_neg / nb_neg;
        v_neg = sum_sq_neg/(nb_neg - 1) - sq(sum_neg)/(nb_neg * (nb_neg - 1));
        l += nb_neg * exp(v_neg/2 + m_neg - shift);

        if(l > prev_l) du = du * -0.25;
      scalar_t u = 0, du = -0.1;
      scalar_t *responses = new scalar_t[sample_set->nb_samples()];

      scalar_t l, prev_l = -1;

      const scalar_t minimum_delta_for_optimization = 1e-5;

      while(n < 100 && abs(du) > minimum_delta_for_optimization) {
        for(int s = 0; s < sample_set->nb_samples(); s++) {
          responses[s] = current_responses[s] + u * weak_learner_responses[s];
        }

        l = loss(sample_set, responses);
        if(l > prev_l) du = du * -0.25;
      (*global.log_stream) << "END l = " << l << " du = " << du << endl;

    cerr << "Unknown loss type in LossMachine::optimal_weight." << endl;
void LossMachine::subsample(int nb, scalar_t *labels, scalar_t *responses,
                            int nb_to_sample, int *sample_nb_occurences, scalar_t *sample_responses,
                            int allow_duplicates) {
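
  // Draws nb_to_sample of the nb samples, with a probability that grows with
  // their weight exp(-labels[n] * responses[n]) (a sketch of the intent; the
  // exact distribution is whatever robust_sampling implements).
  // sample_nb_occurences[n] receives the number of times sample n was kept,
  // and sample_responses[n] a response value chosen so that the weights of
  // the kept samples sum to the same total weight as the original set.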
  case LOSS_EXPONENTIAL:
    {
      scalar_t *weights = new scalar_t[nb];
      for(int n = 0; n < nb; n++) {
        weights[n] = exp( - labels[n] * responses[n]);

        sample_nb_occurences[n] = 0;
        sample_responses[n] = 0.0;
      }

      scalar_t total_weight;
      int nb_sampled = 0, sum_sample_nb_occurences = 0;

      int *sampled_indexes = new int[nb_to_sample];

      (*global.log_stream) << "Sampling " << nb_to_sample << " samples." << endl;
      total_weight = robust_sampling(nb,

      for(int k = 0; nb_sampled < nb_to_sample && k < nb_to_sample; k++) {
        int i = sampled_indexes[k];
        if(allow_duplicates || sample_nb_occurences[i] == 0) nb_sampled++;
        sample_nb_occurences[i]++;
        sum_sample_nb_occurences++;
      }
      } while(nb_sampled < nb_to_sample);
      (*global.log_stream) << "nb_sampled = " << nb_sampled << " nb_to_sample = " << nb_to_sample << endl;

      (*global.log_stream) << "Done." << endl;

      delete[] sampled_indexes;

      scalar_t unit_weight = log(total_weight / scalar_t(sum_sample_nb_occurences));
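
      // unit_weight is the log of the weight each kept occurrence has to
      // carry so that, overall, the drawn samples account for the full
      // total_weight of the original set.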
      for(int n = 0; n < nb; n++) {
        if(sample_nb_occurences[n] > 0) {
          if(allow_duplicates) {
            sample_responses[n] = - labels[n] * unit_weight;
          } else {
            sample_responses[n] = - labels[n] * (unit_weight + log(scalar_t(sample_nb_occurences[n])));
            sample_nb_occurences[n] = 1;
          }
        }
      }
    cerr << "Unknown loss type in LossMachine::subsample." << endl;