/*
 *  folded-ctf is an implementation of the folded hierarchy of
 *  classifiers for object detection, developed by Francois Fleuret
 *  and Donald Geman.
 *
 *  Copyright (c) 2008 Idiap Research Institute, http://www.idiap.ch/
 *  Written by Francois Fleuret <francois.fleuret@idiap.ch>
 *
 *  This file is part of folded-ctf.
 *
 *  folded-ctf is free software: you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published
 *  by the Free Software Foundation, either version 3 of the License,
 *  or (at your option) any later version.
 *
 *  folded-ctf is distributed in the hope that it will be useful, but
 *  WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 *  General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with folded-ctf.  If not, see <http://www.gnu.org/licenses/>.
 *
 */

#include "tools.h"
#include "loss_machine.h"

LossMachine::LossMachine(int loss_type) {
  _loss_type = loss_type;
}
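
/*
 * For reference, the per-sample losses handled by this class, with y a
 * label in {-1, 0, +1} and f the current response (samples with label 0
 * are skipped by most of the code below):
 *
 *   LOSS_EXPONENTIAL : exp(-y f)
 *   LOSS_HINGE       : max(0, 1 - y f)
 *   LOSS_LOGISTIC    : log(1 + exp(-y f))
 */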

void LossMachine::get_loss_derivatives(SampleSet *samples,
                                       scalar_t *responses,
                                       scalar_t *derivatives) {

  switch(_loss_type) {

  case LOSS_EXPONENTIAL:
    {
      for(int n = 0; n < samples->nb_samples(); n++) {
        derivatives[n] =
          - samples->label(n) * exp( - samples->label(n) * responses[n]);
      }
    }
    break;

  case LOSS_HINGE:
    {
      for(int n = 0; n < samples->nb_samples(); n++) {
        if(samples->label(n) != 0 && samples->label(n) * responses[n] < 1)
          derivatives[n] = 1;
        else
          derivatives[n] = 0;
      }
    }
    break;

  case LOSS_LOGISTIC:
    {
      for(int n = 0; n < samples->nb_samples(); n++) {
        if(samples->label(n) == 0)
          derivatives[n] = 0.0;
        else
          derivatives[n] = samples->label(n) / (1 + exp(samples->label(n) * responses[n]));
      }
    }
    break;

  default:
    cerr << "Unknown loss type in LossMachine::get_loss_derivatives."
         << endl;
    exit(1);
  }

}
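
/*
 * Note that the conventions above differ across losses: the exponential
 * case returns the true derivative d/df exp(-y f) = -y exp(-y f), the
 * logistic case returns y / (1 + exp(y f)), which is the *negated*
 * gradient of log(1 + exp(-y f)), and the hinge case returns a plain
 * margin-violation indicator. The boosting code calling this method is
 * presumably written against these exact conventions.
 */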
77
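/*
 * Minimal usage sketch for loss() (hypothetical caller -- only
 * nb_samples() and label(n) of SampleSet are relied upon here):
 *
 *   LossMachine machine(LOSS_LOGISTIC);
 *   scalar_t *responses = new scalar_t[samples->nb_samples()];
 *   // ... fill responses with the current classifier outputs ...
 *   scalar_t l = machine.loss(samples, responses);
 *   delete[] responses;
 */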
scalar_t LossMachine::loss(SampleSet *samples, scalar_t *responses) {
  scalar_t l = 0;

  switch(_loss_type) {

  case LOSS_EXPONENTIAL:
    {
      for(int n = 0; n < samples->nb_samples(); n++) {
        l += exp( - samples->label(n) * responses[n]);
        ASSERT(!isinf(l));
      }
    }
    break;

  case LOSS_HINGE:
    {
      for(int n = 0; n < samples->nb_samples(); n++) {
        if(samples->label(n) != 0) {
          if(samples->label(n) * responses[n] < 1)
            l += (1 - samples->label(n) * responses[n]);
        }
      }
    }
    break;

  case LOSS_LOGISTIC:
    {
      for(int n = 0; n < samples->nb_samples(); n++) {
        if(samples->label(n) != 0) {
          scalar_t u = - samples->label(n) * responses[n];
          // For u > 20, log(1 + exp(u)) equals u up to less than 1e-8,
          // so u is used directly and exp(u) is never taken on large
          // values; for u < -20 the term is below ~2e-9 and is dropped.
          if(u > 20) {
            l += u;
          } else if(u > -20) {
            l += log(1 + exp(u));
          }
        }
      }
    }
    break;

  default:
    cerr << "Unknown loss type in LossMachine::loss." << endl;
    exit(1);
  }

  return l;
}
125
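/*
 * A boosting round would typically use the returned weight as follows
 * (hypothetical caller, array names invented for illustration):
 *
 *   scalar_t rho = machine.optimal_weight(samples, h, f);
 *   for(int n = 0; n < samples->nb_samples(); n++)
 *     f[n] += rho * h[n];
 *
 * where h holds the weak learner responses and f the accumulated
 * strong classifier responses.
 */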
scalar_t LossMachine::optimal_weight(SampleSet *sample_set,
                                     scalar_t *weak_learner_responses,
                                     scalar_t *current_responses) {

  switch(_loss_type) {

  case LOSS_EXPONENTIAL:
    {
      // Closed-form optimum: with W+ (resp. W-) the total exponential
      // weight of the samples the weak learner gets right (resp. wrong),
      // the loss-minimizing coefficient is 0.5 log(W+ / W-), the
      // classical AdaBoost weight. This is exact for weak learners with
      // responses in {-1, +1}; only the sign of the response is used
      // here.
      scalar_t num = 0, den = 0, z;

      for(int n = 0; n < sample_set->nb_samples(); n++) {
        z = sample_set->label(n) * weak_learner_responses[n];
        if(z > 0) {
          num += exp( - sample_set->label(n) * current_responses[n]);
        } else if(z < 0) {
          den += exp( - sample_set->label(n) * current_responses[n]);
        }
      }

      return 0.5 * log(num / den);
    }
    break;

  case LOSS_HINGE:
  case LOSS_LOGISTIC:
    {
      // No closed form here, so do a crude line search on the step size
      // u: walk with step du, reversing and shrinking du by a factor of
      // four whenever the loss increases, and stop after 100 steps or
      // when the step becomes negligible. Since prev_l starts at -1,
      // below any attainable loss, the very first step always counts as
      // an increase and triggers a reversal.
      scalar_t u = 0, du = -0.1;
      scalar_t *responses = new scalar_t[sample_set->nb_samples()];

      scalar_t l = 0, prev_l = -1;

      const scalar_t minimum_delta_for_optimization = 1e-5;

      int n = 0;
      while(n < 100 && fabs(du) > minimum_delta_for_optimization) {
        n++;
        u += du;
        for(int s = 0; s < sample_set->nb_samples(); s++) {
          responses[s] = current_responses[s] + u * weak_learner_responses[s];
        }
        l = loss(sample_set, responses);
        if(l > prev_l) du = du * -0.25;
        prev_l = l;
      }

      (*global.log_stream) << "END l = " << l << " du = " << du << endl;

      delete[] responses;

      return u;
    }

  default:
    cerr << "Unknown loss type in LossMachine::optimal_weight." << endl;
    exit(1);
  }

}
185
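/*
 * Hypothetical call (array names invented for illustration): keep every
 * sample n with occ[n] > 0 after
 *
 *   int *occ = new int[nb];
 *   scalar_t *resp = new scalar_t[nb];
 *   machine.subsample(nb, labels, responses, 1000, occ, resp, 0);
 *
 * and give the kept samples the corrected responses resp[n].
 */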
void LossMachine::subsample(int nb, scalar_t *labels, scalar_t *responses,
                            int nb_to_sample, int *sample_nb_occurences, scalar_t *sample_responses,
                            int allow_duplicates) {

  switch(_loss_type) {

  case LOSS_EXPONENTIAL:
    {
      scalar_t *weights = new scalar_t[nb];

      for(int n = 0; n < nb; n++) {
        if(labels[n] == 0) {
          weights[n] = 0;
        } else {
          weights[n] = exp( - labels[n] * responses[n]);
        }
        sample_nb_occurences[n] = 0;
        sample_responses[n] = 0.0;
      }

      scalar_t total_weight;
      int nb_sampled = 0, sum_sample_nb_occurences = 0;

      int *sampled_indexes = new int[nb_to_sample];

      (*global.log_stream) << "Sampling " << nb_to_sample << " samples." << endl;

      do {
        total_weight = robust_sampling(nb,
                                       weights,
                                       nb_to_sample,
                                       sampled_indexes);

        for(int k = 0; nb_sampled < nb_to_sample && k < nb_to_sample; k++) {
          int i = sampled_indexes[k];
          if(allow_duplicates || sample_nb_occurences[i] == 0) nb_sampled++;
          sample_nb_occurences[i]++;
          sum_sample_nb_occurences++;
        }
      } while(nb_sampled < nb_to_sample);

      (*global.log_stream) << "Done." << endl;

      delete[] sampled_indexes;

      // Spread the total exponential weight uniformly over the drawn
      // copies: each copy carries total_weight / sum_sample_nb_occurences,
      // which is folded back into a response so that exp(-label * response)
      // reproduces exactly that weight. When duplicates are collapsed,
      // the multiplicity is folded in too, so the total weight of the
      // original set is preserved either way.
      scalar_t unit_weight = log(total_weight / scalar_t(sum_sample_nb_occurences));

      for(int n = 0; n < nb; n++) {
        if(sample_nb_occurences[n] > 0) {
          if(allow_duplicates) {
            sample_responses[n] = - labels[n] * unit_weight;
          } else {
            sample_responses[n] = - labels[n] * (unit_weight + log(scalar_t(sample_nb_occurences[n])));
            sample_nb_occurences[n] = 1;
          }
        }
      }

      delete[] weights;

    }
    break;

  default:
    cerr << "Unknown loss type in LossMachine::subsample." << endl;
    exit(1);
  }

}