2 // Written and (C) by Francois Fleuret
3 // Contact <francois.fleuret@idiap.ch> for comments & bug reports
5 #include "intelligence.h"
// Constructor: stores the manipulator and its number of actions, allocates
// the replay buffers (observations, rewards, actions) sized from
// _max_memory_tick, and creates one MappingApproximer per action.
//
// input            - map whose nb_parameters sizes each stored observation
//                    (accessed below through _input).
// manipulator      - provides nb_actions(); the pointer is kept in _manipulator.
// nb_weak_learners - forwarded to every MappingApproximer constructor.
//
// NOTE(review): max_memory_tick is used below but its declaration is not
// visible in this listing, and no initializer for _input is visible either;
// the listing appears to be missing lines -- confirm against the full file.
7 Intelligence::Intelligence(Map *input,
8 Manipulator *manipulator,
10 int nb_weak_learners) :
11 _nb_actions(manipulator->nb_actions()),
13 _manipulator(manipulator),
14 _max_memory_tick(max_memory_tick),
// The buffer sizes below read _max_memory_tick and _input->nb_parameters.
// Members are initialised in class-declaration order, not in the order
// written here, so this relies on those members being declared before
// _memory in the header -- TODO confirm.
16 _memory(new scalar_t[_max_memory_tick * _input->nb_parameters]),
17 _rewards(new scalar_t[_max_memory_tick]),
18 _actions(new int[_max_memory_tick]),
19 _q_predictors(new MappingApproximer *[manipulator->nb_actions()]),
20 _nb_weak_learners(nb_weak_learners) {
// One predictor per action, each built with the same number of weak learners.
21 for(int a = 0; a < _nb_actions; a++)
22 _q_predictors[a] = new MappingApproximer(_nb_weak_learners);
// Destructor: deletes every per-action predictor, then the array of
// predictor pointers itself.
//
// NOTE(review): the constructor also allocates _memory, _rewards and
// _actions with new[]; no matching delete[] is visible in this listing.
// Lines may be missing from this view -- confirm the buffers are released.
25 Intelligence::~Intelligence() {
26 for(int a = 0; a < _nb_actions; a++) delete _q_predictors[a];
27 delete[] _q_predictors;
// Restores the full state from a stream, in the same order save() writes it:
// number of actions, number of input parameters, _memory_tick, the actions,
// rewards and observation buffers, then each per-action predictor.
//
// is - input stream; values are read as raw bytes straight into the
//      variables, so the data is in the in-memory representation of int and
//      scalar_t.
//
// NOTE(review): the declaration of na/np and the statements that follow the
// two error messages are not visible in this listing. No stream-state check
// and no lower bound on _memory_tick are visible either -- confirm.
33 void Intelligence::load(istream &is) {
36 is.read((char *) &na, sizeof(int));
37 is.read((char *) &np, sizeof(int));
// The saved dimensions must match this instance's action count and input size.
39 if(na != _nb_actions || np != _input->nb_parameters) {
40 cerr << "Missmatch between the number of actions or input map size and the saved memory." << endl;
44 is.read((char *) &_memory_tick, sizeof(int));
// The buffers were allocated for _max_memory_tick entries in the constructor.
46 if(_memory_tick > _max_memory_tick) {
47 cerr << "Can not load, too large memory dump." << endl;
// Only the first _memory_tick entries of each buffer are read.
51 is.read((char *) _actions, sizeof(int) * _memory_tick);
52 is.read((char *) _rewards, sizeof(scalar_t) * _memory_tick);
53 is.read((char *) _memory, sizeof(scalar_t) * _input->nb_parameters * _memory_tick);
55 for(int a = 0; a < _nb_actions; a++) _q_predictors[a]->load(is);
// Writes the full state to a stream as raw bytes: _nb_actions, the number of
// input parameters, _memory_tick, the first _memory_tick entries of the
// actions, rewards and observation buffers, then each per-action predictor
// through its own save(). load() reads the fields back in this same order.
//
// os - output stream receiving the raw bytes.
58 void Intelligence::save(ostream &os) {
59 os.write((char *) &_nb_actions, sizeof(_nb_actions));
60 os.write((char *) &_input->nb_parameters, sizeof(int));
61 os.write((char *) &_memory_tick, sizeof(int));
62 os.write((char *) _actions, sizeof(int) * _memory_tick);
63 os.write((char *) _rewards, sizeof(scalar_t) * _memory_tick);
// Each stored tick holds nb_parameters scalars, laid out one tick after another.
64 os.write((char *) _memory, sizeof(scalar_t) * _input->nb_parameters * _memory_tick);
66 for(int a = 0; a < _nb_actions; a++) _q_predictors[a]->save(os);
// Records one tick at index _memory_tick: the action taken, the reward
// received, and a copy of the current input parameters.
//
// last_action - must satisfy 0 <= last_action < _nb_actions (checked by ASSERT).
// last_reward - stored as-is in _rewards.
//
// NOTE(review): no increment of _memory_tick is visible in this listing;
// presumably it follows the copy loop -- confirm against the full file.
69 void Intelligence::update(int last_action, scalar_t last_reward) {
// The buffers are full: the process is aborted rather than overwriting.
70 if(_memory_tick == _max_memory_tick) abort();
71 ASSERT(last_action >= 0 && last_action < _nb_actions, "Action number out of bounds.");
72 _actions[_memory_tick] = last_action;
73 _rewards[_memory_tick] = last_reward;
// k is the start offset of this tick's slot in the flat _memory array.
74 int k = _memory_tick * _input->nb_parameters;
75 for(int p = 0; p < _input->nb_parameters; p++) _memory[k++] = _input->parameters[p];
// Writes only the recorded memory to a file: the number of input parameters,
// _memory_tick, then the first _memory_tick entries of the actions, rewards
// and observation buffers. Unlike save(), neither _nb_actions nor the
// predictors are written.
//
// filename - path of the file to write.
//
// NOTE(review): the condition guarding the error message and what follows it
// are not visible in this listing.
// NOTE(review): the stream is opened without ios::binary although raw bytes
// are written; on platforms whose text mode translates line endings this can
// alter the data -- confirm the intended platforms.
79 void Intelligence::save_memory(char *filename) {
80 ofstream out(filename);
83 cerr << "Can not save to " << filename << "." << endl;
87 out.write((char *) &_input->nb_parameters, sizeof(int));
88 out.write((char *) &_memory_tick, sizeof(int));
89 out.write((char *) _actions, sizeof(int) * _memory_tick);
90 out.write((char *) _rewards, sizeof(scalar_t) * _memory_tick);
91 out.write((char *) _memory, sizeof(scalar_t) * _input->nb_parameters * _memory_tick);
// Reads back a memory dump in the layout save_memory() writes: the number of
// input parameters, _memory_tick, then the actions, rewards and observation
// buffers. The predictors are not touched.
//
// filename - path of the file to read.
//
// NOTE(review): the declaration of np, the condition guarding the first
// error message, and the statements following each error message are not
// visible in this listing.
// NOTE(review): the stream is opened without ios::binary although raw bytes
// are read; see the same concern in save_memory().
95 void Intelligence::load_memory(char *filename) {
96 ifstream in(filename);
99 cerr << "Can not load from " << filename << "." << endl;
104 in.read((char *) &np, sizeof(int));
// _memory_tick is overwritten here, before either validity check below runs.
105 in.read((char *) &_memory_tick, sizeof(int));
107 if(np != _input->nb_parameters) {
108 cerr << "Missmatch between the input map size and the saved memory." << endl;
112 if(_memory_tick > _max_memory_tick) {
113 cerr << "Can not load, too large memory dump." << endl;
117 in.read((char *) _actions, sizeof(int) * _memory_tick);
118 in.read((char *) _rewards, sizeof(scalar_t) * _memory_tick);
119 in.read((char *) _memory, sizeof(scalar_t) * _input->nb_parameters * _memory_tick);
// Trains the per-action predictors on the recorded memory and prints
// per-action squared-error sums on the training and test parts.
//
// proportion_for_training - fraction of the recorded ticks used for training:
//   nb_train_ticks = int(_memory_tick * proportion_for_training). Ticks with
//   t >= nb_train_ticks get weight 0.0 for every action and are the ones
//   summed into the ERROR_TEST figures.
//
// NOTE(review): several lines of this function are not visible in this
// listing (the arguments of set_learning_input, the declaration of s, loop
// closings), so the exact nesting below should be confirmed in the full file.
122 void Intelligence::learn(scalar_t proportion_for_training) {
123 scalar_t **sample_weigths;
124 int nb_train_ticks = int(_memory_tick * proportion_for_training);
// One weight vector per action: 1.0 where that action was taken on a
// training tick, 0.0 everywhere else.
125 sample_weigths = new scalar_t *[_nb_actions];
126 for(int a = 0; a < _nb_actions; a++) {
127 sample_weigths[a] = new scalar_t[_memory_tick];
128 for(int t = 0; t < _memory_tick; t++)
129 if(_actions[t] == a && t < nb_train_ticks) sample_weigths[a][t] = 1.0;
130 else sample_weigths[a][t] = 0.0;
131 _q_predictors[a]->set_learning_input(_input->nb_parameters,
// NOTE(review): an array sized by the runtime value _memory_tick is a
// variable-length array, which is a compiler extension rather than standard
// C++; the same applies to e_train and e_test below.
137 scalar_t target[_memory_tick];
// Targets start as the immediate rewards. Only indices below
// _memory_tick - 1 are written here and in the loop further down.
// NOTE(review): target[_memory_tick - 1] is not written in any visible line,
// yet the error sums below read target[t] up to t = _memory_tick - 1 --
// confirm it is initialised somewhere.
138 for(int t = 0; t < _memory_tick - 1; t++) target[t] = _rewards[t];
// One boosting round per weak learner.
140 for(int u = 0; u < _nb_weak_learners; u++) {
142 for(int t = 0; t < _memory_tick - 1; t++) {
144 for(int a = 0; a < _nb_actions; a++) {
// NOTE(review): whether this u is the round counter above or a variable
// declared on a line not visible here cannot be told from this listing.
145 u = _q_predictors[a]->_outputs_on_samples[t+1];
// With lambda fixed at 0.0 the s term is multiplied by zero, so the target
// is the immediate reward.
148 const scalar_t lambda = 0.0;
149 target[t] = lambda * s + _rewards[t];
152 for(int a = 0; a < _nb_actions; a++) _q_predictors[a]->learn_one_step(target);
// Training error: per action, sum of squared differences between the
// predictor of the action actually taken and the target, over t < nb_train_ticks.
155 scalar_t e_train[_nb_actions];
156 for(int a = 0; a < _nb_actions; a++) e_train[a] = 0;
157 for(int t = 0; t < nb_train_ticks; t++)
158 e_train[_actions[t]] += sq(_q_predictors[_actions[t]]->_outputs_on_samples[t] - target[t]);
159 cout << "ERROR_TRAIN " << u+1;
160 for(int a = 0; a < _nb_actions; a++) cout << " " << e_train[a];
// Test error: the same sum over the remaining ticks, nb_train_ticks <= t < _memory_tick.
165 scalar_t e_test[_nb_actions];
166 for(int a = 0; a < _nb_actions; a++) e_test[a] = 0;
167 for(int t = nb_train_ticks; t < _memory_tick; t++)
168 e_test[_actions[t]] += sq(_q_predictors[_actions[t]]->_outputs_on_samples[t] - target[t]);
169 cout << "ERROR_TEST " << u+1;
170 for(int a = 0; a < _nb_actions; a++) cout << " " << e_test[a];
// Release the per-action weight vectors, then the array holding them.
175 for(int a = 0; a < _nb_actions; a++) delete[] sample_weigths[a];
176 delete[] sample_weigths;
179 int Intelligence::best_action() {
181 scalar_t q, max_q = 0;
182 cout << "ACTION_SCORES";
183 for(int a = 0; a < _nb_actions; a++) {
184 q = _q_predictors[a]->predict(_input->parameters);
186 if(a == 0 || q > max_q) {