2 ////////////////////////////////////////////////////////////////////////////////
3 // This program is free software; you can redistribute it and/or //
4 // modify it under the terms of the GNU General Public License //
5 // version 2 as published by the Free Software Foundation. //
7 // This program is distributed in the hope that it will be useful, but //
8 // WITHOUT ANY WARRANTY; without even the implied warranty of //
9 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU //
10 // General Public License for more details. //
12 // Written and (C) by François Fleuret //
13 // Contact <francois.fleuret@epfl.ch> for comments & bug reports //
14 ////////////////////////////////////////////////////////////////////////////////
16 // $Id: intelligence.cc,v 1.34 2006-12-18 15:01:09 fleuret Exp $
18 #include "intelligence.h"
// Builds the agent: sizes the per-tick buffers from the memory capacity and the
// input map, and creates one predictor per manipulator action.
// NOTE(review): the numbering embedded in these lines skips 22, 25 and 28, so
// the declaration of max_memory_tick and the initializer of _input are not part
// of the visible text -- confirm against the original file.
20 Intelligence::Intelligence(Map *input,
21 Manipulator *manipulator,
23 int nb_weak_learners) :
24 _nb_actions(manipulator->nb_actions()),
26 _manipulator(manipulator),
27 _max_memory_tick(max_memory_tick),
// One row of _input->nb_parameters scalars per tick. This initializer reads
// _input and _max_memory_tick, so both have to be declared before _memory in the
// class (members are initialized in declaration order, not in the order written
// here) -- TODO confirm in intelligence.h.
29 _memory(new scalar_t[_max_memory_tick * _input->nb_parameters]),
// One reward and one action index per tick.
30 _rewards(new scalar_t[_max_memory_tick]),
31 _actions(new int[_max_memory_tick]),
// Array of predictor pointers, one slot per action; the slots are filled in the
// constructor body below.
32 _q_predictors(new MappingApproximer *[manipulator->nb_actions()]),
33 _nb_weak_learners(nb_weak_learners) {
// Each predictor is constructed with the number of weak learners.
34 for(int a = 0; a < _nb_actions; a++)
35 _q_predictors[a] = new MappingApproximer(_nb_weak_learners);
// Destroys every per-action predictor, then the array that held the pointers.
// NOTE(review): no release of _memory, _rewards or _actions is visible here; the
// embedded numbering jumps from 40 to 46, so it may sit on lines that are not in
// view -- verify that those buffers are freed.
38 Intelligence::~Intelligence() {
39 for(int a = 0; a < _nb_actions; a++) delete _q_predictors[a];
40 delete[] _q_predictors;
// Restores the recorded ticks and all per-action predictors from a stream, in
// the same field order that save() writes them. Every field is read as raw
// in-memory bytes, so the stream has to come from a build with matching int and
// scalar_t representation.
// NOTE(review): the declarations of na and np, and whatever follows the two
// error messages, are on lines that are not in view. No visible line checks the
// stream state after a read.
46 void Intelligence::load(istream &is) {
// Header: number of actions, then number of input parameters.
49 is.read((char *) &na, sizeof(int));
50 is.read((char *) &np, sizeof(int));
// The dump must have been produced with the same action count and input size.
52 if(na != _nb_actions || np != _input->nb_parameters) {
53 cerr << "Missmatch between the number of actions or input map size and the saved memory." << endl;
// Number of recorded ticks; it is stored directly into the member before being
// validated.
57 is.read((char *) &_memory_tick, sizeof(int));
// Only the upper bound is tested here; a negative count would pass this check.
59 if(_memory_tick > _max_memory_tick) {
60 cerr << "Can not load, too large memory dump." << endl;
// Payload: actions, rewards, then _input->nb_parameters scalars per tick.
64 is.read((char *) _actions, sizeof(int) * _memory_tick);
65 is.read((char *) _rewards, sizeof(scalar_t) * _memory_tick);
66 is.read((char *) _memory, sizeof(scalar_t) * _input->nb_parameters * _memory_tick);
// The predictors follow the payload, one after another in action order.
68 for(int a = 0; a < _nb_actions; a++) _q_predictors[a]->load(is);
// Writes the recorded ticks and all per-action predictors to a stream as raw
// in-memory bytes. Field order: action count, input parameter count, tick count,
// actions, rewards, input rows, then each predictor in action order. load()
// reads the fields back in this same order.
71 void Intelligence::save(ostream &os) {
// load() reads this first field with sizeof(int); the two sizes agree only if
// _nb_actions is an int -- TODO confirm in intelligence.h.
72 os.write((char *) &_nb_actions, sizeof(_nb_actions));
73 os.write((char *) &_input->nb_parameters, sizeof(int));
74 os.write((char *) &_memory_tick, sizeof(int));
// Only the first _memory_tick entries of each buffer are written.
75 os.write((char *) _actions, sizeof(int) * _memory_tick);
76 os.write((char *) _rewards, sizeof(scalar_t) * _memory_tick);
77 os.write((char *) _memory, sizeof(scalar_t) * _input->nb_parameters * _memory_tick);
// Each predictor serializes itself after the buffers.
79 for(int a = 0; a < _nb_actions; a++) _q_predictors[a]->save(os);
// Records one tick at slot _memory_tick: the action taken, the reward received,
// and a copy of the current input parameters.
//   last_action  index of the action, expected in [0, _nb_actions)
//   last_reward  reward stored alongside that action
// NOTE(review): no visible line advances _memory_tick; the embedded numbering
// skips 89, so the increment is presumably there -- confirm.
82 void Intelligence::update(int last_action, scalar_t last_reward) {
// A full buffer terminates the process rather than overwriting or growing.
83 if(_memory_tick == _max_memory_tick) abort();
84 ASSERT(last_action >= 0 && last_action < _nb_actions, "Action number out of bounds.");
85 _actions[_memory_tick] = last_action;
86 _rewards[_memory_tick] = last_reward;
// k is the offset of this tick's row in the flat _memory array.
87 int k = _memory_tick * _input->nb_parameters;
88 for(int p = 0; p < _input->nb_parameters; p++) _memory[k++] = _input->parameters[p];
// Writes only the recorded ticks to the named file: input parameter count, tick
// count, actions, rewards, then the input rows. Unlike save(), neither the
// action count nor the predictors are written.
// NOTE(review): the condition guarding the error message, and what follows it,
// are on lines that are not in view.
92 void Intelligence::save_memory(char *filename) {
// NOTE(review): the stream is opened without ios::binary although raw bytes are
// written below -- check whether text-mode translation matters on the target
// platforms.
93 ofstream out(filename);
96 cerr << "Can not save to " << filename << "." << endl;
// Header: input parameter count, then number of recorded ticks.
100 out.write((char *) &_input->nb_parameters, sizeof(int));
101 out.write((char *) &_memory_tick, sizeof(int));
// Payload: only the first _memory_tick entries of each buffer.
102 out.write((char *) _actions, sizeof(int) * _memory_tick);
103 out.write((char *) _rewards, sizeof(scalar_t) * _memory_tick);
104 out.write((char *) _memory, sizeof(scalar_t) * _input->nb_parameters * _memory_tick);
// Reads back a file in the layout written by save_memory(): input parameter
// count, tick count, actions, rewards, then the input rows. The predictors are
// not touched.
// NOTE(review): the declaration of np, the condition guarding the first error
// message, and whatever follows each error message are on lines that are not in
// view. No visible line checks the stream state after a read.
108 void Intelligence::load_memory(char *filename) {
// NOTE(review): opened without ios::binary although raw bytes are read below.
109 ifstream in(filename);
112 cerr << "Can not load from " << filename << "." << endl;
// The tick count is stored straight into the member, before either validation
// below has run.
117 in.read((char *) &np, sizeof(int));
118 in.read((char *) &_memory_tick, sizeof(int));
// The dump must have been recorded with the same input size.
120 if(np != _input->nb_parameters) {
121 cerr << "Missmatch between the input map size and the saved memory." << endl;
// Only the upper bound is tested here; a negative count would pass this check.
125 if(_memory_tick > _max_memory_tick) {
126 cerr << "Can not load, too large memory dump." << endl;
// Payload: actions, rewards, then _input->nb_parameters scalars per tick.
130 in.read((char *) _actions, sizeof(int) * _memory_tick);
131 in.read((char *) _rewards, sizeof(scalar_t) * _memory_tick);
132 in.read((char *) _memory, sizeof(scalar_t) * _input->nb_parameters * _memory_tick);
// Trains the per-action predictors on the recorded ticks, one learning step per
// weak learner, and prints per-action squared errors after each step.
//   proportion_for_training  fraction of the recorded ticks, taken from the
//                            start of the memory, used as the training set; the
//                            remaining ticks are only used for the test error
// NOTE(review): many lines of this function are missing from the visible text
// (the embedded numbering has gaps), including the remaining arguments of
// set_learning_input and the declarations of the locals s and u used in the
// inner loop. The comments below describe only what is visible.
135 void Intelligence::learn(scalar_t proportion_for_training) {
136 scalar_t **sample_weigths;
// Ticks [0, nb_train_ticks) are the training set; int() truncates.
137 int nb_train_ticks = int(_memory_tick * proportion_for_training);
138 sample_weigths = new scalar_t *[_nb_actions];
139 for(int a = 0; a < _nb_actions; a++) {
140 sample_weigths[a] = new scalar_t[_memory_tick];
// Weight 1 for training ticks on which action a was taken, 0 for every other
// tick.
141 for(int t = 0; t < _memory_tick; t++)
142 if(_actions[t] == a && t < nb_train_ticks) sample_weigths[a][t] = 1.0;
143 else sample_weigths[a][t] = 0.0;
// The argument list continues on lines that are not in view.
144 _q_predictors[a]->set_learning_input(_input->nb_parameters,
// Array sized by a runtime value: a variable-length array, which is a compiler
// extension in C++ rather than standard.
150 scalar_t target[_memory_tick];
// Targets start as the immediate rewards; the last entry, target[_memory_tick - 1],
// is not assigned by this loop.
151 for(int t = 0; t < _memory_tick - 1; t++) target[t] = _rewards[t];
// One learning step per weak learner.
153 for(int u = 0; u < _nb_weak_learners; u++) {
155 for(int t = 0; t < _memory_tick - 1; t++) {
157 for(int a = 0; a < _nb_actions; a++) {
// Output of predictor a on the following sample (t+1).
// NOTE(review): u is also the name of the weak-learner counter above;
// presumably a separate local u is declared on a line not in view -- confirm.
158 u = _q_predictors[a]->_outputs_on_samples[t+1];
// With lambda fixed at 0.0 the s term contributes nothing for finite s, so the
// target stays equal to the immediate reward.
161 const scalar_t lambda = 0.0;
162 target[t] = lambda * s + _rewards[t];
165 for(int a = 0; a < _nb_actions; a++) _q_predictors[a]->learn_one_step(target);
// Per-action sum of squared differences between predictor output and target on
// the training ticks, printed with the 1-based step number.
168 scalar_t e_train[_nb_actions];
169 for(int a = 0; a < _nb_actions; a++) e_train[a] = 0;
170 for(int t = 0; t < nb_train_ticks; t++)
171 e_train[_actions[t]] += sq(_q_predictors[_actions[t]]->_outputs_on_samples[t] - target[t]);
172 cout << "ERROR_TRAIN " << u+1;
173 for(int a = 0; a < _nb_actions; a++) cout << " " << e_train[a];
// Same measure on the remaining ticks [nb_train_ticks, _memory_tick).
// NOTE(review): this range includes t = _memory_tick - 1, but no visible line
// assigns target[_memory_tick - 1] (both loops that write target stop one tick
// earlier) -- check for an uninitialized read.
178 scalar_t e_test[_nb_actions];
179 for(int a = 0; a < _nb_actions; a++) e_test[a] = 0;
180 for(int t = nb_train_ticks; t < _memory_tick; t++)
181 e_test[_actions[t]] += sq(_q_predictors[_actions[t]]->_outputs_on_samples[t] - target[t]);
182 cout << "ERROR_TEST " << u+1;
183 for(int a = 0; a < _nb_actions; a++) cout << " " << e_test[a];
// Release the per-action weight arrays, then the array of pointers.
188 for(int a = 0; a < _nb_actions; a++) delete[] sample_weigths[a];
189 delete[] sample_weigths;
192 int Intelligence::best_action() {
195 cout << "ACTION_SCORES";
196 for(int a = 0; a < _nb_actions; a++) {
197 q = _q_predictors[a]->predict(_input->parameters);
199 if(a == 0 || q > max_q) {