- actions = seq[:, :, height * width] - first_actions_code
- rewards = seq[:, :, height * width + 1] - first_rewards_code - 1
- return states, actions, rewards
+ actions = code2action(seq[:, :, height * width + 1])
+ rewards = code2reward(seq[:, :, height * width + 2])
+ return lookahead_rewards, states, actions, rewards
+
+
+def seq2str(seq):
+    """Render each row of token codes in ``seq`` as a string.
+
+    ``seq`` is iterated row by row, and every element of a row must expose
+    ``.item()`` to yield its scalar code. Each code is translated to one
+    character according to the code range it falls in; a code that belongs
+    to none of the known ranges is rendered as ``"?"``.
+
+    Returns a list with one string per row of ``seq``.
+    """
+    # One entry per token family: (first code, number of codes, glyphs).
+    # The order matters only if ranges overlap: the first match wins.
+    families = (
+        (first_states_code, nb_states_codes, " #@$"),
+        (first_actions_code, nb_actions_codes, "ISNEW"),
+        (first_rewards_code, nb_rewards_codes, "-0+"),
+        (
+            first_lookahead_rewards_code,
+            nb_lookahead_rewards_codes,
+            "n.p",
+        ),
+    )
+
+    def token2str(t):
+        for base, count, glyphs in families:
+            offset = t - base
+            if 0 <= offset < count:
+                return glyphs[offset]
+        return "?"
+
+    return ["".join(token2str(x.item()) for x in row) for row in seq]