def status_bar(a, r, lr=None):
a, r = a.item(), r.item()
sb_a = "ISNEW"[a] if a >= 0 and a < 5 else "?"
- sb_r = "- +"[r + 1] if r in {-1, 0, 1} else "?"
+ sb_r = "- +U"[r + 1] if r in {-1, 0, 1, 2} else "?"
if lr is None:
sb_lr = ""
else:
# Generate iteration after iteration
result = self.test_input[:250].clone()
+ # Erase all the content but that of the first iteration
result[:, self.it_len :] = -1
+ # Set the lookahead_reward of the first iteration to UNKNOWN
result[:, self.index_lookahead_reward] = escape.lookahead_reward2code(2)
+
t = torch.arange(result.size(1), device=result.device)[None, :]
for u in tqdm.tqdm(