Update.
author François Fleuret <francois@fleuret.org>
Tue, 26 Mar 2024 08:30:05 +0000 (09:30 +0100)
committer François Fleuret <francois@fleuret.org>
Tue, 26 Mar 2024 08:30:05 +0000 (09:30 +0100)
escape.py
tasks.py

index 7596bea..5f34cd1 100755 (executable)
--- a/escape.py
+++ b/escape.py
@@ -242,7 +242,7 @@ def episodes2str(
         def status_bar(a, r, lr=None):
             a, r = a.item(), r.item()
             sb_a = "ISNEW"[a] if a >= 0 and a < 5 else "?"
-            sb_r = "- +"[r + 1] if r in {-1, 0, 1} else "?"
+            sb_r = "- +U"[r + 1] if r in {-1, 0, 1, 2} else "?"
             if lr is None:
                 sb_lr = ""
             else:
index 870ab95..8e8faa9 100755 (executable)
--- a/tasks.py
+++ b/tasks.py
@@ -1951,8 +1951,11 @@ class Escape(Task):
         # Generate iteration after iteration
 
         result = self.test_input[:250].clone()
+        # Erase all the content but that of the first iteration
         result[:, self.it_len :] = -1
+        # Set the lookahead_reward of the first iteration to UNKNOWN
         result[:, self.index_lookahead_reward] = escape.lookahead_reward2code(2)
+
         t = torch.arange(result.size(1), device=result.device)[None, :]
 
         for u in tqdm.tqdm(