"--task",
type=str,
default="twotargets",
- help="file, byheart, learnop, guessop, mixing, memory, twotargets, addition, picoclvr, mnist, maze, snake, stack, expr, rpl, grid, qmlp",
+ help="file, byheart, learnop, guessop, mixing, memory, twotargets, addition, picoclvr, mnist, maze, snake, stack, expr, rpl, grid, qmlp, escape",
)
parser.add_argument("--log_filename", type=str, default="train.log", help=" ")
parser.add_argument("--mixing_deterministic_start", action="store_true", default=False)
+##############################
+# escape options
+#
+# These three values are forwarded to tasks.Escape as height, width and T
+# when --task is "escape".
+
+# Presumably the number of rows of the escape world -- confirm in escape.py.
+parser.add_argument("--escape_height", type=int, default=4)
+
+# Presumably the number of columns of the escape world -- confirm in escape.py.
+parser.add_argument("--escape_width", type=int, default=6)
+
+# Presumably the number of time steps of each episode -- confirm in escape.py.
+parser.add_argument("--escape_T", type=int, default=20)
+
######################################################################
args = parser.parse_args()
"nb_train_samples": 60000,
"nb_test_samples": 10000,
},
+ "escape": {
+ "model": "37M",
+ "batch_size": 25,
+ "nb_train_samples": 25000,
+ "nb_test_samples": 10000,
+ },
}
if args.task in default_task_args:
device=device,
)
+elif args.task == "escape":
+ task = tasks.Escape(
+ nb_train_samples=args.nb_train_samples,
+ nb_test_samples=args.nb_test_samples,
+ batch_size=args.batch_size,
+ height=args.escape_height,
+ width=args.escape_width,
+ T=args.escape_T,
+ logger=log_string,
+ device=device,
+ )
+
else:
raise ValueError(f"Unknown task {args.task}")
######################################################################
+
+import escape
+
+
+class Escape(Task):
+    """Task serving token sequences built from generated "escape" episodes.
+
+    Episodes come from ``escape.generate_episodes`` and are turned into
+    sequences by ``escape.episodes2seq``. The first ``nb_train_samples``
+    sequences form the train split, the remaining ones the test split.
+    """
+
+    def __init__(
+        self,
+        nb_train_samples,
+        nb_test_samples,
+        batch_size,
+        height,
+        width,
+        T,
+        logger=None,
+        device=torch.device("cpu"),
+    ):
+        """Generate the episodes and split them into train and test sets.
+
+        Args:
+            nb_train_samples: number of sequences kept for training.
+            nb_test_samples: number of sequences kept for testing.
+            batch_size: size of the chunks yielded by ``batches``.
+            height: forwarded unchanged to ``escape.generate_episodes``.
+            width: forwarded unchanged to ``escape.generate_episodes``.
+            T: forwarded unchanged to ``escape.generate_episodes``
+                (presumably the episode length -- confirm in escape.py).
+            logger: accepted for interface compatibility; currently unused.
+            device: stored on the instance. The sequences themselves are
+                left wherever ``escape.episodes2seq`` created them.
+        """
+        super().__init__()
+
+        self.batch_size = batch_size
+        self.device = device
+
+        states, actions, rewards = escape.generate_episodes(
+            nb_train_samples + nb_test_samples, height, width, T
+        )
+        seq = escape.episodes2seq(states, actions, rewards)
+        self.train_input = seq[:nb_train_samples]
+        self.test_input = seq[nb_train_samples:]
+
+        # The two splits partition seq, so the maximum of seq is the maximum
+        # over both of them. Computing it on seq avoids failing when the
+        # test split is empty, and int() makes the vocabulary size a plain
+        # Python integer rather than the scalar object returned by .max().
+        self.nb_codes = int(seq.max()) + 1
+
+    def batches(self, split="train", nb_to_use=-1, desc=None):
+        """Yield the sequences of a split in chunks of ``batch_size``.
+
+        Args:
+            split: ``"train"`` or ``"test"``.
+            nb_to_use: when positive, only the first ``nb_to_use`` sequences
+                of the split are served; otherwise the whole split is.
+            desc: label of the progress bar, ``"epoch-<split>"`` by default.
+        """
+        assert split in {"train", "test"}
+        data = self.train_input if split == "train" else self.test_input
+        if nb_to_use > 0:
+            data = data[:nb_to_use]
+        if desc is None:
+            desc = f"epoch-{split}"
+        yield from tqdm.tqdm(
+            data.split(self.batch_size), dynamic_ncols=True, desc=desc
+        )
+
+    def vocabulary_size(self):
+        """Return the number of token codes, i.e. the largest code plus one."""
+        return self.nb_codes
+
+    def produce_results(
+        self, n_epoch, model, result_dir, logger, deterministic_synthesis, nmax=1000
+    ):
+        """Do nothing: no result is produced for this task yet."""
+
+
+
+
+######################################################################