self.train_input = self.tensorize(self.train_descr)
self.test_input = self.tensorize(self.test_descr)
self.train_input = self.tensorize(self.train_descr)
self.test_input = self.tensorize(self.test_descr)
assert split in {"train", "test"}
input = self.train_input if split == "train" else self.test_input
for batch in tqdm.tqdm(
assert split in {"train", "test"}
input = self.train_input if split == "train" else self.test_input
for batch in tqdm.tqdm(
self.t_nul = self.token2id["#"]
self.t_true = self.token2id["true"]
self.t_false = self.token2id["false"]
self.t_nul = self.token2id["#"]
self.t_true = self.token2id["true"]
self.t_false = self.token2id["false"]
assert split in {"train", "test"}
input = self.train_input if split == "train" else self.test_input
for batch in tqdm.tqdm(
assert split in {"train", "test"}
input = self.train_input if split == "train" else self.test_input
for batch in tqdm.tqdm(
assert split in {"train", "test"}
input = self.train_input if split == "train" else self.test_input
for batch in tqdm.tqdm(
assert split in {"train", "test"}
input = self.train_input if split == "train" else self.test_input
for batch in tqdm.tqdm(
def batches(self, split="train", nb_to_use=-1, desc=None):
assert split in {"train", "test"}
def batches(self, split="train", nb_to_use=-1, desc=None):
assert split in {"train", "test"}
result[:, self.world.it_len :] = -1
# Set the lookahead_reward of the first iteration to UNKNOWN
result[:, self.world.index_lookahead_reward] = self.world.lookahead_reward2code(
result[:, self.world.it_len :] = -1
# Set the lookahead_reward of the first iteration to UNKNOWN
result[:, self.world.index_lookahead_reward] = self.world.lookahead_reward2code(
ar_mask = (t >= u + self.world.index_states).long() * (
t < u + self.world.index_states + self.world.state_len
).long()
ar_mask = (t >= u + self.world.index_states).long() * (
t < u + self.world.index_states + self.world.state_len
).long()
# Generate the action and reward with lookahead_reward to +1
result[
:, u + self.world.index_lookahead_reward
# Generate the action and reward with lookahead_reward to +1
result[
:, u + self.world.index_lookahead_reward
# Set the lookahead_reward to UNKNOWN for the next iterations
result[
:, u + self.world.index_lookahead_reward
# Set the lookahead_reward to UNKNOWN for the next iterations
result[
:, u + self.world.index_lookahead_reward
filename = os.path.join(result_dir, f"test_thinking_compute_{n_epoch:04d}.txt")
with open(filename, "w") as f:
filename = os.path.join(result_dir, f"test_thinking_compute_{n_epoch:04d}.txt")
with open(filename, "w") as f:
for s in snapshots:
lr, s, a, r = self.world.seq2episodes(
s[n : n + 1],
for s in snapshots:
lr, s, a, r = self.world.seq2episodes(
s[n : n + 1],