def batches(self, split="train", nb_to_use=-1, desc=None):
assert split in {"train", "test"}
def batches(self, split="train", nb_to_use=-1, desc=None):
assert split in {"train", "test"}
result[:, self.world.it_len :] = -1
# Set the lookahead_reward of the first iteration to UNKNOWN
result[:, self.world.index_lookahead_reward] = self.world.lookahead_reward2code(
result[:, self.world.it_len :] = -1
# Set the lookahead_reward of the first iteration to UNKNOWN
result[:, self.world.index_lookahead_reward] = self.world.lookahead_reward2code(
ar_mask = (t >= u + self.world.index_states).long() * (
t < u + self.world.index_states + self.world.state_len
).long()
ar_mask = (t >= u + self.world.index_states).long() * (
t < u + self.world.index_states + self.world.state_len
).long()
# Generate the action and reward with lookahead_reward to +1
result[
:, u + self.world.index_lookahead_reward
# Generate the action and reward with lookahead_reward to +1
result[
:, u + self.world.index_lookahead_reward
# Set the lookahead_reward to UNKNOWN for the next iterations
result[
:, u + self.world.index_lookahead_reward
# Set the lookahead_reward to UNKNOWN for the next iterations
result[
:, u + self.world.index_lookahead_reward
filename = os.path.join(result_dir, f"test_thinking_compute_{n_epoch:04d}.txt")
with open(filename, "w") as f:
filename = os.path.join(result_dir, f"test_thinking_compute_{n_epoch:04d}.txt")
with open(filename, "w") as f: