- lr, _, _, _ = escape.seq2episodes(result[:, :u], self.height, self.width)
-
- # Generate the lookahead_reward and state
- ar_mask = (t % it_len == index_lookahead_reward).long() * (
- t <= u + index_lookahead_reward
- ).long()
- ar(result, ar_mask)
-
- # Generate the lookahead_reward and state
- ar_mask = (t >= u + index_states).long() * (
- t < u + index_states + state_len
+ # Generate the next state but keep the initial one; the
+ # lookahead_reward of previous iterations is set to
+ # UNKNOWN
+ if u > 0:
+ result[
+ :, u + self.world.index_lookahead_reward
+ ] = self.world.lookahead_reward2code(2)
+ ar_mask = (t >= u + self.world.index_states).long() * (
+ t < u + self.world.index_states + self.world.state_len
+ ).long()
+ ar(result, ar_mask)
+
+ # Generate the action and reward with lookahead_reward set to +1
+ result[
+ :, u + self.world.index_lookahead_reward
+ ] = self.world.lookahead_reward2code(1)
+ ar_mask = (t >= u + self.world.index_reward).long() * (
+ t <= u + self.world.index_action