def oneshot_policy_loss(mazes, output, policies, height, width):
    """Return the masked cross-entropy between predicted and target policies.

    Only positions where ``mazes == maze.v_empty`` contribute: targets are
    zeroed everywhere else, and the summed loss is divided by the number of
    selected positions.

    Args:
        mazes: Tensor compared element-wise with ``maze.v_empty``;
            presumably flattened mazes of shape (batch, height * width)
            -- TODO confirm against the caller.
        output: Scores normalized with ``log_softmax`` over the last
            dimension; presumably (batch, height * width, 4) -- TODO confirm.
        policies: 3-D tensor whose last two dimensions are swapped so that
            it lines up with ``output``.
        height: Unused in this function.
        width: Unused in this function.

    Returns:
        Scalar tensor: the negated sum of ``target * log_prob`` divided by
        the count of selected positions. A batch with no selected position
        is not guarded against (division by zero).
    """
    # Trailing singleton axis lets the mask broadcast over the last axis.
    selected = (mazes == maze.v_empty).unsqueeze(-1)

    # Scores are zeroed outside the selection before normalization; the
    # matching targets are zero as well, so those positions add nothing.
    log_probs = (output * selected).log_softmax(-1)
    masked_targets = policies.permute(0, 2, 1) * selected

    neg_log_likelihood = -(log_probs * masked_targets).sum()
    return neg_log_likelihood / selected.sum()
+
+
def oneshot_trace_loss(mazes, output, policies, height, width):
    """Return the masked mean absolute error against stationary densities.

    The reference values come from ``maze.stationary_densities``, evaluated
    on ``mazes`` and ``policies`` reshaped to 2-D grids. Only positions where
    ``mazes == maze.v_empty`` contribute, and the summed absolute error is
    divided by the number of such positions.

    Args:
        mazes: Tensor compared element-wise with ``maze.v_empty`` and viewed
            as (-1, height, width) for the density computation.
        output: Predictions with a trailing axis that is squeezed away;
            presumably (batch, height * width, 1) -- TODO confirm.
        policies: Tensor viewed as (-1, 4, height, width) for the density
            computation.
        height: Grid height used to reshape ``mazes`` and ``policies``.
        width: Grid width used to reshape ``mazes`` and ``policies``.

    Returns:
        Scalar tensor: the sum of ``|output - target|`` over the selected
        positions divided by their count. A batch with no selected position
        is not guarded against (division by zero).
    """
    selected = mazes == maze.v_empty

    # Reference densities are computed on the grid layout, then the last two
    # axes are flattened back into one.
    maze_grids = mazes.view(-1, height, width)
    policy_grids = policies.view(-1, 4, height, width)
    densities = maze.stationary_densities(maze_grids, policy_grids).flatten(-2)

    expected = densities * selected
    predicted = output.squeeze(-1) * selected

    abs_error = (predicted - expected).abs().sum()
    return abs_error / selected.sum()
+
+
+def oneshot(gpt, task):