+ ######################################################################
+
def logproba_of_solutions(self, models, c_quizzes):
    """Score every quiz in ``c_quizzes`` under every model in ``models``.

    For each model, the quizzes are processed in chunks of
    ``self.batch_size``. The per-token cross-entropy between the model
    output and the quiz tokens is multiplied by the mask returned by
    ``self.make_ar_mask``, summed over the last dimension and negated.

    Args:
        models: Iterable of models. Each must expose ``training``,
            ``eval()``, ``train(mode)`` and an integer ``id``. ``id`` is
            used as the column index of the result, so it has to be a
            valid index into a dimension of size ``len(models)``.
        c_quizzes: Integer tensor of quizzes, first dimension indexing the
            quizzes. Presumably shaped (n_quizzes, seq_len) -- TODO confirm
            against callers.

    Returns:
        A float32 CPU tensor of shape ``(c_quizzes.size(0), len(models))``
        whose entry ``[n, model.id]`` is the masked negative cross-entropy
        sum for quiz ``n`` under that model.
    """
    logproba = c_quizzes.new_zeros(
        c_quizzes.size(0), len(models), device=self.device, dtype=torch.float32
    )

    for model in models:
        was_training = model.training
        model.eval()
        try:
            with torch.autograd.no_grad():
                # Both tensors are split with the same chunk size along
                # dim 0, so each quiz batch lines up with its result rows.
                for batch, batch_logproba in zip(
                    c_quizzes.split(self.batch_size),
                    logproba.split(self.batch_size),
                ):
                    batch = batch.to(self.device)
                    # NOTE(review): presumably non-zero on the positions
                    # that count towards the score -- confirm against
                    # make_ar_mask.
                    ar_mask = self.make_ar_mask(batch)
                    output = model(mygpt.BracketedSequence(batch)).x
                    # cross_entropy expects the class dimension at index 1,
                    # hence the transpose (assumes output is
                    # (batch, seq, vocab) -- TODO confirm).
                    ce = (
                        F.cross_entropy(
                            output.transpose(1, 2), batch, reduction="none"
                        )
                        * ar_mask
                    )
                    # split() yields views, so this in-place write lands
                    # directly in `logproba`.
                    batch_logproba[:, model.id] = -ce.sum(dim=-1)
        finally:
            # Restore the caller's train/eval mode even if scoring failed.
            model.train(was_training)

    return logproba.to("cpu")