+def run_tests(model, quiz_machine, deterministic_synthesis, local_device=main_device):
+    with torch.no_grad():
+        model.eval().to(local_device)
+
+        nb_test_samples, acc_test_loss = 0, 0.0
+
+        for input in quiz_machine.batches(model, split="test"):
+            input = input.to(local_device)
+
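+            # Process the whole sequence in a single pass; the returned
+            # BracketedSequence carries the logits in its .x field.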
+            bs = model(mygpt.BracketedSequence(input))
+            output = bs.x
+
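+            # F.cross_entropy expects logits as (N, classes, T), hence the
+            # transpose of the (N, T, classes) output; the input itself is
+            # the target, the model shifting the sequence internally for
+            # next-token prediction.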
+            loss = F.cross_entropy(output.transpose(1, 2), input)
+
+            acc_test_loss += loss.item() * input.size(0)
+
+            nb_test_samples += input.size(0)
+
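+        # Perplexity is exp of the mean per-token cross-entropy; the
+        # argument is clamped at 100 to keep exp from overflowing.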
+        test_perplexity = math.exp(min(100, acc_test_loss / nb_test_samples))
+
+        log_string(f"test_perplexity {n_epoch} model {model.id} {test_perplexity}")
+
+        model.main_test_accuracy = quiz_machine.produce_results(
+            n_epoch=n_epoch,
+            model=model,
+            result_dir=args.result_dir,
+            deterministic_synthesis=deterministic_synthesis,
+        )
+
+
+def one_epoch(model, quiz_machine, local_device=main_device):
+    model.to(local_device).train()
+
+    optimizer = torch.optim.Adam(model.parameters(), lr=args.learning_rate)
+
+    nb_train_samples, acc_train_loss = 0, 0.0
+
+    for input in quiz_machine.batches(model, split="train"):
+        input = input.to(local_device)
+
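+        # Gradient accumulation: zero the gradients only at the start of a
+        # full batch of args.batch_size samples; this assumes the physical
+        # batches yielded above evenly divide args.batch_size.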
+        if nb_train_samples % args.batch_size == 0:
+            optimizer.zero_grad()
+
+        output = model(mygpt.BracketedSequence(input)).x
+        loss = F.cross_entropy(output.transpose(1, 2), input)
+        acc_train_loss += loss.item() * input.size(0)
+
+        nb_train_samples += input.size(0)
+
+        loss.backward()
+
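+        # ... and step the optimizer only once the gradients of a full
+        # batch have been accumulated.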
+        if nb_train_samples % args.batch_size == 0:
+            optimizer.step()
+
+    train_perplexity = math.exp(min(100, acc_train_loss / nb_train_samples))
+
+    log_string(f"train_perplexity {n_epoch} model {model.id} {train_perplexity}")
+
+    run_tests(
+        model, quiz_machine, deterministic_synthesis=False, local_device=local_device
+    )
+
+    model.to(main_device)
+
+
+######################################################################
+
+# This is the key routine: it decides which of the generated quizzes to
+# keep.
+#
+# token_logprobas is NxMxT, where N is the number of quizzes, M the number
+# of models, and T the number of tokens per quiz.
+
+
+def compute_valid_quizzes_(token_logprobas):
+    warnings.warn("validation with uniform constraints", RuntimeWarning)
+    # Log-proba of the least likely token per (quiz, model), sorted across
+    # models: keep a quiz iff its weakest model has some token below proba
+    # 0.1 while every other model keeps all its tokens above proba 0.5.
+    worst_token = token_logprobas.min(dim=-1).values.sort(dim=-1).values
+    return (worst_token[:, 0] < math.log(0.1)) & (worst_token[:, 1] > math.log(0.5))
+
+
+def compute_valid_quizzes(token_logprobas):
+    # Total log-proba of each quiz per model, sorted across models: keep a
+    # quiz iff the weakest model gives it a proba below
+    # args.proba_not_understands and every other model gives it a proba
+    # above args.proba_understands.
+    quiz_logproba = token_logprobas.sum(dim=-1).sort(dim=-1).values
+    return (quiz_logproba[:, 0] < math.log(args.proba_not_understands)) & (
+        quiz_logproba[:, 1] > math.log(args.proba_understands)
+    )
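+
+
+# A minimal, illustrative check of the criterion above; it is not called
+# by the pipeline, uses only the torch / math imports already at the top
+# of this file, and the literal 0.25 / 0.75 thresholds stand in for
+# args.proba_not_understands / args.proba_understands. Quiz 0 is kept:
+# one model gives it proba 0.1 while the two others give it more than
+# 0.75. Quiz 1 is rejected: every model "understands" it.
+
+
+def demo_compute_valid_quizzes():
+    token_logprobas = torch.tensor(
+        [
+            [[0.5, 0.2], [0.9, 0.9], [0.9, 0.95]],  # quiz 0: 3 models, 2 tokens
+            [[0.9, 0.9], [0.9, 0.9], [0.9, 0.95]],  # quiz 1
+        ]
+    ).log()
+    quiz_logproba = token_logprobas.sum(dim=-1).sort(dim=-1).values
+    keep = (quiz_logproba[:, 0] < math.log(0.25)) & (
+        quiz_logproba[:, 1] > math.log(0.75)
+    )
+    assert keep.tolist() == [True, False]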