+ self.train_input = seq[:nb_train_samples]
+ self.train_q_test_set = q_test_set[:nb_train_samples]
+ self.train_ref_test_errors = test_error[:nb_train_samples]
+ self.test_input = seq[nb_train_samples:]
+ self.test_q_test_set = q_test_set[nb_train_samples:]
+ self.test_ref_test_errors = test_error[nb_train_samples:]
+
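+ # save the reference test errors of both splits to disk so the
+ # model's later predictions can be compared against them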
+ filename = os.path.join(result_dir, f"train_errors_ref.dat")
+ with open(filename, "w") as f:
+ for e in self.train_ref_test_errors:
+ f.write(f"{e}\n")
+
+ filename = os.path.join(result_dir, f"test_errors_ref.dat")
+ with open(filename, "w") as f:
+ for e in self.test_ref_test_errors:
+ f.write(f"{e}\n")
+
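+ # the vocabulary must cover every token id seen in either split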
+ self.nb_codes = max(self.train_input.max(), self.test_input.max()) + 1
+
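+ # iterate over mini-batches of the chosen split, with a tqdm progress bar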
+ def batches(self, split="train"):
+ assert split in {"train", "test"}
+ batches_input = self.train_input if split == "train" else self.test_input
+ for batch in tqdm.tqdm(
+ batches_input.split(self.batch_size), dynamic_ncols=True, desc=f"epoch-{split}"
+ ):
+ yield batch
+
+ def vocabulary_size(self):
+ return self.nb_codes
+
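+ # evaluate by keeping a prompt prefix from held-out sequences and
+ # letting the model regenerate the rest autoregressively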
+ def produce_results(
+ self, n_epoch, model, result_dir, logger, deterministic_synthesis
+ ):
+ correct = self.test_input[:1000]
+ result = correct.clone()
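+ # ar_mask is 1 on the positions to generate: everything after the
+ # first nb_samples_per_mlp * 3 + 2 tokens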
+ ar_mask = (
+ torch.arange(result.size(1), device=result.device)
+ > self.nb_samples_per_mlp * 3 + 1
+ ).long()[None, :]
+ ar_mask = ar_mask.expand_as(result)
+ result *= 1 - ar_mask # paranoia: explicitly zero out every position the model must generate
+
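+ # fill in the masked positions in place, batch by batch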
+ masked_inplace_autoregression(
+ model,
+ self.batch_size,
+ result,
+ ar_mask,
+ deterministic_synthesis,
+ device=self.device,