+ masked_inplace_autoregression(
+ model,
+ self.batch_size,
+ result,
+ ar_mask,
+ deterministic_synthesis,
+ progress_bar_desc=None,
+ device=self.device,
+ )
+
+ log_ground_truth = ar_mask.min() == 0
+
+ if logger is not None:
+ for sp, st in zip(result[:10], input[:10]):
+ logger(
+ f"test_sequences {n_epoch} prediction {self.problem.seq2str(sp)}"
+ )
+ if log_ground_truth:
+ logger(
+ f" {n_epoch} ground truth {self.problem.seq2str(st)}"
+ )
+
+ nb_total, nb_correct = self.problem.compute_nb_correct(
+ input, ar_mask, result
+ )
+
+ # nb_total = ar_mask.sum().item()
+ # nb_correct = ((result == input).long() * ar_mask).sum().item()
+
+ return nb_total, nb_correct
+
+ train_nb_total, train_nb_correct = compute_accuracy(
+ self.train_input, self.train_ar_mask
+ )
+
+ logger(
+ f"accuracy_train {n_epoch} nb_total {train_nb_total} nb_correct {train_nb_correct} accuracy {(100.0*train_nb_correct)/train_nb_total:.02f}%"
+ )
+
+ test_nb_total, test_nb_correct = compute_accuracy(
+ self.test_input, self.test_ar_mask, logger
+ )
+
+ logger(
+ f"accuracy_test {n_epoch} nb_total {test_nb_total} nb_correct {test_nb_correct} accuracy {(100.0*test_nb_correct)/test_nb_total:.02f}%"
+ )
+
+ logger(f"main_test_accuracy {n_epoch} {test_nb_correct/test_nb_total}")
+
+ if save_attention_image is not None:
+ for k in range(10):
+ ns = torch.randint(self.test_input.size(0), (1,)).item()
+ input = self.test_input[ns : ns + 1].clone()
+
+ with torch.autograd.no_grad():
+ t = model.training
+ model.eval()
+ # model.record_attention(True)
+ model(BracketedSequence(input))
+ model.train(t)
+ # ram = model.retrieve_attention()
+ # model.record_attention(False)
+
+ # tokens_output = [c for c in self.problem.seq2str(input[0])]
+ # tokens_input = ["n/a"] + tokens_output[:-1]
+ # for n_head in range(ram[0].size(1)):
+ # filename = os.path.join(
+ # result_dir, f"sandbox_attention_{k}_h{n_head}.pdf"
+ # )
+ # attention_matrices = [m[0, n_head] for m in ram]
+ # save_attention_image(
+ # filename,
+ # tokens_input,
+ # tokens_output,
+ # attention_matrices,
+ # k_top=10,
+ ##min_total_attention=0.9,
+ # token_gap=12,
+ # layer_gap=50,
+ # )
+ # logger(f"wrote {filename}")
+
+
+######################################################################
+
+import world
+
+
+class World(Task):
+ def __init__(
+ self,
+ nb_train_samples,
+ nb_test_samples,
+ batch_size,
+ logger=None,
+ device=torch.device("cpu"),
+ ):
+ super().__init__()
+
+ self.batch_size = batch_size
+ self.device = device
+ self.height = 6
+ self.width = 8
+
+ self.train_input = world.generate(
+ nb_train_samples, height=self.height, width=self.width
+ )
+ self.train_ar_mask = (
+ (torch.arange(self.train_input.size(1)) > self.train_input.size(1) // 2)
+ .long()[None, :]
+ .expand_as(self.train_input)
+ )
+
+ self.test_input = world.generate(
+ nb_test_samples, height=self.height, width=self.width
+ )
+ self.test_ar_mask = (
+ (torch.arange(self.test_input.size(1)) > self.test_input.size(1) // 2)
+ .long()[None, :]
+ .expand_as(self.test_input)
+ )
+
+ self.train_input, self.train_ar_mask = self.train_input.to(
+ device
+ ), self.train_ar_mask.to(device)
+ self.test_input, self.test_ar_mask = self.test_input.to(
+ device
+ ), self.test_ar_mask.to(device)
+
+ self.nb_codes = max(self.train_input.max(), self.test_input.max()) + 1
+
+ def batches(self, split="train", nb_to_use=-1, desc=None):
+ assert split in {"train", "test"}
+ input = self.train_input if split == "train" else self.test_input
+ if nb_to_use > 0:
+ input = input[:nb_to_use]
+ if desc is None:
+ desc = f"epoch-{split}"
+ for batch in tqdm.tqdm(
+ input.split(self.batch_size), dynamic_ncols=True, desc=desc
+ ):
+ yield batch
+
+ def vocabulary_size(self):
+ return self.nb_codes
+
+ def produce_results(
+ self, n_epoch, model, result_dir, logger, deterministic_synthesis, nmax=1000
+ ):
+ def compute_accuracy(input, ar_mask, logger=None):
+ input, ar_mask = input[:nmax], ar_mask[:nmax]
+ result = input.clone() * (1 - ar_mask)
+
+ masked_inplace_autoregression(
+ model,
+ self.batch_size,
+ result,
+ ar_mask,
+ deterministic_synthesis,
+ progress_bar_desc=None,
+ device=self.device,
+ )
+
+ nb_total, nb_correct = (
+ input.size(0),
+ (input == result).long().min(dim=1).values.sum(),
+ )
+
+ return nb_total, nb_correct
+
+ train_nb_total, train_nb_correct = compute_accuracy(
+ self.train_input, self.train_ar_mask
+ )
+
+ logger(
+ f"accuracy_train {n_epoch} nb_total {train_nb_total} nb_correct {train_nb_correct} accuracy {(100.0*train_nb_correct)/train_nb_total:.02f}%"
+ )
+
+ test_nb_total, test_nb_correct = compute_accuracy(
+ self.test_input, self.test_ar_mask, logger
+ )
+
+ logger(
+ f"accuracy_test {n_epoch} nb_total {test_nb_total} nb_correct {test_nb_correct} accuracy {(100.0*test_nb_correct)/test_nb_total:.02f}%"
+ )
+
+ logger(f"main_test_accuracy {n_epoch} {test_nb_correct/test_nb_total}")
+
+ ##############################
+
+ input, ar_mask = self.test_input[:64], self.test_ar_mask[:64]
+ result = input.clone() * (1 - ar_mask)
+
+ masked_inplace_autoregression(
+ model,
+ self.batch_size,
+ result,
+ ar_mask,
+ deterministic_synthesis,
+ progress_bar_desc=None,
+ device=self.device,
+ )
+
+ img = world.sample2img(result.to("cpu"), self.height, self.width)
+
+ image_name = os.path.join(result_dir, f"world_result_{n_epoch:04d}.png")
+ torchvision.utils.save_image(img.float() / 255.0, image_name, nrow=8, padding=2)
+ logger(f"wrote {image_name}")
+
+
+######################################################################
+
+import picoclvr
+
+
+class PicoCLVR(Task):
+ # Make a tensor from a list of strings
+ def tensorize(self, descr):
+ token_descr = [s.strip().split(" ") for s in descr]
+ l = max([len(s) for s in token_descr])
+ token_descr = [s + ["<nul>"] * (l - len(s)) for s in token_descr]
+ id_descr = [[self.token2id[u] for u in s] for s in token_descr]
+ return torch.tensor(id_descr, device=self.device)
+
+ # Make a list of strings from a tensor
+ def detensorize(self, x):
+ return [" ".join([self.id2token[t.item()] for t in r]) for r in x]
+
+ # trim all the tensors in the tuple z to remove as much token from
+ # left and right in the first tensor. If z is a tuple, all its
+ # elements are trimed according to the triming for the first
+ def trim(self, z, token="<nul>"):
+ n = self.token2id[token]
+ if type(z) == tuple:
+ x = z[0]
+ i = (1 - (F.pad(x, (1, 1), value=n) == n).min(0).values.long()).cumsum(0)
+ a, b = (i == 0).nonzero().max(), (i == i.max()).nonzero().min()
+ return tuple([t[:, a:b] for t in z])
+ else:
+ i = (1 - (F.pad(z, (1, 1), value=n) == n).min(0).values.long()).cumsum(0)
+ a, b = (i == 0).nonzero().max(), (i == i.max()).nonzero().min()
+ return z[:, a:b]
+
+ ######################
+
+ def __init__(
+ self,
+ nb_train_samples,
+ nb_test_samples,
+ batch_size,
+ height,
+ width,
+ nb_colors=5,
+ logger=None,
+ device=torch.device("cpu"),
+ pruner_train=None,
+ pruner_eval=None,
+ ):
+ super().__init__()
+
+ def generate_descr(nb, cache_suffix, pruner):
+ return picoclvr.generate(
+ nb,
+ height=self.height,
+ width=self.width,
+ nb_colors=nb_colors,
+ pruner=pruner,
+ )
+
+ self.height = height
+ self.width = width
+ self.batch_size = batch_size
+ self.device = device
+ self.pruner_train = pruner_train
+ self.pruner_eval = pruner_eval
+
+ if logger is not None:
+ logger(
+ f"generating {nb_train_samples+nb_test_samples} samples (can take some time)"
+ )
+
+ self.train_descr = generate_descr(
+ nb_train_samples, "train", pruner=self.pruner_train
+ )
+ self.test_descr = generate_descr(nb_test_samples, "test", pruner=None)
+
+ # Build the tokenizer
+ tokens = {"<nul>", "<img>"}
+ for d in [self.train_descr, self.test_descr]:
+ for s in d:
+ for t in s.strip().split(" "):
+ tokens.add(t)
+ # make this set a sorted list to get the same tensors given
+ # the same descr
+ tokens = list(tokens)
+ tokens.sort()
+ self.token2id = dict([(t, n) for n, t in enumerate(tokens)])
+ self.id2token = dict([(n, t) for n, t in enumerate(tokens)])
+ self.t_img, self.t_nul = self.token2id["<img>"], self.token2id["<nul>"]
+
+ # Tokenize the train and test sets
+ self.train_input = self.tensorize(self.train_descr)
+ self.test_input = self.tensorize(self.test_descr)
+
+ def batches(self, split="train", nb_to_use=-1, desc=None):
+ assert split in {"train", "test"}
+ input = self.train_input if split == "train" else self.test_input
+ for batch in tqdm.tqdm(
+ input.split(self.batch_size), dynamic_ncols=True, desc=f"epoch-{split}"
+ ):
+ yield self.trim(batch)
+
+ def vocabulary_size(self):
+ return len(self.token2id)
+
+ def compute_missing_properties(
+ self, n_epoch, model, logger, deterministic_synthesis, pruner=None
+ ):
+ acc_nb_requested_properties = []
+ acc_nb_missing_properties = []
+ acc_nb_results = 0
+
+ for input in tqdm.tqdm(
+ self.test_input.split(self.batch_size),
+ dynamic_ncols=True,
+ desc=f"test-properties",
+ ):
+ result = input.clone()
+ ar_mask = (result == self.t_img).long().cumsum(dim=1).clamp(max=1)
+ result = (1 - ar_mask) * result + ar_mask * self.t_nul
+ masked_inplace_autoregression(
+ model,
+ self.batch_size,
+ result,
+ ar_mask,
+ deterministic_synthesis,
+ progress_bar_desc=None,
+ device=self.device,
+ )
+
+ result_descr = self.detensorize(result)
+ np = picoclvr.nb_properties(
+ result_descr,
+ height=self.height,
+ width=self.width,
+ pruner=pruner,
+ )
+ nb_requested_properties, _, nb_missing_properties = zip(*np)
+ acc_nb_requested_properties += nb_requested_properties
+ acc_nb_missing_properties += nb_missing_properties
+ acc_nb_results += len(result_descr)
+
+ nb_requested_properties = sum(acc_nb_requested_properties)
+ nb_missing_properties = sum(acc_nb_missing_properties)
+
+ prefix = "" if pruner is None else "pruned_"
+ logger(f"nb_{prefix}samples {n_epoch} {acc_nb_results}")
+ logger(
+ f"property_{prefix}nb {n_epoch} requested {sum(acc_nb_requested_properties)} missing {sum(acc_nb_missing_properties)}"
+ )
+ logger(
+ f"property_{prefix}miss {n_epoch} {100*nb_missing_properties/nb_requested_properties:.02f}%"
+ )
+
+ logger(
+ f"main_test_accuracy {n_epoch} {1-nb_missing_properties/nb_requested_properties}"
+ )
+
+ ######################################################################
+
+ def produce_results(
+ self, n_epoch, model, result_dir, logger, deterministic_synthesis
+ ):
+ self.compute_missing_properties(n_epoch, model, logger, deterministic_synthesis)
+
+ if self.pruner_eval is not None:
+ self.compute_missing_properties(n_epoch, model, self.pruner_eval)
+
+ nb_tokens_to_generate = self.height * self.width + 3
+ result_descr = []
+ nb_per_primer = 8
+ primer = []
+
+ for primer_descr in [
+ "red above green <sep> green top <sep> blue right of red",
+ "there is red <sep> there is yellow <sep> there is blue",
+ "red below yellow <sep> yellow below green <sep> green below blue <sep> red right <sep> yellow left <sep> green right <sep> blue left",
+ "green bottom <sep> yellow bottom <sep> green left of blue <sep> yellow right of blue <sep> blue top",
+ ]:
+ primer += [primer_descr + " <img>"] * nb_per_primer
+
+ result = self.tensorize(primer)
+ fill = result.new_full(
+ result.size()[:-1] + (self.height * self.width + 1,), self.t_nul
+ )
+ result = torch.cat((result, fill), 1)
+ ar_mask = (result == self.t_nul).long()
+ masked_inplace_autoregression(
+ model,
+ self.batch_size,
+ result,
+ ar_mask,
+ deterministic_synthesis,
+ device=self.device,
+ )
+ result_descr = self.detensorize(result)
+
+ np = picoclvr.nb_properties(result_descr, height=self.height, width=self.width)
+
+ acc_nb_requested_properties, _, acc_nb_missing_properties = zip(*np)
+ acc_nb_results = len(result_descr)
+
+ nb_requested_properties = sum(acc_nb_requested_properties)
+ nb_missing_properties = sum(acc_nb_missing_properties)
+
+ prefix = "demo_"
+ logger(f"nb_{prefix}samples {n_epoch} {acc_nb_results}")
+ logger(
+ f"property_{prefix}nb {n_epoch} requested {sum(acc_nb_requested_properties)} missing {sum(acc_nb_missing_properties)}"
+ )
+ logger(
+ f"property_{prefix}miss {n_epoch} {100*nb_missing_properties/nb_requested_properties:.02f}%"
+ )
+
+ img = picoclvr.descr2img(result_descr, height=self.height, width=self.width)
+
+ if img.dim() == 5:
+ if img.size(1) == 1:
+ img = F.pad(img.squeeze(1), pad=(1, 1, 1, 1), value=64)
+ else:
+ img = torch.cat(
+ [
+ torchvision.utils.make_grid(x, padding=1, pad_value=64)[None]
+ for x in img
+ ],
+ 0,
+ )
+
+ image_name = os.path.join(result_dir, f"picoclvr_result_{n_epoch:04d}.png")
+ torchvision.utils.save_image(
+ img / 255.0, image_name, nrow=nb_per_primer, padding=1, pad_value=0.0
+ )
+ logger(f"wrote {image_name}")
+
+
+######################################################################
+
+
+class MNIST(Task):
+ def __init__(
+ self, nb_train_samples, nb_test_samples, batch_size, device=torch.device("cpu")
+ ):
+ super().__init__()
+
+ self.nb_train_samples = (nb_train_samples,)
+ self.nb_test_samples = (nb_test_samples,)
+ self.batch_size = batch_size
+ self.device = device
+ data_set = torchvision.datasets.MNIST(root="./data", train=True, download=True)
+ self.train_input = data_set.data[:nb_train_samples].view(-1, 28 * 28).long()
+ data_set = torchvision.datasets.MNIST(root="./data", train=False, download=True)
+ self.test_input = data_set.data[:nb_test_samples].view(-1, 28 * 28).long()
+
+ def batches(self, split="train", nb_to_use=-1, desc=None):
+ assert split in {"train", "test"}
+ input = self.train_input if split == "train" else self.test_input
+ if nb_to_use > 0:
+ input = input[:nb_to_use]
+ if desc is None:
+ desc = f"epoch-{split}"
+ for batch in tqdm.tqdm(
+ input.split(self.batch_size), dynamic_ncols=True, desc=desc
+ ):
+ yield batch
+
+ def vocabulary_size(self):
+ return 256
+
+ def produce_results(
+ self, n_epoch, model, result_dir, logger, deterministic_synthesis
+ ):
+ results = torch.empty(64, 28 * 28, device=self.device, dtype=torch.int64)
+ ar_mask = torch.full_like(results, 1)
+ masked_inplace_autoregression(
+ model,
+ self.batch_size,
+ results,
+ ar_mask,
+ deterministic_synthesis,
+ device=self.device,
+ )
+ image_name = os.path.join(result_dir, f"mnist_result_{n_epoch:04d}.png")
+ torchvision.utils.save_image(
+ 1 - results.reshape(-1, 1, 28, 28) / 255.0,
+ image_name,
+ nrow=16,
+ pad_value=0.8,
+ )
+ logger(f"wrote {image_name}")
+
+
+######################################################################
+
+import maze
+
+
+class Maze(Task):
+ def map2seq(self, *m):
+ return torch.cat([x.flatten(1) for x in m], 1)
+
+ def seq2map(self, s):
+ s = s.reshape(s.size(0), -1, self.height, self.width)
+ return (s[:, k] for k in range(s.size(1)))
+
+ def __init__(
+ self,
+ nb_train_samples,
+ nb_test_samples,
+ batch_size,
+ height,
+ width,
+ nb_walls,
+ device=torch.device("cpu"),
+ ):
+ super().__init__()
+
+ self.batch_size = batch_size
+ self.height = height
+ self.width = width
+ self.device = device
+
+ train_mazes, train_paths, _ = maze.create_maze_data(
+ nb_train_samples,
+ height=height,
+ width=width,
+ nb_walls=nb_walls,
+ progress_bar=lambda x: tqdm.tqdm(x, dynamic_ncols=True, desc=f"data-train"),
+ )
+ self.train_input = self.map2seq(train_mazes.to(device), train_paths.to(device))
+
+ test_mazes, test_paths, _ = maze.create_maze_data(
+ nb_test_samples,
+ height=height,
+ width=width,
+ nb_walls=nb_walls,
+ progress_bar=lambda x: tqdm.tqdm(x, dynamic_ncols=True, desc=f"data-test"),
+ )
+ self.test_input = self.map2seq(test_mazes.to(device), test_paths.to(device))
+
+ self.nb_codes = max(self.train_input.max(), self.test_input.max()) + 1
+
+ def batches(self, split="train", nb_to_use=-1, desc=None):
+ assert split in {"train", "test"}
+ input = self.train_input if split == "train" else self.test_input
+ if nb_to_use > 0:
+ input = input[:nb_to_use]
+ if desc is None:
+ desc = f"epoch-{split}"
+ for batch in tqdm.tqdm(
+ input.split(self.batch_size), dynamic_ncols=True, desc=desc
+ ):
+ yield batch
+
+ def vocabulary_size(self):
+ return self.nb_codes
+
+ def compute_error(
+ self, model, split="train", nb_to_use=-1, deterministic_synthesis=False
+ ):
+ model_device = next(model.parameters()).device
+ nb_total, nb_correct = 0, 0
+ count = torch.zeros(
+ self.width * self.height,
+ self.width * self.height,
+ device=model_device,
+ dtype=torch.int64,
+ )
+
+ for input in self.batches(split, nb_to_use):
+ input = input.to(model_device)
+ result = input.clone()
+ ar_mask = result.new_zeros(result.size())
+ ar_mask[:, self.height * self.width :] = 1
+ result *= 1 - ar_mask
+ masked_inplace_autoregression(
+ model,
+ self.batch_size,
+ result,
+ ar_mask,
+ deterministic_synthesis,
+ progress_bar_desc=None,
+ device=self.device,
+ )
+ mazes, paths = self.seq2map(result)
+ path_correctness = maze.path_correctness(mazes, paths)
+ nb_correct += path_correctness.long().sum()
+ nb_total += mazes.size(0)
+
+ optimal_path_lengths = (
+ (input[:, self.height * self.width :] == maze.v_path).long().sum(1)
+ )
+ predicted_path_lengths = (
+ (result[:, self.height * self.width :] == maze.v_path).long().sum(1)
+ )
+ optimal_path_lengths = optimal_path_lengths[path_correctness]
+ predicted_path_lengths = predicted_path_lengths[path_correctness]
+ count[optimal_path_lengths, predicted_path_lengths] += 1