Update.

author François Fleuret <francois@fleuret.org>

Sun, 23 Jun 2024 21:01:56 +0000 (23:01 +0200)

committer François Fleuret <francois@fleuret.org>

Sun, 23 Jun 2024 21:01:56 +0000 (23:01 +0200)
author François Fleuret <francois@fleuret.org>
Sun, 23 Jun 2024 21:01:56 +0000 (23:01 +0200)
committer François Fleuret <francois@fleuret.org>
Sun, 23 Jun 2024 21:01:56 +0000 (23:01 +0200)
diff --git a/main.py b/main.py

index 683c07d..a021a71 100755 (executable)
--- a/main.py
+++ b/main.py
@@ -335,23 +335,30 @@ def create_quizzes(
      task,
      nb_for_train=1000,
      nb_for_test=100,
+    desired_average_logits=None,
  ):
      kept = []
+    nb_generated_tokens, sum_logits = 0, 0
  
      while sum([x.size(0) for x in kept]) < nb_for_train + nb_for_test:
-        new_quizzes, nb_correct = task.create_new_quizzes(
+        nb_to_generate = 4 * (nb_for_train + nb_for_test)
+        new_quizzes, nb_correct, average_logits = task.create_new_quizzes(
              n_epoch=n_epoch,
              result_dir=args.result_dir,
              logger=log_string,
-            nb=4 * (nb_for_train + nb_for_test),
+            nb=nb_to_generate,
              model=model,
              other_models=other_models,
+            desired_average_logits=desired_average_logits,
          )
  
-        print(nb_correct)
+        nb_generated_tokens += new_quizzes.numel()
+        sum_logits += average_logits * new_quizzes.numel()
  
          to_keep = new_quizzes[nb_correct == len(other_models) - 1]
-        log_string(f"keep {to_keep.size(0)} quizzes")
+        log_string(
+            f"keep {to_keep.size(0)}/{new_quizzes.size(0)} quizzes ({to_keep.size(0)*100/new_quizzes.size(0):.02f}%)"
+        )
          kept.append(to_keep)
  
      new_quizzes = torch.cat(kept, dim=0)[: nb_for_train + nb_for_test]
@@ -366,6 +373,8 @@ def create_quizzes(
          log_string,
      )
  
+    return sum_logits / nb_generated_tokens
+
  
  ######################################################################
  
@@ -403,6 +412,8 @@ if args.check:
      nb_new_quizzes_for_train = 10
      nb_new_quizzes_for_test = 10
  
+desired_average_logits = None
+
  for n_epoch in range(args.nb_epochs):
      a = [(model.id, float(model.main_test_accuracy)) for model in models]
      a.sort(key=lambda p: p[0])
@@ -428,18 +439,31 @@ for n_epoch in range(args.nb_epochs):
      # test it
      run_tests(model, task, deterministic_synthesis=False)
  
+    log_string(
+        f"test_set_composition world {task.nb_batch_samples_world} quizzes {task.nb_batch_samples_quizzes}"
+    )
+
      if min([m.main_test_accuracy for m in models]) >= accuracy_to_make_quizzes:
          other_models = models.copy()
          other_models.remove(model)
  
-        create_quizzes(
+        average_logits = create_quizzes(
              model,
              other_models,
              task,
              nb_for_train=nb_new_quizzes_for_train,
              nb_for_test=nb_new_quizzes_for_test,
+            desired_average_logits=desired_average_logits,
          )
  
+        # We keep the first average logits as a reference
+        if desired_average_logits is None:
+            desired_average_logits = average_logits
+        else:
+            log_string(
+                f"desired_average_logits {desired_average_logits} average_logits {average_logits}"
+            )
+
          # We update everyone
          for model in models:
              run_tests(model, task, deterministic_synthesis=False)
diff --git a/mygpt.py b/mygpt.py

index 131c822..3bb3519 100755 (executable)
--- a/mygpt.py
+++ b/mygpt.py
@@ -279,10 +279,12 @@ class MyGPT(nn.Module):
          self,
          input,
          ar_mask,
+        temperature=1.0,
          deterministic_synthesis=False,
          forbidden_tokens=None,
          forced_biases=None,
      ):
+        sum_logits = 0
          to_generate = (ar_mask.sum(0) > 0).nonzero()
          if to_generate.min() > 0:
              self(
@@ -300,8 +302,13 @@ class MyGPT(nn.Module):
              else:
                  dist = torch.distributions.categorical.Categorical(logits=logits)
                  t_next = dist.sample()
+                sum_logits += logits.log_softmax(dim=-1)[
+                    torch.arange(t_next.size(0)), t_next
+                ]
              input[:, s] = ar_mask[:, s] * t_next + (1 - ar_mask[:, s]) * input[:, s]
  
+        return sum_logits
+
      def record_attention(self, v=True):
          for m in self.modules():
              if isinstance(m, QKVAttention):
diff --git a/tasks.py b/tasks.py

index 2c88333..cdf8f9e 100755 (executable)
--- a/tasks.py
+++ b/tasks.py
@@ -22,6 +22,7 @@ def masked_inplace_autoregression(
      batch_size,
      input,
      ar_mask,
+    temperature,
      deterministic_synthesis,
      forbidden_tokens=None,
      logit_biases=None,
@@ -44,17 +45,22 @@ def masked_inplace_autoregression(
          t = model.training
          model.eval()
  
+        sum_logits = 0
+
          for input, ar_mask in batches:
-            model.masked_inplace_autoregression(
-                input,
-                ar_mask,
-                deterministic_synthesis,
-                forbidden_tokens,
-                logit_biases,
+            sum_logits += model.masked_inplace_autoregression(
+                input=input,
+                ar_mask=ar_mask,
+                temperature=temperature,
+                deterministic_synthesis=deterministic_synthesis,
+                forbidden_tokens=forbidden_tokens,
+                forced_biases=logit_biases,
              )
  
          model.train(t)
  
+        return sum_logits
+
  
  ######################################################################
  
@@ -79,7 +85,7 @@ import world
  
  class World(Task):
      def save_image(self, input, result_dir, filename, logger):
-        img = world.sample2img(input.to("cpu"), self.height, self.width)
+        img = world.seq2img(input.to("cpu"), self.height, self.width)
          image_name = os.path.join(result_dir, filename)
          torchvision.utils.save_image(img.float() / 255.0, image_name, nrow=6, padding=4)
          logger(f"wrote {image_name}")
@@ -167,11 +173,12 @@ class World(Task):
              result = input.clone() * (1 - ar_mask)
  
              masked_inplace_autoregression(
-                model,
-                self.batch_size,
-                result,
-                ar_mask,
-                deterministic_synthesis,
+                model=model,
+                batch_size=self.batch_size,
+                input=result,
+                ar_mask=ar_mask,
+                temperature=1.0,
+                deterministic_synthesis=deterministic_synthesis,
                  progress_bar_desc=None,
                  device=self.device,
              )
@@ -205,11 +212,12 @@ class World(Task):
          result = input.clone() * (1 - ar_mask)
  
          masked_inplace_autoregression(
-            model,
-            self.batch_size,
-            result,
-            ar_mask,
-            deterministic_synthesis,
+            model=model,
+            batch_size=self.batch_size,
+            input=result,
+            ar_mask=ar_mask,
+            temperature=1.0,
+            deterministic_synthesis=deterministic_synthesis,
              progress_bar_desc=None,
              device=self.device,
          )
@@ -245,6 +253,7 @@ class World(Task):
          nb,
          model,
          other_models,
+        desired_average_logits=None,
      ):
          ###############################################################
          # Generate quizzes with model
@@ -254,16 +263,32 @@ class World(Task):
          )
          ar_mask = torch.full(quizzes.size(), 1, device=self.device)
  
-        masked_inplace_autoregression(
-            model,
-            self.batch_size,
-            quizzes,
-            ar_mask,
+        sum_logits = masked_inplace_autoregression(
+            model=model,
+            batch_size=self.batch_size,
+            input=quizzes,
+            ar_mask=ar_mask,
+            temperature=1.0,
              deterministic_synthesis=False,
              progress_bar_desc="creating quizzes",
              device=self.device,
          )
  
+        average_logits = sum_logits / quizzes.numel()
+
+        if desired_average_logits is not None:
+            temperature = average_logits / desired_average_logits
+            masked_inplace_autoregression(
+                model=model,
+                batch_size=self.batch_size,
+                input=quizzes,
+                ar_mask=ar_mask,
+                temperature=temperature,
+                deterministic_synthesis=False,
+                progress_bar_desc="creating quizzes",
+                device=self.device,
+            )
+
          ###############################################################
          # Create the reverse quizzes
  
@@ -288,10 +313,11 @@ class World(Task):
              result = quizzes.clone()
  
              masked_inplace_autoregression(
-                m,
-                self.batch_size,
-                result,
-                ar_mask,
+                model=m,
+                batch_size=self.batch_size,
+                input=result,
+                ar_mask=ar_mask,
+                temperature=1.0,
                  deterministic_synthesis=True,
                  progress_bar_desc="solving quizzes",
                  device=self.device,
@@ -302,10 +328,11 @@ class World(Task):
              reverse_result = reverse_quizzes.clone()
  
              masked_inplace_autoregression(
-                m,
-                self.batch_size,
-                reverse_result,
-                ar_mask,
+                model=m,
+                batch_size=self.batch_size,
+                input=reverse_result,
+                ar_mask=ar_mask,
+                temperature=1.0,
                  deterministic_synthesis=True,
                  progress_bar_desc="solving reversed quizzes",
                  device=self.device,
@@ -324,4 +351,4 @@ class World(Task):
              for k in nb_correct:
                  f.write(f"{k}\n")
  
-        return quizzes, nb_correct.sum(dim=0)
+        return quizzes, nb_correct.sum(dim=0), average_logits
diff --git a/world.py b/world.py

index 839f4ff..36aa1e9 100755 (executable)
--- a/world.py
+++ b/world.py
@@ -41,16 +41,15 @@ token2char = "_" + "".join([chr(ord("A") + n) for n in range(len(colors) - 1)])
  
  
  def generate_seq(
-    nb,
-    height,
-    width,
-    nb_birds=3,
-    nb_iterations=2,
+    nb, height, width, nb_birds=3, nb_iterations=2, return_iterations=False
  ):
      pairs = []
+    kept_iterations = []
  
      for _ in tqdm.tqdm(range(nb), dynamic_ncols=True, desc="world generation"):
          while True:
+            iterations = []
+
              f_start = torch.zeros(height, width, dtype=torch.int64)
  
              i, j, vi, vj = (
@@ -90,6 +89,7 @@ def generate_seq(
              f_end = f_start.clone()
  
              for l in range(nb_iterations):
+                iterations.append(f_end.clone())
                  f_end[...] = 0
                  nb_collisions = 0
                  for n in range(nb_birds):
@@ -125,9 +125,12 @@ def generate_seq(
                      f_end[i[n] - vi[n], j[n]] = c
                      f_end[i[n], j[n] - vj[n]] = c
  
+            iterations.append(f_end.clone())
+
              if nb_collisions == 0:
                  break
  
+        kept_iterations.append(iterations)
          pairs.append((f_start, f_end))
  
      result = []
@@ -147,7 +150,11 @@ def generate_seq(
                  )[None, :]
              )
  
-    return torch.cat(result, dim=0)
+    if return_iterations:
+        # iterations = torch.cat([ torch.cat([ x[None, None] for x in l], dim = 1) for l in kept_iterations ], dim=0)
+        return torch.cat(result, dim=0), kept_iterations
+    else:
+        return torch.cat(result, dim=0)
  
  
  ######################################################################
@@ -219,32 +226,33 @@ def generate_seq_old(
      return torch.cat(result, dim=0)
  
  
-def sample2img(seq, height, width, upscale=15):
-    f_first = seq[:, : height * width].reshape(-1, height, width)
-    f_second = seq[:, height * width + 1 :].reshape(-1, height, width)
-    direction = seq[:, height * width]
+def frame2img(x, height, width, upscale=15):
+    x = x.reshape(-1, height, width)
+    m = torch.logical_and(x >= 0, x < first_bird_token + nb_bird_tokens).long()
+    x = colors[x * m].permute(0, 3, 1, 2)
+    s = x.shape
+    x = x[:, :, :, None, :, None].expand(-1, -1, -1, upscale, -1, upscale)
+    x = x.reshape(s[0], s[1], s[2] * upscale, s[3] * upscale)
  
-    def mosaic(x, upscale):
-        x = x.reshape(-1, height, width)
-        m = torch.logical_and(x >= 0, x < first_bird_token + nb_bird_tokens).long()
-        x = colors[x * m].permute(0, 3, 1, 2)
-        s = x.shape
-        x = x[:, :, :, None, :, None].expand(-1, -1, -1, upscale, -1, upscale)
-        x = x.reshape(s[0], s[1], s[2] * upscale, s[3] * upscale)
+    x[:, :, :, torch.arange(0, x.size(3), upscale)] = 0
+    x[:, :, torch.arange(0, x.size(2), upscale), :] = 0
+    x = x[:, :, 1:, 1:]
  
-        x[:, :, :, torch.arange(0, x.size(3), upscale)] = 0
-        x[:, :, torch.arange(0, x.size(2), upscale), :] = 0
-        x = x[:, :, 1:, 1:]
+    for n in range(m.size(0)):
+        for i in range(m.size(1)):
+            for j in range(m.size(2)):
+                if m[n, i, j] == 0:
+                    for k in range(2, upscale - 2):
+                        x[n, :, i * upscale + k, j * upscale + k] = 0
+                        x[n, :, i * upscale + upscale - 1 - k, j * upscale + k] = 0
  
-        for n in range(m.size(0)):
-            for i in range(m.size(1)):
-                for j in range(m.size(2)):
-                    if m[n, i, j] == 0:
-                        for k in range(2, upscale - 2):
-                            x[n, :, i * upscale + k, j * upscale + k] = 0
-                            x[n, :, i * upscale + upscale - 1 - k, j * upscale + k] = 0
+    return x
  
-        return x
+
+def seq2img(seq, height, width, upscale=15):
+    f_first = seq[:, : height * width].reshape(-1, height, width)
+    f_second = seq[:, height * width + 1 :].reshape(-1, height, width)
+    direction = seq[:, height * width]
  
      direction_symbol = torch.full((direction.size(0), height * upscale - 1, upscale), 0)
      direction_symbol = colors[direction_symbol].permute(0, 3, 1, 2)
@@ -278,11 +286,11 @@ def sample2img(seq, height, width, upscale=15):
  
      return torch.cat(
          [
-            mosaic(f_first, upscale),
+            frame2img(f_first, height, width, upscale),
              separator,
              direction_symbol,
              separator,
-            mosaic(f_second, upscale),
+            frame2img(f_second, height, width, upscale),
          ],
          dim=3,
      )
@@ -302,16 +310,28 @@ if __name__ == "__main__":
  
      height, width = 6, 8
      start_time = time.perf_counter()
-    seq = generate_seq(nb=90, height=height, width=width)
+    seq, it = generate_seq(
+        nb=64, height=height, width=width, nb_iterations=100, return_iterations=True
+    )
      delay = time.perf_counter() - start_time
      print(f"{seq.size(0)/delay:02f} samples/s")
  
      print(seq2str(seq[:4]))
  
+    for t in range(len(it[0])):
+        img = torch.cat([frame2img(f[t], height, width) for f in it], dim=0)
+        torchvision.utils.save_image(
+            img.float() / 255.0,
+            f"/tmp/frame_{t:03d}.png",
+            nrow=8,
+            padding=6,
+            pad_value=0,
+        )
+
      # m = (torch.rand(seq.size()) < 0.05).long()
      # seq = (1 - m) * seq + m * 23
  
-    img = sample2img(seq, height, width)
+    img = seq2img(seq, height, width)
      print(img.size())
  
      torchvision.utils.save_image(
author	François Fleuret <francois@fleuret.org>
	Sun, 23 Jun 2024 21:01:56 +0000 (23:01 +0200)
committer	François Fleuret <francois@fleuret.org>
	Sun, 23 Jun 2024 21:01:56 +0000 (23:01 +0200)
main.py		patch | blob | history
mygpt.py		patch | blob | history
tasks.py		patch | blob | history
world.py		patch | blob | history