From: François Fleuret <francois@fleuret.org>
Date: Mon, 24 Jun 2024 10:13:01 +0000 (+0200)
Subject: Update.
X-Git-Url: https://fleuret.org/cgi-bin/gitweb/gitweb.cgi?a=commitdiff_plain;h=702e672dcf9ebcfad11ae4034e64117f2c67ead5;p=culture.git

Update.
---

diff --git a/mygpt.py b/mygpt.py
index a178491..c58bea1 100755
--- a/mygpt.py
+++ b/mygpt.py
@@ -292,11 +292,16 @@ class MyGPT(nn.Module):
             )  # Needed to initialize the model's cache
         for s in range(to_generate.min(), to_generate.max() + 1):
             output = self(BracketedSequence(input, s, 1)).x
-            logits = output[:, s] / temperature
+            logits = output[:, s]
+
+            logits = logits.log_softmax(dim=-1) / temperature
+
             if forbidden_tokens is not None:
                 logits = logits.masked_fill(forbidden_tokens, float("-inf"))
+
             if forced_biases is not None:
                 logits = logits + forced_biases[None, :]
+
             if deterministic_synthesis:
                 t_next = logits.argmax(1)
             else:
diff --git a/tasks.py b/tasks.py
index b967465..5edb472 100755
--- a/tasks.py
+++ b/tasks.py
@@ -274,6 +274,10 @@ class World(Task):
             device=self.device,
         )
 
+        # This should not be necessary, since the autoregression is
+        # done in eval mode
+        sum_logits = sum_logits.detach()
+
         average_logits = sum_logits / quizzes.numel()
 
         # It's a bit brutal to do it twice, we should probably have a