mygptrnn.git / commitdiff
parent: aa09a88
Update.

author     François Fleuret <francois@fleuret.org>
           Sun, 7 Jan 2024 15:21:02 +0000 (16:21 +0100)
committer  François Fleuret <francois@fleuret.org>
           Sun, 7 Jan 2024 15:21:02 +0000 (16:21 +0100)
mygpt.py
diff --git a/mygpt.py b/mygpt.py
index 6e13ff8..5ea927e 100755
--- a/mygpt.py
+++ b/mygpt.py
@@ -656,23 +656,14 @@ class Caterpillar(nn.Module):
             self.rec_K[:, :, t0:t1] = next_K.flatten(2, 3)
 
         if self.training and self.proba_flashback:
+            # insert_flash_back(self.rec_V,V,self.rec_K,K,t0,t1,CL,proba=self.proba_flashback / CL,)
+
             # This piece of code makes the assumption that there is
             # nothing informative before t0, otherwise we'd have to
             # implement a cache for V and K too. This should not be
             # too much of a problem since this is used only during
             # train, where full sequence are available
-            # insert_flash_back(
-            #     self.rec_V,
-            #     V,
-            #     self.rec_K,
-            #     K,
-            #     t0,
-            #     t1,
-            #     CL,
-            #     proba=self.proba_flashback / CL,
-            # )
-
 
             n = torch.arange(N, device=X.device)[:, None, None, None]
             t = torch.arange(t0, t1, device=X.device)[None, None, :, None]
             dv = torch.arange(DV, device=X.device)[None, None, None, :]
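For context, the surviving lines of the hunk build broadcast index tensors (n, t, dv) that address a recurrent buffer elementwise over batch, time slice t0:t1, and value dimension. Below is a minimal sketch of that PyTorch advanced-indexing idiom; the shapes, the rec buffer, and the head index h are made up for illustration and are not taken from this commit.

import torch

# Hypothetical sizes, only for illustration; N/H/T/DV and rec are not
# values from the commit.
N, H, T, DV = 2, 3, 8, 4
t0, t1 = 2, 6

rec = torch.zeros(N, H, T, DV)        # stand-in for a recurrent buffer
src = torch.randn(N, H, t1 - t0, DV)  # values to write into the slice t0:t1

# Index tensors shaped so they broadcast against each other, as in the
# n / t / dv lines of the diff above.
n = torch.arange(N)[:, None, None, None]       # [N, 1, 1, 1]
h = torch.arange(H)[None, :, None, None]       # [1, H, 1, 1]
t = torch.arange(t0, t1)[None, None, :, None]  # [1, 1, t1 - t0, 1]
dv = torch.arange(DV)[None, None, None, :]     # [1, 1, 1, DV]

# Advanced indexing broadcasts the four index tensors to [N, H, t1 - t0, DV]
# and writes src elementwise.
rec[n, h, t, dv] = src

Addressing each element through explicit index tensors (rather than a plain slice) makes it straightforward to combine the write with a random per-position mask, which is what a probabilistic "flashback" overwrite during training would need.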