######################################################################
+
+######################################################################
+
+2024 Jan 07 21:38:11 (from mygpt.py)
+
+ # insert_flash_back(self.rec_V,V,self.rec_K,K,t0,t1,CL,proba=self.proba_flashback / CL,)
+
self.rec_K[:, :, t0:t1] = next_K.flatten(2, 3)
if self.training and self.proba_flashback > 0.0:
- # insert_flash_back(self.rec_V,V,self.rec_K,K,t0,t1,CL,proba=self.proba_flashback / CL,)
-
# This piece of code makes the assumption that there is
# nothing informative before t0, otherwise we'd have to
# implement a cache for V and K too. This should not be