X-Git-Url: https://fleuret.org/cgi-bin/gitweb/gitweb.cgi?a=blobdiff_plain;f=mygpt.py;fp=mygpt.py;h=633ad642c19a3045064ef858c0ee494a7c733425;hb=6e87fe0cb8bd8a0042bbf7b2ede9d8ed0372fb6b;hp=9d3abb62cc8b6d95ce8be5b291d1c9e36e7f100d;hpb=cebc20b3608a41bfd27b2ab9d950c082f9b7ea89;p=mygptrnn.git diff --git a/mygpt.py b/mygpt.py index 9d3abb6..633ad64 100755 --- a/mygpt.py +++ b/mygpt.py @@ -569,17 +569,20 @@ class Caterpillar(nn.Module): # Roll the gating indexes warnings.warn("rotating barrel", RuntimeWarning) + + # print(f"SANITY2 {N=} {H=} {R=} {t0=} {t1=} {G.size()=}") + n_barrel = torch.arange(N, device=G.device)[:, None, None, None] h_barrel = torch.arange(H, device=G.device)[None, :, None, None] r_barrel = torch.arange(R, device=G.device)[None, None, :, None] t_barrel = torch.arange(t1 - t0, device=G.device)[None, None, None, :] - r_barrel = (r_barrel + t_barrel + t0) % R - - # print(f"({N}, {H}, {R}, {t1-t0}) {G.size()=}") + r_barrel = (r_barrel + (t_barrel + t0) // L) % R + # GG = G.gather(dim=2,index=r_barrel) G = G[n_barrel, h_barrel, r_barrel, t_barrel] - # print(G.sum()) + # print("SANITY", (GG-G).abs()) + # exit(0) ###################################################################### # The "flashbacks"