projects
/
mygpt.git
/ commitdiff
commit
grep
author
committer
pickaxe
?
search:
re
summary
|
shortlog
|
log
|
commit
| commitdiff |
tree
raw
|
patch
|
inline
| side by side (parent:
38c69cc
)
OCDC
author
Francois Fleuret
<francois@fleuret.org>
Fri, 29 Jul 2022 08:07:59 +0000
(10:07 +0200)
committer
Francois Fleuret
<francois@fleuret.org>
Fri, 29 Jul 2022 08:07:59 +0000
(10:07 +0200)
mygpt.py
patch
|
blob
|
history
diff --git
a/mygpt.py
b/mygpt.py
index
9da2e68
..
954f4f0
100755
(executable)
--- a/mygpt.py
+++ b/mygpt.py
@@ -24,14 +24,12 @@ class WithResidual(nn.Module):
class WithResidual(nn.Module):
##############################
##############################
-class PositionalEncoding(nn.Module):
+class AddPositionalEncoding(nn.Module):
def __init__(self, len_max):
super().__init__()
self.len_max = len_max
def __init__(self, len_max):
super().__init__()
self.len_max = len_max
- # From Vaswani et al 2018
- # PE_{t,2i} = sin(t/(L^{2i/D}))
- # PE_{t,2i+1} = cos(t/(L^{2i/D}))
+ # [Vaswani et al 2018] PE_{t,2i} = sin(t/(L^{2i/D})), PE_{t,2i+1} = cos(t/(L^{2i/D}))
def forward(self, x):
t = torch.arange(x.size(1), dtype = x.dtype, device = x.device)[:, None]
j = torch.arange(x.size(2), dtype = x.dtype, device = x.device)[None, :]
def forward(self, x):
t = torch.arange(x.size(1), dtype = x.dtype, device = x.device)[:, None]
j = torch.arange(x.size(2), dtype = x.dtype, device = x.device)[None, :]
@@ -96,7 +94,7 @@ class MyGPT(nn.Module):
self.embedding = nn.Sequential(
nn.Embedding(vocabulary_size, dim_model),
nn.Dropout(dropout),
self.embedding = nn.Sequential(
nn.Embedding(vocabulary_size, dim_model),
nn.Dropout(dropout),
-            PositionalEncoding(len_max),
+            AddPositionalEncoding(len_max),
)
trunk_blocks = [ ]
)
trunk_blocks = [ ]