projects
/
mygpt.git
/ blobdiff
commit
grep
author
committer
pickaxe
?
search:
re
summary
|
shortlog
|
log
|
commit
|
commitdiff
|
tree
raw
|
inline
| side by side
Update.
[mygpt.git]
/
mygpt.py
diff --git a/mygpt.py b/mygpt.py
index d6879dc..9da2e68 100755 (executable)
--- a/mygpt.py
+++ b/mygpt.py
@@ -14,7 +14,7 @@ from torch.nn import functional as F
##############################
##############################
-class Residual(nn.Module):
+class WithResidual(nn.Module):
def __init__(self, *f):
super().__init__()
self.f = f[0] if len(f) == 1 else nn.Sequential(*f)
def __init__(self, *f):
super().__init__()
self.f = f[0] if len(f) == 1 else nn.Sequential(*f)
@@ -103,7 +103,7 @@ class MyGPT(nn.Module):
for _ in range(nb_blocks):
trunk_blocks += [
for _ in range(nb_blocks):
trunk_blocks += [
- Residual(
+ WithResidual(
nn.LayerNorm((dim_model,)),
QKVAttention(
dim_in = dim_model,
nn.LayerNorm((dim_model,)),
QKVAttention(
dim_in = dim_model,
@@ -113,7 +113,7 @@ class MyGPT(nn.Module):
causal = True, attention_dropout = dropout
),
),
causal = True, attention_dropout = dropout
),
),
- Residual(
+ WithResidual(
nn.LayerNorm((dim_model,)),
nn.Linear(in_features = dim_model, out_features = dim_hidden),
nn.ReLU(),
nn.LayerNorm((dim_model,)),
nn.Linear(in_features = dim_model, out_features = dim_hidden),
nn.ReLU(),