t_generated = [ ]
for j in range(nb_tokens):
- t = [ [ self.token2id[u] for u in t_primer + t_generated ] + [ 0 ] ]
+ t = [ [ self.token2id[u] for u in t_primer + t_generated ] ]
input = torch.tensor(t, device = self.device)
+ input = F.pad(input, (0, 1)) # Add the next token, the one to predict
output = model(input)
logits = output[0, -1]
if args.synthesis_sampling:
for j in range(nb_tokens):
input = self.tensorize([ t_primer + t_generated ]).to(self.device)
+ input = F.pad(input, (0, 1)) # Add the next token, the one to predict
output = model(input)
logits = output[0, -1]
if args.synthesis_sampling:
self.readout = nn.Linear(in_features = dim_model, out_features = vocabulary_size)
def forward(self, x):
- x = torch.cat((x.new_zeros(x.size(0), 1), x), 1)
+ x = F.pad(x, (1, 0))
x = self.embedding(x)
x = self.trunk(x)
x = self.readout(x)