nb_train_samples,
nb_variables=nb_variables,
length=sequence_length,
- # length=2 * sequence_length,
- # randomize_length=True,
)
+
test_sequences = expr.generate_sequences(
nb_test_samples,
nb_variables=nb_variables,
for batch in tqdm.tqdm(
input.split(self.batch_size), dynamic_ncols=True, desc=desc
):
- if split == "train":
- last = (batch != self.filler).max(0).values.nonzero().max() + 3
- batch = batch[:, :last]
+ last = (batch != self.filler).max(0).values.nonzero().max() + 3
+ batch = batch[:, :last]
yield batch
def vocabulary_size(self):
def compute_nb_correct(input):
result = input.clone()
- ar_mask = (result == self.space).long().cumsum(dim=1).clamp(max=1)
+ s = (result == self.space).long()
+ ar_mask = (s.cumsum(dim=1) - s).clamp(min=0, max=1)
result = (1 - ar_mask) * result + ar_mask * self.filler
masked_inplace_autoregression(
model,
test_nb_correct,
test_nb_delta,
test_nb_missed,
- ) = compute_nb_correct(self.test_input[:1000])
+ ) = compute_nb_correct(self.test_input[:10000])
logger(
f"accuracy_test {n_epoch} nb_total {test_nb_total} nb_correct {test_nb_correct} accuracy {(100.0*test_nb_correct)/test_nb_total:.02f}%"
input = self.tensorize(sequences)
result = input.clone()
- ar_mask = (result == self.space).long().cumsum(dim=1).clamp(max=1)
+ s = (result == self.space).long()
+ ar_mask = (s.cumsum(dim=1) - s).clamp(min=0, max=1)
result = (1 - ar_mask) * result + ar_mask * self.filler
- # for n in range(result.size(0)):
- # logger(f"test_before {self.seq2str(result[n])}")
+ for n in range(result.size(0)):
+ logger(f"test_before {self.seq2str(result[n])}")
masked_inplace_autoregression(
model,