- train_sequences = expr.generate_sequences(nb_train_samples)
- test_sequences = expr.generate_sequences(nb_test_samples)
- self.char2id = dict([ (c,n) for n,c in enumerate(set("".join(train_sequences + test_sequences))) ])
- self.id2char = dict([ (n,c) for n,c in self.char2id.items() ])
- len_max = max([len(x) for x in train_sequences + test_sequences])
- self.train_input = torch.cat([torch.tensor([char2id(c) for c in s + " "*(len_max-len(s))] for s in train_sequences)], 0)
- self.test_input = torch.cat([torch.tensor([char2id(c) for c in s + " "*(len_max-len(s))] for s in test_sequences)], 0)
+ train_sequences = expr.generate_sequences(
+ nb_train_samples,
+ nb_variables=nb_variables,
+ length=sequence_length,
+ # length=2 * sequence_length,
+ # randomize_length=True,
+ )
+ test_sequences = expr.generate_sequences(
+ nb_test_samples,
+ nb_variables=nb_variables,
+ length=sequence_length,
+ )
+ self.char2id = dict(
+ [
+ (c, n)
+ for n, c in enumerate(
+ set("#" + "".join(train_sequences + test_sequences))
+ )
+ ]
+ )
+ self.id2char = dict([(n, c) for c, n in self.char2id.items()])
+
+ self.filler, self.space = self.char2id["#"], self.char2id[" "]
+
+ len_max = max([len(x) for x in train_sequences])
+ self.train_input = torch.cat(
+ [
+ torch.tensor(
+ [
+ [self.char2id[c] for c in s + "#" * (len_max - len(s))]
+ for s in train_sequences
+ ]
+ )
+ ],
+ 0,
+ ).to(device)
+
+ len_max = max([len(x) for x in test_sequences])
+ self.test_input = torch.cat(
+ [
+ torch.tensor(
+ [
+ [self.char2id[c] for c in s + "#" * (len_max - len(s))]
+ for s in test_sequences
+ ]
+ )
+ ],
+ 0,
+ ).to(device)
+