- # Build the tokenizer
- tokens = set()
- for d in [self.train_descr, self.test_descr]:
- for s in d:
- for t in s.strip().split(" "):
- tokens.add(t)
- # make this set a sorted list to get the same tensors given
- # the same descr
- tokens = list(tokens)
- tokens.sort()
- tokens = ["#"] + tokens
- self.token2id = dict([(t, n) for n, t in enumerate(tokens)])
- self.id2token = dict([(n, t) for n, t in enumerate(tokens)])
- self.t_nul = self.token2id["#"]
- self.t_true = self.token2id["true"]
- self.t_false = self.token2id["false"]
+ self.train_input = seq[:nb_train_samples]
+ self.train_q_test_set = q_test_set[:nb_train_samples]
+ self.test_input = seq[nb_train_samples:]
+ self.test_q_test_set = q_test_set[nb_train_samples:]
+ self.ref_test_errors = test_error