+ operators = F.one_hot(
+ torch.rand(nb, self.len_result, self.len_source).argmax(-1),
+ num_classes=self.len_source,
+ )
+ source1 = torch.rand(nb, 10).sort(dim=1).indices[:, : self.len_source]
+ # source1 = torch.randint(10, (nb, self.len_source))
+ marker1 = torch.full((nb, 1), 10)
+ result1 = operators.bmm(source1[:, :, None]).squeeze(-1)
+ marker2 = torch.full((nb, 1), 11)
+ source2 = torch.randint(10, (nb, self.len_source))
+ marker3 = torch.full((nb, 1), 12)
+ result2 = operators.bmm(source2[:, :, None]).squeeze(-1)
+
+ sequences = torch.cat(
+ (source1, marker1, result1, marker2, source2, marker3, result2), 1
+ )
+ ar_mask = (sequences == 12).long()
+ ar_mask = (ar_mask.cumsum(1) - ar_mask).clamp(max=1)
+ return sequences, ar_mask
+
+ def seq2str(self, seq):
+ return "".join("0123456789>|~"[x.item()] for x in seq)
+
+
+####################
+
+
+class ProblemAddition(Problem):
+ def __init__(self, nb_digits=10, zero_padded=False, inverted_result=False):
+ self.nb_digits = nb_digits
+ self.zero_padded = zero_padded
+ self.inverted_result = inverted_result
+ self.char2id = dict([(c, n) for n, c in enumerate("0123456789+=$")])
+ self.id2char = dict([(n, c) for c, n in self.char2id.items()])
+
+ def tensorize(self, strings):
+ len_max = max([len(x) for x in strings])
+ return torch.cat(
+ [
+ torch.tensor(
+ [
+ [self.char2id[c] for c in s + "$" * (len_max - len(s))]
+ for s in strings
+ ]
+ )
+ ],
+ 0,
+ )
+
+ def generate_sequences(self, nb):
+ sequences = []
+ for k in range(nb):
+ a, b = torch.randint(10**self.nb_digits, (2,))
+ c = a + b
+ a, b, c = str(a.item()), str(b.item()), str(c.item())
+ if self.zero_padded:
+ a = "0" * (self.nb_digits - len(a)) + a
+ b = "0" * (self.nb_digits - len(b)) + b
+ c = "0" * (self.nb_digits + 1 - len(c)) + c
+ if self.inverted_result:
+ c = c[::-1]
+ sequences.append(f"{a}+{b}={c}$")
+
+ sequences = self.tensorize(sequences)
+ ar_mask = (sequences == self.char2id["="]).long()
+ ar_mask = (ar_mask.cumsum(1) - ar_mask).clamp(max=1)
+ return sequences, ar_mask
+
+ def seq2str(self, seq):
+ return "".join(self.id2char[x.item()] for x in seq)
+
+
+# class ProblemUnion(Problem):
+# problems = [ProblemByheart()]
+# nb_common_codes = 100
+
+# def generate_sequences(nb_samples):
+# problem_indexes = torch.randint(len(problems), (nb_samples,))
+# nb_samples_per_problem = torch.one_hot(problem_indexes).sum(0)
+# print(f"{nb_samples_per_problem}")
+# all_seq = []
+# for nb, p in zip(nb_samples_per_problem, problems):
+# all_seq.append(p.generate_sequences(nb_samples_per_problem[nb]))
+# return all_seq
+
+# for strain, stest in zip(train_seq, test_seq):
+# s = torch.cat((strain, stest), 0)
+
+####################
+