X-Git-Url: https://fleuret.org/cgi-bin/gitweb/gitweb.cgi?a=blobdiff_plain;f=main.py;h=b01ea0ae078f0e4a494b25f2971d64c2b358bfe0;hb=c3621f9a75cd4d79410d90a29dc9fdec401eaa2d;hp=bcba9ee0b2e9bedaa0cd1792cb4fe50c3afe3bbe;hpb=4e5fc31bbaf94c291f18c785b227e64bea1690cb;p=mygpt.git diff --git a/main.py b/main.py index bcba9ee..b01ea0a 100755 --- a/main.py +++ b/main.py @@ -160,8 +160,8 @@ class TaskPicoCLVR(Task): def tensorize(self, descr): token_descr = [ s.strip().split(' ') for s in descr ] l = max([ len(s) for s in token_descr ]) - token_descr = [ [ '' ] * (l - len(s)) + s for s in token_descr ] - #token_descr = [ s + [ '' ] * (l - len(s)) for s in token_descr ] + #token_descr = [ [ '' ] * (l - len(s)) + s for s in token_descr ] + token_descr = [ s + [ '' ] * (l - len(s)) for s in token_descr ] id_descr = [ [ self.token2id[u] for u in s ] for s in token_descr ] return torch.tensor(id_descr, device = self.device) @@ -188,6 +188,7 @@ class TaskPicoCLVR(Task): self.device = device nb = args.data_size if args.data_size > 0 else 250000 + log_string(f'generating {nb} samples (can take some time)') self.train_descr = generate_descr((nb * 4) // 5) self.test_descr = generate_descr((nb * 1) // 5)