class TaskPicoCLVR(Task):
# Make a tensor of token ids from a list of space-separated strings, right-padded with '<nul>'
def tensorize(self, descr):
    """Convert a batch of description strings into a padded tensor of token ids.

    Each string is stripped and split on single spaces; shorter token
    sequences are right-padded with '<nul>' so that every row has the
    length of the longest one. Tokens are then mapped through
    ``self.token2id``.

    Args:
        descr: iterable of strings, one description per row.

    Returns:
        A 2-D tensor on ``self.device`` with one row per input string.
        An empty batch yields an empty ``(0, 0)`` integer tensor instead
        of failing inside ``max()``.

    Raises:
        KeyError: if a token is missing from ``self.token2id`` (assuming
            it is a plain dict). Note that consecutive spaces inside a
            string produce empty-string tokens, because the split is on
            ``' '`` and not on arbitrary whitespace.
    """
    token_descr = [s.strip().split(' ') for s in descr]

    # max() has no meaningful answer for an empty batch; return an empty
    # 2-D tensor so callers still get the usual rank.
    if not token_descr:
        return torch.empty((0, 0), dtype=torch.long, device=self.device)

    width = max(len(tokens) for tokens in token_descr)

    # Padding is appended on the right, then looked up like any other token.
    token_descr = [
        tokens + ['<nul>'] * (width - len(tokens)) for tokens in token_descr
    ]
    id_descr = [[self.token2id[u] for u in tokens] for tokens in token_descr]
    return torch.tensor(id_descr, device=self.device)
def __init__(self, batch_size,
height, width, nb_colors = 5,
self.vocab.set_default_index(self.vocab[ '<unk>' ])
# Makes a tensor from a list of lists of tokens, right-padded with '<nul>'
def tensorize(self, s):
    """Convert a batch of token sequences into a padded tensor of indices.

    Every sequence is right-padded with '<nul>' up to the length of the
    longest one, then passed to ``self.vocab`` to obtain its indices.

    Args:
        s: iterable of token sequences (each an iterable of tokens).

    Returns:
        A tensor with one row per input sequence, built by
        ``torch.tensor`` from the values ``self.vocab`` returns. An empty
        batch yields an empty ``(0, 0)`` integer tensor instead of
        failing inside ``max()``.
    """
    # Materialize once: the batch is traversed twice (length scan, then
    # padding), and each row must be a list to be concatenated with the pad.
    rows = [list(x) for x in s]
    if not rows:
        return torch.empty((0, 0), dtype=torch.long)

    width = max(len(x) for x in rows)

    # NOTE(review): self.vocab is presumably a torchtext Vocab, called with
    # a list of tokens and returning a list of indices -- confirm.
    return torch.tensor(
        [self.vocab(x + ['<nul>'] * (width - len(x))) for x in rows]
    )