descr = [ s.strip().split(' ') for s in descr ]
l = max([ len(s) for s in descr ])
+ #descr = [ [ '<unk>' ] * (l - len(s)) + s for s in descr ]
descr = [ s + [ '<unk>' ] * (l - len(s)) for s in descr ]
return descr
self.token2id = dict([ (t, n) for n, t in enumerate(tokens) ])
self.id2token = dict([ (n, t) for n, t in enumerate(tokens) ])
+ # Tokenize the train and test sets
t = [ [ self.token2id[u] for u in s ] for s in self.train_descr ]
self.train_input = torch.tensor(t, device = self.device)
t = [ [ self.token2id[u] for u in s ] for s in self.test_descr ]
)
log_string(f'wrote {image_name}')
- nb_missing = sum( [
- x[2] for x in picoclvr.nb_missing_properties(
- descr,
- height = self.height, width = self.width
- )
- ] )
+ np = picoclvr.nb_properties(
+ descr,
+ height = self.height, width = self.width
+ )
+
+ nb_requested_properties, _, nb_missing_properties = zip(*np)
- log_string(f'nb_missing {nb_missing / len(descr):.02f}')
+ log_string(f'nb_requested_properties {sum(nb_requested_properties) / len(descr):.02f} nb_missing_properties {sum(nb_missing_properties) / len(descr):.02f}')
######################################################################