'azure', 'snow', 'silver', 'gainsboro', 'white_smoke',
]
+color_id = dict( [ (n, k) for k, n in enumerate(color_names) ] )
color_tokens = dict( [ (n, c) for n, c in zip(color_names, colors) ] )
######################################################################
######################################################################
-def generate(nb, height = 6, width = 8,
- max_nb_squares = 5, max_nb_statements = 10,
- many_colors = False):
+def generate(nb, height, width,
+ max_nb_squares = 5, max_nb_properties = 10,
+ nb_colors = 5):
- nb_colors = len(color_tokens) - 1 if many_colors else max_nb_squares
+ assert nb_colors >= max_nb_squares and nb_colors <= len(color_tokens) - 1
descr = [ ]
img = [ 0 ] * height * width
for k in range(nb_squares): img[square_position[k]] = square_c[k]
- # generates all the true relations
+ # generates all the true properties
s = all_properties(height, width, nb_squares, square_i, square_j, square_c)
- # pick at most max_nb_statements at random
+ # pick at most max_nb_properties at random
- nb_statements = torch.randint(max_nb_statements, (1,)) + 1
- s = ' <sep> '.join([ s[k] for k in torch.randperm(len(s))[:nb_statements] ] )
+ nb_properties = torch.randint(max_nb_properties, (1,)) + 1
+ s = ' <sep> '.join([ s[k] for k in torch.randperm(len(s))[:nb_properties] ] )
s += ' <img> ' + ' '.join([ f'{color_names[n]}' for n in img ])
descr += [ s ]
######################################################################
-def descr2img(descr, height = 6, width = 8):
+def descr2img(descr, height, width):
+
+ if type(descr) == list:
+ return torch.cat([ descr2img(d, height, width) for d in descr ], 0)
def token2color(t):
try:
except KeyError:
return [ 128, 128, 128 ]
- def img_descr(x):
- u = x.split('<img>', 1)
- return u[1] if len(u) > 1 else ''
-
- img = torch.full((len(descr), 3, height, width), 255)
- d = [ img_descr(x) for x in descr ]
- d = [ u.strip().split(' ')[:height * width] for u in d ]
- d = [ u + [ '<unk>' ] * (height * width - len(u)) for u in d ]
- d = [ [ token2color(t) for t in u ] for u in d ]
- img = torch.tensor(d).permute(0, 2, 1)
- img = img.reshape(img.size(0), 3, height, width)
+ d = descr.split('<img>', 1)
+ d = d[-1] if len(d) > 1 else ''
+ d = d.strip().split(' ')[:height * width]
+ d = d + [ '<unk>' ] * (height * width - len(d))
+ d = [ token2color(t) for t in d ]
+ img = torch.tensor(d).permute(1, 0)
+ img = img.reshape(1, 3, height, width)
return img
######################################################################
+def descr2properties(descr, height, width):
+    """Return the properties that hold for the image encoded in a description.
+
+    `descr` is either one description string or a list of them; a list is
+    processed element by element and a list of results is returned.
+
+    Only the text after the first '<img>' marker is used. It is read as
+    height * width space-separated color names, one per cell, row after
+    row (tokens beyond height * width are ignored). An empty list is
+    returned when the image is malformed: fewer than height * width
+    tokens, a token that is not a known color name, or a non-empty color
+    that appears more than once.
+
+    Otherwise the result is whatever all_properties() returns for the
+    squares found in the image.
+    """
+
+    # isinstance() rather than an exact type comparison, so that
+    # subclasses of list are also handled as batches
+    if isinstance(descr, list):
+        return [ descr2properties(d, height, width) for d in descr ]
+
+    # keep only the image part of the description
+    d = descr.split('<img>', 1)
+    d = d[-1] if len(d) > 1 else ''
+    d = d.strip().split(' ')[:height * width]
+
+    if len(d) != height * width: return []
+
+    # color name -> (color index, row, column) of the square of that color
+    seen = {}
+    for k, x in enumerate(d):
+        # color_names[0] is the color of empty cells (generate() fills
+        # the image with index 0), so there is no square here
+        if x == color_names[0]: continue
+        # unknown token, or a second square of an already seen color
+        if x not in color_tokens or x in seen: return []
+        seen[x] = (color_id[x], k // width, k % width)
+
+    square_c = torch.tensor( [ x[0] for x in seen.values() ] )
+    square_i = torch.tensor( [ x[1] for x in seen.values() ] )
+    square_j = torch.tensor( [ x[2] for x in seen.values() ] )
+
+    s = all_properties(height, width, len(seen), square_i, square_j, square_c)
+
+    return s
+
+######################################################################
+
+def nb_missing_properties(descr, height, width):
+    """Count the requested properties that the image of a description lacks.
+
+    `descr` is one description string or a list of them; a list yields a
+    list of results.
+
+    The part of the description before the first '<img>' marker is split
+    on '<sep>' into the set of requested properties, which is compared
+    with the set of properties descr2properties() returns for the image
+    part.
+
+    Returns a tuple (nb_requested, nb_true, nb_missing), where nb_missing
+    is the number of requested properties that are not among the true
+    ones.
+    """
+
+    if isinstance(descr, list):
+        return [ nb_missing_properties(d, height, width) for d in descr ]
+
+    # str.split() with a separator always returns at least one element,
+    # so the text before '<img>' (or the whole string when the marker is
+    # absent) is always available and needs no emptiness check
+    requested = descr.split('<img>', 1)[0].split('<sep>')
+
+    # drop empty entries so that a description without any property, or
+    # with a dangling '<sep>', does not count '' as a missing property
+    requested_properties = { x.strip() for x in requested if x.strip() }
+
+    # named so as not to shadow the module-level all_properties() function
+    true_properties = set(descr2properties(descr, height, width))
+    missing_properties = requested_properties - true_properties
+
+    return (len(requested_properties), len(true_properties), len(missing_properties))
+
+######################################################################
+
if __name__ == '__main__':
-    descr = generate(nb = 5)
+    # generate(), descr2properties() and nb_missing_properties() take the
+    # image size as required arguments; 6 x 8 is the size generate() and
+    # descr2img() used by default before those defaults were removed
+    descr = generate(nb = 5, height = 6, width = 8)
+    #print(descr2properties(descr, height = 6, width = 8))
+    print(nb_missing_properties(descr, height = 6, width = 8))
+
    with open('picoclvr_example.txt', 'w') as f:
        for d in descr:
            f.write(f'{d}\n\n')
    import time
    start_time = time.perf_counter()
-    descr = generate(10000)
+    descr = generate(nb = 1000, height = 6, width = 8)
    end_time = time.perf_counter()
    print(f'{len(descr) / (end_time - start_time):.02f} samples per second')