From 93cea45f62046a3481d6c05ab2cfe70f6dbc93b3 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Fran=C3=A7ois=20Fleuret?= Date: Mon, 8 Jul 2024 18:00:45 +0300 Subject: [PATCH] Update. --- grids.py | 101 +++++++++++++++++++++++++++++++++++++----------- main.py | 7 ++++ problem.py | 74 ++++++++++++++++++++++++++++++++++- quiz_machine.py | 4 +- 4 files changed, 160 insertions(+), 26 deletions(-) diff --git a/grids.py b/grids.py index 9125810..a2e253e 100755 --- a/grids.py +++ b/grids.py @@ -74,6 +74,7 @@ class Grids(problem.Problem): predicted_prompts=None, predicted_answers=None, nrow=4, + margin=8, ): S = self.height * self.width As = prompts[:, 0 * (S + 1) : 0 * (S + 1) + S].view(-1, self.height, self.width) @@ -120,8 +121,6 @@ class Grids(problem.Problem): return y - margin = 8 - img_prompts = torch.cat( [ add_frame( @@ -195,10 +194,39 @@ class Grids(problem.Problem): def nb_token_values(self): return len(self.colors) + @torch.compile + def rec_coo_(self, nb_rec, min_height=3, min_width=3): + @torch.compile + def overlap(ia, ja, ib, jb): + return ( + ia[1] >= ib[0] and ia[0] <= ib[1] and ja[1] >= jb[0] and ja[0] <= jb[1] + ) + + if nb_rec == 3: + while True: + i = torch.randint(self.height + 1, (nb_rec, 2)).sort(dim=1).values + j = torch.randint(self.width + 1, (nb_rec, 2)).sort(dim=1).values + if ( + not ( + overlap(i[0], j[0], i[1], j[1]) + or overlap(i[0], j[0], i[2], j[2]) + or overlap(i[1], j[1], i[2], j[2]) + ) + and (i[:, 1] - i[:, 0]).min() >= min_height + and (j[:, 1] - j[:, 0]).min() >= min_width + ): + break + return ( + (i[0, 0], j[0, 0], i[0, 1], j[0, 1]), + (i[1, 0], j[1, 0], i[1, 1], j[1, 1]), + (i[2, 0], j[2, 0], i[2, 1], j[2, 1]), + ) + # That's quite a tensorial spaghetti mess to sample # non-overlapping rectangles quickly, but made the generation of # 100k samples go from 1h50 with a lame pure python code to 3min30s # with this one. + @torch.compile def rec_coo(self, nb_rec, min_height=3, min_width=3): nb_trials = 200 @@ -260,6 +288,7 @@ class Grids(problem.Problem): ) ] + @torch.compile def rec_coo_(self, x, n, min_height=3, min_width=3): collision = x.new(x.size()) while True: @@ -284,6 +313,7 @@ class Grids(problem.Problem): ###################################################################### + @torch.compile def task_replace_color(self, A, f_A, B, f_B): nb_rec = 3 c = torch.randperm(len(self.colors) - 1)[: nb_rec + 1] + 1 @@ -294,6 +324,7 @@ class Grids(problem.Problem): X[i1:i2, j1:j2] = c[n] f_X[i1:i2, j1:j2] = c[n if n > 0 else -1] + @torch.compile def task_translate(self, A, f_A, B, f_B): di, dj = torch.randint(3, (2,)) - 1 nb_rec = 3 @@ -318,6 +349,7 @@ class Grids(problem.Problem): else: f_X[i1:i2, j1:j2] = c[n] + @torch.compile def task_grow(self, A, f_A, B, f_B): di, dj = torch.randint(2, (2,)) * 2 - 1 nb_rec = 3 @@ -343,6 +375,7 @@ class Grids(problem.Problem): X[i1:i2, j1:j2] = c[n] f_X[i1:i2, j1:j2] = c[n] + @torch.compile def task_color_grow(self, A, f_A, B, f_B): di, dj = torch.randint(2, (2,)) * 2 - 1 nb_rec = 3 @@ -384,6 +417,7 @@ class Grids(problem.Problem): else: f_X[i1:i2, j : j + 1] = c[2 * n + 1] + @torch.compile def task_frame(self, A, f_A, B, f_B): nb_rec = 3 c = torch.randperm(len(self.colors) - 1)[: nb_rec + 1] + 1 @@ -396,6 +430,7 @@ class Grids(problem.Problem): if n == nb_rec - 1: f_X[i1 + 1 : i2 - 1, j1 + 1 : j2 - 1] = 0 + @torch.compile def task_detect(self, A, f_A, B, f_B): nb_rec = 3 c = torch.randperm(len(self.colors) - 1)[: nb_rec + 1] + 1 @@ -407,6 +442,7 @@ class Grids(problem.Problem): if n < nb_rec - 1: f_X[i1, j1] = c[-1] + @torch.compile def contact(self, X, i, j, q): nq, nq_diag = 0, 0 no = 0 @@ -442,8 +478,9 @@ class Grids(problem.Problem): return no, nq, nq_diag + @torch.compile def task_count(self, A, f_A, B, f_B): - N = torch.randint(4, (1,)) + 2 + N = (torch.randint(4, (1,)) + 2).item() c = torch.randperm(len(self.colors) - 1)[:N] + 1 for X, f_X in [(A, f_A), (B, f_B)]: @@ -465,6 +502,7 @@ class Grids(problem.Problem): for j in range(nb[n]): f_X[n, j] = c[n] + @torch.compile def task_trajectory(self, A, f_A, B, f_B): c = torch.randperm(len(self.colors) - 1)[:2] + 1 for X, f_X in [(A, f_A), (B, f_B)]: @@ -492,10 +530,12 @@ class Grids(problem.Problem): f_X[i + k * di, j + k * dj] = c[min(k, 1)] k += 1 + @torch.compile def task_bounce(self, A, f_A, B, f_B): c = torch.randperm(len(self.colors) - 1)[:3] + 1 for X, f_X in [(A, f_A), (B, f_B)]: + @torch.compile def free(i, j): return ( i >= 0 @@ -555,6 +595,7 @@ class Grids(problem.Problem): if l > 3: break + @torch.compile def task_scale(self, A, f_A, B, f_B): c = torch.randperm(len(self.colors) - 1)[:2] + 1 @@ -579,6 +620,7 @@ class Grids(problem.Problem): X[i, j] = c[1] f_X[0:2, 0:2] = c[1] + @torch.compile def task_symbols(self, A, f_A, B, f_B): nb_rec = 4 c = torch.randperm(len(self.colors) - 1)[: nb_rec + 1] + 1 @@ -614,6 +656,7 @@ class Grids(problem.Problem): f_X[i[0] : i[0] + delta, j[0] : j[0] + delta] = c[q] + @torch.compile def task_ortho(self, A, f_A, B, f_B): nb_rec = 3 di, dj = torch.randint(3, (2,)) - 1 @@ -668,6 +711,7 @@ class Grids(problem.Problem): ): break + @torch.compile def task_islands(self, A, f_A, B, f_B): pass @@ -704,7 +748,9 @@ class Grids(problem.Problem): f_Bs = answers return (Bs == f_Bs).long().min(dim=-1).values > 0 - def generate_prompts_and_answers(self, nb, tasks=None, device="cpu"): + def generate_prompts_and_answers( + self, nb, tasks=None, progress_bar=False, device="cpu" + ): if tasks is None: tasks = self.all_tasks() @@ -712,12 +758,17 @@ class Grids(problem.Problem): prompts = torch.zeros(nb, 3 * S + 2, dtype=torch.int64) answers = torch.zeros(nb, S, dtype=torch.int64) - for prompt, answer in tqdm.tqdm( - zip(prompts, answers), - dynamic_ncols=True, - desc="world generation", - total=prompts.size(0), - ): + bunch = zip(prompts, answers) + + if progress_bar: + bunch = tqdm.tqdm( + bunch, + dynamic_ncols=True, + desc="world generation", + total=prompts.size(0), + ) + + for prompt, answer in bunch: A = prompt[0 * (S + 1) : 0 * (S + 1) + S].view(self.height, self.width) f_A = prompt[1 * (S + 1) : 1 * (S + 1) + S].view(self.height, self.width) B = prompt[2 * (S + 1) : 2 * (S + 1) + S].view(self.height, self.width) @@ -753,24 +804,28 @@ class Grids(problem.Problem): if __name__ == "__main__": import time - nb = 48 - grids = Grids() - # for t in grids.all_tasks(): - for t in [grids.task_ortho]: - print(t.__name__) - prompts, answers = grids.generate_prompts_and_answers(nb, tasks=[t]) - grids.save_quizzes("/tmp", t.__name__, prompts[:nb], answers[:nb], nrow=4) + if False: + nb = 8 - exit(0) + for t in grids.all_tasks(): + # for t in [grids.task_ortho]: + print(t.__name__) + prompts, answers = grids.generate_prompts_and_answers(nb, tasks=[t]) + grids.save_quizzes("/tmp", t.__name__, prompts[:nb], answers[:nb], nrow=2) + + exit(0) - nb = 72 + nb = 500 - start_time = time.perf_counter() - prompts, answers = grids.generate_prompts_and_answers(nb) - delay = time.perf_counter() - start_time - print(f"{prompts.size(0)/delay:02f} seq/s") + for t in grids.all_tasks(): + start_time = time.perf_counter() + prompts, answers = grids.generate_prompts_and_answers(nb, tasks=[t]) + delay = time.perf_counter() - start_time + print(f"{t.__name__} {prompts.size(0)/delay:02f} seq/s") + + exit(0) m = torch.randint(2, (prompts.size(0),)) predicted_prompts = m * (torch.randint(2, (prompts.size(0),)) * 2 - 1) diff --git a/main.py b/main.py index 4ff50d7..9c3d7f1 100755 --- a/main.py +++ b/main.py @@ -12,8 +12,10 @@ from torch import nn from torch.nn import functional as F import ffutils + import mygpt import sky, grids, quiz_machine +from problem import MultiThreadProblem # world quizzes vs. culture quizzes @@ -76,6 +78,8 @@ parser.add_argument("--deterministic_synthesis", action="store_true", default=Fa parser.add_argument("--problem", type=str, default="grids") +parser.add_argument("--multi_thread_problem", action="store_true", default=False) + parser.add_argument("--nb_gpts", type=int, default=5) parser.add_argument("--min_to_validate", type=int, default=None) @@ -244,6 +248,9 @@ elif args.problem == "grids": else: raise ValueError +if args.multi_thread_problem: + problem = MultiThreadProblem(problem, args.nb_train_samples, chunk_size=1000) + quiz_machine = quiz_machine.QuizMachine( problem=problem, nb_train_samples=args.nb_train_samples, diff --git a/problem.py b/problem.py index 0bc83a1..7dd60dc 100755 --- a/problem.py +++ b/problem.py @@ -5,11 +5,16 @@ # Written by Francois Fleuret +import threading, queue, torch + class Problem: def nb_token_values(self): pass + def trivial_prompts_and_answers(self, prompts, answers): + pass + # returns two tensors nb x D and nb x D' def generate_prompts_and_answers(self, nb): pass @@ -21,7 +26,74 @@ class Problem: filename_prefix, prompts, answers, - predicted_prompt=None, + predicted_prompts=None, predicted_answers=None, ): pass + + +class MultiThreadProblem: + def __init__(self, problem, max_nb_cached_chunks, chunk_size): + self.problem = problem + self.chunk_size = chunk_size + self.queue = queue.Queue(maxsize=max_nb_cached_chunks) + threading.Thread(target=self.fill_cache, daemon=True).start() + self.rest = None + + def nb_token_values(self): + return self.problem.nb_token_values() + + def save_quizzes( + self, + result_dir, + filename_prefix, + prompts, + answers, + predicted_prompts=None, + predicted_answers=None, + ): + self.problem.save_quizzes( + result_dir, + filename_prefix, + prompts, + answers, + predicted_prompts=None, + predicted_answers=None, + ) + + def fill_cache(self): + while True: + prompts, answers = self.problem.generate_prompts_and_answers( + self.chunk_size + ) + + self.queue.put((prompts, answers), block=True) + + def trivial_prompts_and_answers(self, prompts, answers): + return self.problem.trivial_prompts_and_answers(prompts, answers) + + def generate_prompts_and_answers(self, nb): + if self.rest is not None: + prompts, answers = rest + else: + prompts, answers = [], [] + + self.rest = None + + n = sum([p.size(0) for p in prompts]) + + while n < nb: + p, s = self.queue.get(block=True) + prompts.append(p) + answers.append(s) + n += p.size(0) + + prompts, answers = torch.cat(prompts, dim=0), torch.cat(answers, dim=0) + + k = n - nb + + if k > 0: + rest = (prompts[-k:], answers[-k:]) + prompts, answers = prompts[:-k], answers[:-k] + + return prompts, answers diff --git a/quiz_machine.py b/quiz_machine.py index 9f4fe96..f0fb408 100755 --- a/quiz_machine.py +++ b/quiz_machine.py @@ -27,8 +27,8 @@ def one_batch_masked_inplace_autoregression( input, ar_mask, seq_logproba, - temperature=1.0, - deterministic_synthesis=False, + temperature, + deterministic_synthesis, ): to_generate = (ar_mask.sum(0) > 0).nonzero() -- 2.20.1