("gray", [128, 128, 128]),
]
- def __init__(self, device=torch.device("cpu")):
+ def __init__(
+ self,
+ max_nb_cached_chunks=None,
+ chunk_size=None,
+ nb_threads=-1,
+ ):
self.colors = torch.tensor([c for _, c in self.named_colors])
self.height = 10
self.width = 10
- self.device = device
+ super().__init__(max_nb_cached_chunks, chunk_size, nb_threads)
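# A rough sketch of the kind of chunk-caching base class these arguments are
# forwarded to (a hypothetical reconstruction for illustration, not
# necessarily the project's actual base class): worker threads keep a bounded
# queue of pre-generated chunks filled by calling the subclass's
# generate_prompts_and_answers_(), and the public method drains that queue.
#
#     import queue, threading
#     import torch
#
#     class CachedProblem:
#         def __init__(self, max_nb_cached_chunks=None, chunk_size=None, nb_threads=-1):
#             self.chunk_size = chunk_size
#             self.use_cache = chunk_size is not None and nb_threads > 0
#             if self.use_cache:
#                 self.queue = queue.Queue(maxsize=max_nb_cached_chunks or 0)
#                 for _ in range(nb_threads):
#                     threading.Thread(target=self._fill_cache, daemon=True).start()
#
#         def _fill_cache(self):
#             while True:
#                 # blocks once max_nb_cached_chunks chunks are waiting
#                 self.queue.put(self.generate_prompts_and_answers_(self.chunk_size))
#
#         def generate_prompts_and_answers(self, nb, tasks=None, progress_bar=False):
#             if not self.use_cache:
#                 return self.generate_prompts_and_answers_(nb, tasks, progress_bar)
#             prompts, answers, n = [], [], 0
#             while n < nb:
#                 p, a = self.queue.get()
#                 prompts.append(p)
#                 answers.append(a)
#                 n += p.size(0)
#             return torch.cat(prompts)[:nb], torch.cat(answers)[:nb]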
######################################################################
c = c.long()[:, None]
c = (
(1 - ((c == 1).long() + (c == 0).long() + (c == -1).long()))
- * torch.tensor([64, 64, 64], device=c.device)
- + (c == 1).long() * torch.tensor([0, 255, 0], device=c.device)
- + (c == 0).long() * torch.tensor([255, 255, 255], device=c.device)
- + (c == -1).long() * torch.tensor([255, 0, 0], device=c.device)
+ * torch.tensor([64, 64, 64])
+ + (c == 1).long() * torch.tensor([0, 255, 0])
+ + (c == 0).long() * torch.tensor([255, 255, 255])
+ + (c == -1).long() * torch.tensor([255, 0, 0])
)
y[...] = c[:, :, None, None]
def nb_token_values(self):
return len(self.colors)
- # @torch.compile
- def rec_coo_(self, nb_rec, min_height=3, min_width=3):
- # @torch.compile
- def overlap(ia, ja, ib, jb):
- return (
- ia[1] >= ib[0] and ia[0] <= ib[1] and ja[1] >= jb[0] and ja[0] <= jb[1]
- )
-
- if nb_rec == 3:
- while True:
- i = torch.randint(self.height + 1, (nb_rec, 2)).sort(dim=1).values
- j = torch.randint(self.width + 1, (nb_rec, 2)).sort(dim=1).values
- if (
- not (
- overlap(i[0], j[0], i[1], j[1])
- or overlap(i[0], j[0], i[2], j[2])
- or overlap(i[1], j[1], i[2], j[2])
- )
- and (i[:, 1] - i[:, 0]).min() >= min_height
- and (j[:, 1] - j[:, 0]).min() >= min_width
- ):
- break
- return (
- (i[0, 0], j[0, 0], i[0, 1], j[0, 1]),
- (i[1, 0], j[1, 0], i[1, 1], j[1, 1]),
- (i[2, 0], j[2, 0], i[2, 1], j[2, 1]),
- )
-
- # That's quite a tensorial spaghetti mess to sample
- # non-overlapping rectangles quickly, but made the generation of
- # 100k samples go from 1h50 with a lame pure python code to 3min30s
- # with this one.
- # @torch.compile
def rec_coo(self, nb_rec, min_height=3, min_width=3):
- nb_trials = 200
-
+ N = 10
while True:
- v = (
- (
- torch.rand(nb_trials * nb_rec, self.height + 1, device=self.device)
- .sort(dim=-1)
- .indices
- < 2
+ i = torch.randint(self.height, (N, nb_rec, 2)).sort(dim=-1).values
+ j = torch.randint(self.width, (N, nb_rec, 2)).sort(dim=-1).values
+ if nb_rec == 2:
+ A_i1, A_i2, A_j1, A_j2 = i[:, 0, 0], i[:, 0, 1], j[:, 0, 0], j[:, 0, 1]
+ B_i1, B_i2, B_j1, B_j2 = i[:, 1, 0], i[:, 1, 1], j[:, 1, 0], j[:, 1, 1]
+                no_overlap = torch.logical_not(
+                    (A_i2 >= B_i1) & (A_i1 <= B_i2) & (A_j2 >= B_j1) & (A_j1 <= B_j2)
+                )
- .long()
- .cumsum(dim=1)
- == 1
- ).long()
-
- h = (
- (
- torch.rand(nb_trials * nb_rec, self.width + 1, device=self.device)
- .sort(dim=-1)
- .indices
- < 2
+ i, j = i[no_overlap], j[no_overlap]
+ elif nb_rec == 3:
+ A_i1, A_i2, A_j1, A_j2 = i[:, 0, 0], i[:, 0, 1], j[:, 0, 0], j[:, 0, 1]
+ B_i1, B_i2, B_j1, B_j2 = i[:, 1, 0], i[:, 1, 1], j[:, 1, 0], j[:, 1, 1]
+ C_i1, C_i2, C_j1, C_j2 = i[:, 2, 0], i[:, 2, 1], j[:, 2, 0], j[:, 2, 1]
+                no_overlap = (
+                    torch.logical_not(
+                        (A_i2 >= B_i1) & (A_i1 <= B_i2) & (A_j2 >= B_j1) & (A_j1 <= B_j2)
+                    )
+                    & torch.logical_not(
+                        (A_i2 >= C_i1) & (A_i1 <= C_i2) & (A_j2 >= C_j1) & (A_j1 <= C_j2)
+                    )
+                    & torch.logical_not(
+                        (B_i2 >= C_i1) & (B_i1 <= C_i2) & (B_j2 >= C_j1) & (B_j1 <= C_j2)
+                    )
+                )
- .long()
- .cumsum(dim=1)
- == 1
- ).long()
-
- i = torch.logical_and(
- v.sum(dim=-1) >= min_height, h.sum(dim=-1) >= min_width
- )
-
- v, h = v[i], h[i]
- v = v[: v.size(0) - v.size(0) % nb_rec]
- h = h[: h.size(0) - h.size(0) % nb_rec]
- v = v.reshape(v.size(0) // nb_rec, nb_rec, -1)
- h = h.reshape(h.size(0) // nb_rec, nb_rec, -1)
-
- r = v[:, :, :, None] * h[:, :, None, :]
-
- valid = r.sum(dim=1).flatten(1).max(dim=-1).values == 1
-
- v = v[valid]
- h = h[valid]
+ i, j = (i[no_overlap], j[no_overlap])
+ else:
+ assert nb_rec == 1
- if v.size(0) > 0:
+ if i.size(0) > 1:
break
- av = torch.arange(v.size(2), device=self.device)[None, :]
- ah = torch.arange(h.size(2), device=self.device)[None, :]
-
- return [
- (i1.item(), j1.item(), i2.item() + 1, j2.item() + 1)
- for i1, j1, i2, j2 in zip(
- v.size(2) - (v[0] * (v.size(2) - av)).max(dim=-1).values,
- h.size(2) - (h[0] * (h.size(2) - ah)).max(dim=-1).values,
- (v[0] * av).max(dim=-1).values,
- (h[0] * ah).max(dim=-1).values,
- )
- ]
-
- # @torch.compile
- def rec_coo_(self, x, n, min_height=3, min_width=3):
- collision = x.new(x.size())
- while True:
- collision[...] = 0
- result = []
- for _ in range(n):
- while True:
- i1, i2 = torch.randint(x.size(0), (2,))
- if i1 + min_height <= i2:
- break
- while True:
- j1, j2 = torch.randint(x.size(1), (2,))
- if j1 + min_width <= j2:
- break
- collision[i1:i2, j1:j2] += 1
- if collision.max() > 1:
- break
- result.append((i1, j1, i2, j2))
- if collision.max() == 1:
- break
- return result
+ return [(i[0, k, 0], j[0, k, 0], i[0, k, 1], j[0, k, 1]) for k in range(nb_rec)]
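# Usage sketch for rec_coo above (assuming the default Grids() construction
# used in the __main__ block at the bottom of the file): it returns nb_rec
# tuples (i1, j1, i2, j2) of 0-dim tensors with i1 <= i2 < height and
# j1 <= j2 < width, and no two of the sampled rectangles overlap, so they can
# be painted directly onto a grid:
#
#     g = Grids()
#     X = torch.zeros(g.height, g.width, dtype=torch.int64)
#     for k, (i1, j1, i2, j2) in enumerate(g.rec_coo(nb_rec=3)):
#         X[i1 : i2 + 1, j1 : j2 + 1] = k + 1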
######################################################################
return no, nq, nq_diag
- # @torch.compile
def task_count(self, A, f_A, B, f_B):
N = (torch.randint(4, (1,)) + 2).item()
c = torch.randperm(len(self.colors) - 1)[:N] + 1
for X, f_X in [(A, f_A), (B, f_B)]:
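+            # First scatter isolated cells of the N chosen colours, then grow
+            # them into 4-connected blobs without ever letting two blobs of
+            # the same colour merge, so that nb[e] keeps counting the blobs
+            # of colour c[e]; the answer grid f_X finally shows nb[e] cells
+            # of colour c[e] in row e.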
+ l_q = torch.randperm(self.height * self.width)[
+ : self.height * self.width // 20
+ ]
+ l_d = torch.randint(N, l_q.size())
nb = torch.zeros(N, dtype=torch.int64)
- q = torch.randint(N, (self.height * self.width,))
- k = torch.randperm(self.height * self.width)
- for p in range(self.height * self.width):
- i, j = k[p] % self.height, k[p] // self.height
- no, nq, nq_diag = self.contact(X, i, j, c[q[p]])
- if no == 0 and nq_diag == 0:
- if nq == 0:
- if nb[q[p]] < self.width:
- X[i, j] = c[q[p]]
- nb[q[p]] += 1
- if nq == 1:
- X[i, j] = c[q[p]]
-
- for n in range(N):
- for j in range(nb[n]):
- f_X[n, j] = c[n]
+
+ for q, e in zip(l_q, l_d):
+ d = c[e]
+ i, j = q % self.height, q // self.height
+            if nb[e] < self.width and (
+                X[max(0, i - 1) : i + 2, max(0, j - 1) : j + 2] == 0
+            ).all():
+ X[i, j] = d
+ nb[e] += 1
+
+ l_q = torch.randperm((self.height - 2) * (self.width - 2))[
+ : self.height * self.width // 2
+ ]
+ l_d = torch.randint(N, l_q.size())
+ for q, e in zip(l_q, l_d):
+ d = c[e]
+ i, j = q % (self.height - 2) + 1, q // (self.height - 2) + 1
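+                # 8-neighbourhood of (i, j), labelled clockwise from the
+                # top-left corner:
+                #     a1 a2 a3
+                #     a8  . a4
+                #     a7 a6 a5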
+ a1, a2, a3 = X[i - 1, j - 1 : j + 2]
+ a8, a4 = X[i, j - 1], X[i, j + 1]
+ a7, a6, a5 = X[i + 1, j - 1 : j + 2]
+ if (
+ X[i, j] == 0
+ and nb[e] < self.width
+ and (a2 == 0 or a2 == d)
+ and (a4 == 0 or a4 == d)
+ and (a6 == 0 or a6 == d)
+ and (a8 == 0 or a8 == d)
+ and (a1 == 0 or a2 == d or a8 == d)
+ and (a3 == 0 or a4 == d or a2 == d)
+ and (a5 == 0 or a6 == d or a4 == d)
+ and (a7 == 0 or a8 == d or a6 == d)
+ ):
+ o = (
+ (a2 != 0).long()
+ + (a4 != 0).long()
+ + (a6 != 0).long()
+ + (a8 != 0).long()
+ )
+ if o <= 1:
+ X[i, j] = d
+ nb[e] += 1 - o
+
+ for e in range(N):
+ for j in range(nb[e]):
+ f_X[e, j] = c[e]
# @torch.compile
def task_trajectory(self, A, f_A, B, f_B):
f_Bs = answers
return (Bs == f_Bs).long().min(dim=-1).values > 0
- def generate_prompts_and_answers(
- self, nb, tasks=None, progress_bar=False, device="cpu"
- ):
+ def generate_prompts_and_answers_(self, nb, tasks=None, progress_bar=False):
if tasks is None:
tasks = self.all_tasks()
if __name__ == "__main__":
import time
+ # grids = Grids(max_nb_cached_chunks=5, chunk_size=100, nb_threads=4)
grids = Grids()
# nb = 1000
# print(f"{prompts.size(0)/delay:02f} seq/s")
# exit(0)
- if True:
- nb = 72
+ # if True:
+ # nb = 72
- for t in grids.all_tasks():
- # for t in [grids.task_ortho]:
- print(t.__name__)
- prompts, answers = grids.generate_prompts_and_answers(nb, tasks=[t])
- grids.save_quizzes("/tmp", t.__name__, prompts[:nb], answers[:nb], nrow=4)
+ # for t in grids.all_tasks():
+ # for t in [grids.task_count]:
+ # print(t.__name__)
+ # prompts, answers = grids.generate_prompts_and_answers_(nb, tasks=[t])
+ # grids.save_quizzes("/tmp", t.__name__, prompts[:nb], answers[:nb], nrow=4)
- exit(0)
+ # exit(0)
- nb = 500
+ nb = 1000
for t in grids.all_tasks():
start_time = time.perf_counter()
- prompts, answers = grids.generate_prompts_and_answers(nb, tasks=[t])
+ prompts, answers = grids.generate_prompts_and_answers_(nb, tasks=[t])
delay = time.perf_counter() - start_time
print(f"{t.__name__} {prompts.size(0)/delay:02f} seq/s")
import mygpt
import sky, grids, quiz_machine
-from problem import MultiThreadProblem
# world quizzes vs. culture quizzes
parser.add_argument("--problem", type=str, default="grids")
-parser.add_argument("--multi_thread_problem", action="store_true", default=False)
+parser.add_argument("--nb_threads", type=int, default=-1)
parser.add_argument("--nb_gpts", type=int, default=5)
default_args = {
"model": "37M",
- "batch_size": 100,
+ "batch_size": 25,
"nb_train_samples": 100000,
"nb_test_samples": 10000,
}
nb_birds=args.sky_nb_birds,
nb_iterations=args.sky_nb_iterations,
speed=args.sky_speed,
+ max_nb_cached_chunks=args.nb_train_samples // 100,
+ chunk_size=100,
+ nb_threads=args.nb_threads,
)
back_accuracy = False
elif args.problem == "grids":
- problem = grids.Grids(device=device)
+ problem = grids.Grids(
+ max_nb_cached_chunks=args.nb_train_samples // 100,
+ chunk_size=100,
+ nb_threads=args.nb_threads,
+ )
back_accuracy = True
else:
raise ValueError
-if args.multi_thread_problem:
- problem = MultiThreadProblem(problem, args.nb_train_samples, chunk_size=1000)
-
quiz_machine = quiz_machine.QuizMachine(
problem=problem,
nb_train_samples=args.nb_train_samples,
######################################################################
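+# A c_quiz is kept when exactly one model struggles with it: the smallest of
+# its per-model log-probabilities must be below log(0.5) while the second
+# smallest (hence all the others) is above log(0.95). With three models, for
+# instance, per-model probabilities [0.2, 0.97, 0.99] pass.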
+def standard_validity(logproba):
+    l = logproba.sort(dim=-1).values
+    return torch.logical_and(l[:, 0] < math.log(0.5), l[:, 1] > math.log(0.95))
+
+
def valid_c_quizzes(recorded, criteria):
- result = [q[criteria(c)] for q, c in recorded]
+ result = [q[criteria(lp)] for q, lp in recorded]
return torch.cat(result, dim=0) if len(result) > 0 else torch.tensor([])
quiz_machine,
nb_for_train=1000,
nb_for_test=100,
+):
+ quizzes_and_logproba_records = []
+
+ nb_to_create = nb_for_train + nb_for_test
+
+ # ------------------------------------------------------------
+
+ file_name = os.path.join(args.result_dir, f"culture_c_quiz_{n_epoch:04d}_logp.dat")
+
+ with open(file_name, "w") as logp_file:
+ while (
+ valid_c_quizzes(quizzes_and_logproba_records, standard_validity).size(0)
+ < nb_to_create
+ ):
+ # Select a model at random to generate the new quizzes
+
+ model_for_generation = models[torch.randint(len(models), (1,))]
+
+ c_quizzes = quiz_machine.generate_quizzes(
+ nb_to_create,
+ model_for_generation=model_for_generation,
+ temperature=args.generation_temperature,
+ )
+
+ c_quizzes = c_quizzes[quiz_machine.non_trivial(c_quizzes)]
+
+ if c_quizzes.size(0) > 0:
+                logproba = c_quizzes.new_zeros(
+                    c_quizzes.size(0), len(models), dtype=torch.float32
+                )
+                for q, l in zip(
+                    c_quizzes.split(args.batch_size), logproba.split(args.batch_size)
+                ):
+                    for model in models:
+                        # assumes model(q) returns per-token logits of shape
+                        # (batch, seq, vocab); l is a view into logproba
+                        l[:, model.id] = -F.cross_entropy(
+                            model(q).transpose(1, 2), q, reduction="none"
+                        ).sum(dim=-1)
+
+ for l in logproba:
+ s = " ".join([str(x.item()) for x in l])
+ logp_file.write(s + "\n")
+
+ quizzes_and_logproba_records.append((c_quizzes, logproba))
+
+ nb_validated = valid_c_quizzes(
+ quizzes_and_logproba_records, standard_validity
+ ).size(0)
+
+ log_string(
+ f"keep c_quizzes model {model_for_generation.id} nb_accumulated {nb_validated} / {nb_to_create}"
+ )
+
+ # store the new c_quizzes which have been validated
+
+ new_c_quizzes = valid_c_quizzes(quizzes_and_logproba_records, standard_validity)
+
+ quiz_machine.reverse_random_half_in_place(new_c_quizzes)
+
+ quiz_machine.store_c_quizzes(new_c_quizzes[:nb_for_train], for_train=True)
+ quiz_machine.store_c_quizzes(new_c_quizzes[nb_for_train:], for_train=False)
+
+ # save a bunch of images to investigate what quizzes with a
+ # certain nb of correct predictions look like
+
+ q = new_c_quizzes[:72]
+
+ if q.size(0) > 0:
+ quiz_machine.save_quizzes(args.result_dir, f"culture_c_quiz_{n_epoch:04d}", q)
+
+
+######################################################################
+
+
+def create_c_quizzes_(
+ models,
+ quiz_machine,
+ nb_for_train=1000,
+ nb_for_test=100,
):
quizzes_and_nb_correct_records = []
temperature=args.generation_temperature,
)
+ # if args.prediction_correctness:
+
+ # else:
+ # logproba = quiz_machine.new(quiz_machine.size(0), len(models))
+ # for q,l in zip(quizzes.split(args.batch_size), logits.split(args.batch_size)):
+ # for model in models:
+ # l[...] = F.cross_entropy(model(q))
+
c_quizzes = c_quizzes[quiz_machine.non_trivial(c_quizzes)]
if c_quizzes.size(0) > 0: