From: Francois Fleuret Date: Wed, 27 Jul 2022 13:58:34 +0000 (+0200) Subject: Cosmetics. X-Git-Url: https://fleuret.org/cgi-bin/gitweb/gitweb.cgi?a=commitdiff_plain;h=7dbeac21672006bcb2d4e316d6d83c40c87d3751;p=mygpt.git Cosmetics. --- diff --git a/main.py b/main.py index b2adf98..76aeebd 100755 --- a/main.py +++ b/main.py @@ -112,7 +112,7 @@ for n in vars(args): def autoregression( model, batch_size, - nb_samples, nb_tokens_to_generate, starting_input = None, + nb_samples, nb_tokens_to_generate, primer = None, device = torch.device('cpu') ): results = torch.zeros( @@ -120,11 +120,11 @@ def autoregression( dtype = torch.int64, device = device ) - if starting_input is None: + if primer is None: first = 0 else: - first = starting_input.size(1) - results = torch.cat((starting_input, results), 1) + first = primer.size(1) + results = torch.cat((primer, results), 1) for input in results.split(batch_size): for s in tqdm.tqdm(range(first, input.size(1)), desc = 'synth'): @@ -157,6 +157,10 @@ import picoclvr class TaskPicoCLVR(Task): + def descr2tensor(self, descr): + t = [ [ self.token2id[u] for u in s ] for s in descr ] + return torch.tensor(t, device = self.device) + def __init__(self, batch_size, height, width, nb_colors = 5, device = torch.device('cpu')): @@ -193,10 +197,8 @@ class TaskPicoCLVR(Task): self.id2token = dict([ (n, t) for n, t in enumerate(tokens) ]) # Tokenize the train and test sets - t = [ [ self.token2id[u] for u in s ] for s in self.train_descr ] - self.train_input = torch.tensor(t, device = self.device) - t = [ [ self.token2id[u] for u in s ] for s in self.test_descr ] - self.test_input = torch.tensor(t, device = self.device) + self.train_input = descr2tensor(self.train_descr) + self.test_input = descr2tensor(self.test_descr) def batches(self, split = 'train'): assert split in { 'train', 'test' } @@ -210,32 +212,21 @@ class TaskPicoCLVR(Task): def vocabulary_size(self): return len(self.token2id) - def generate(self, primer, model, nb_tokens): - t_primer = primer.strip().split(' ') - t_generated = [ ] - - for j in range(nb_tokens): - t = [ [ self.token2id[u] for u in t_primer + t_generated ] ] - input = torch.tensor(t, device = self.device) - input = F.pad(input, (0, 1)) # Add the next token, the one to predict - output = model(input) - logits = output[0, -1] - if args.synthesis_sampling: - dist = torch.distributions.categorical.Categorical(logits = logits) - t_next = dist.sample() - else: - t_next = logits.argmax() - t_generated.append(self.id2token[t_next.item()]) - - return ' '.join(t_primer + t_generated) + def generate(self, descr_primer, model, nb_tokens): + results = autoregression( + model, self.batch_size, + 1, nb_tokens, primer = descr2tensor(descr_primer), + device = self.device + ) + return ' '.join([ self.id2token[t.item()] for t in results.flatten() ]) def produce_results(self, n_epoch, model, nb_tokens = None): if nb_tokens is None: nb_tokens = self.height * self.width + 3 - descr = [ ] + result_descr = [ ] nb_per_primer = 8 - for primer in [ + for descr_primer in [ 'red above green green top blue right of red ', 'there is red there is yellow there is blue ', 'red below yellow yellow below green green below blue red right yellow left green right blue left ', @@ -243,9 +234,10 @@ class TaskPicoCLVR(Task): ]: for k in range(nb_per_primer): - descr.append(self.generate(primer, model, nb_tokens)) + result_descr.append(self.generate(descr_primer, model, nb_tokens)) - img = [ picoclvr.descr2img(d, height = self.height, width = self.width) for d in descr ] + img = [ picoclvr.descr2img(d, height = self.height, width = self.width) + for d in result_descr ] img = torch.cat(img, 0) image_name = f'result_picoclvr_{n_epoch:04d}.png' torchvision.utils.save_image( @@ -255,13 +247,13 @@ class TaskPicoCLVR(Task): log_string(f'wrote {image_name}') np = picoclvr.nb_properties( - descr, + result_descr, height = self.height, width = self.width ) nb_requested_properties, _, nb_missing_properties = zip(*np) - log_string(f'nb_requested_properties {sum(nb_requested_properties) / len(descr):.02f} nb_missing_properties {sum(nb_missing_properties) / len(descr):.02f}') + log_string(f'nb_requested_properties {sum(nb_requested_properties) / len(result_descr):.02f} nb_missing_properties {sum(nb_missing_properties) / len(result_descr):.02f}') ###################################################################### @@ -470,9 +462,9 @@ token_count = 0 for input in task.batches(split = 'train'): token_count += F.one_hot(input, num_classes = task.vocabulary_size()).sum((0, 1)) token_probas = token_count / token_count.sum() -h = -torch.xlogy(token_probas, token_probas).sum() -train_set_perplexity = math.exp(h) -log_string(f'train set perplexity {train_set_perplexity}') +entropy = -torch.xlogy(token_probas, token_probas).sum() +train_set_perplexity = math.exp(entropy) +#log_string(f'train set perplexity {train_set_perplexity}') for k in range(nb_epochs_finished, nb_epochs): @@ -507,7 +499,7 @@ for k in range(nb_epochs_finished, nb_epochs): train_perplexity = math.exp(min(100, acc_train_loss/nb_train_samples)) test_perplexity = math.exp(min(100, acc_test_loss/nb_test_samples)) - log_string(f'perplexity {k} train {train_perplexity} test {test_perplexity}') + log_string(f'perplexity {k} train_set {train_set_perplexity} train_prediction {train_perplexity} test_prediction {test_perplexity}') task.produce_results(k, model)