X-Git-Url: https://fleuret.org/cgi-bin/gitweb/gitweb.cgi?a=blobdiff_plain;f=main.py;h=339d18569da3945b0f5617a654e17f99d469ccb6;hb=0c51561334475af559cda12627388c9d5567a55f;hp=65922040a78fc60ada7bbf75c5373ace576ee4c6;hpb=a238fb780003e847d388861c41f0facdc5738dd0;p=mygpt.git

diff --git a/main.py b/main.py
index 6592204..339d185 100755
--- a/main.py
+++ b/main.py
@@ -18,7 +18,6 @@ import mygpt
 device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
 
 ######################################################################
-
 parser = argparse.ArgumentParser(description = 'My own GPT.')
 
 parser.add_argument('--log_filename',
@@ -111,8 +110,8 @@ for n in vars(args):
 ######################################################################
 
 def autoregression(
-        model,
-        nb_samples, nb_tokens_to_generate, starting_input = None,
+        model, batch_size,
+        nb_samples, nb_tokens_to_generate, primer = None,
         device = torch.device('cpu')
 ):
     results = torch.zeros(
@@ -120,13 +119,13 @@ def autoregression(
         dtype = torch.int64, device = device
     )
 
-    if starting_input is None:
+    if primer is None:
         first = 0
     else:
-        first = starting_input.size(1)
-        results = torch.cat((starting_input, results), 1)
+        first = primer.size(1)
+        results = torch.cat((primer, results), 1)
 
-    for input in results.split(args.batch_size):
+    for input in results.split(batch_size):
         for s in tqdm.tqdm(range(first, input.size(1)), desc = 'synth'):
             output = model(input)
             logits = output[:, s]
@@ -148,7 +147,7 @@ class Task:
     def vocabulary_size(self):
         pass
 
-    def produce_results(self, n_epoch, model, nb_tokens = 50):
+    def produce_results(self, n_epoch, model):
        pass
 
 ######################################################################
@@ -157,6 +156,10 @@ import picoclvr
 
 class TaskPicoCLVR(Task):
 
+    def descr2tensor(self, descr):
+        t = [ [ self.token2id[u] for u in s ] for s in descr ]
+        return torch.tensor(t, device = self.device)
+
     def __init__(self, batch_size,
                  height, width, nb_colors = 5,
                  device = torch.device('cpu')):
@@ -193,49 +196,32 @@ class TaskPicoCLVR(Task):
         self.id2token = dict([ (n, t) for n, t in enumerate(tokens) ])
 
         # Tokenize the train and test sets
-        t = [ [ self.token2id[u] for u in s ] for s in self.train_descr ]
-        self.train_input = torch.tensor(t, device = self.device)
-        t = [ [ self.token2id[u] for u in s ] for s in self.test_descr ]
-        self.test_input = torch.tensor(t, device = self.device)
+        self.train_input = self.descr2tensor(self.train_descr)
+        self.test_input = self.descr2tensor(self.test_descr)
 
     def batches(self, split = 'train'):
         assert split in { 'train', 'test' }
-        if split == 'train':
-            for batch in tqdm.tqdm(self.train_input.split(self.batch_size), desc = f'epoch-{split}'):
-                yield batch
-        else:
-            for batch in tqdm.tqdm(self.test_input.split(self.batch_size), desc = f'epoch-{split}'):
-                yield batch
+        input = self.train_input if split == 'train' else self.test_input
+        for batch in tqdm.tqdm(input.split(self.batch_size), desc = f'epoch-{split}'):
+            yield batch
 
     def vocabulary_size(self):
         return len(self.token2id)
 
-    def generate(self, primer, model, nb_tokens):
-        t_primer = primer.strip().split(' ')
-        t_generated = [ ]
-
-        for j in range(nb_tokens):
-            t = [ [ self.token2id[u] for u in t_primer + t_generated ] ]
-            input = torch.tensor(t, device = self.device)
-            input = F.pad(input, (0, 1)) # Add the next token, the one to predict
-            output = model(input)
-            logits = output[0, -1]
-            if args.synthesis_sampling:
-                dist = torch.distributions.categorical.Categorical(logits = logits)
-                t_next = dist.sample()
-            else:
-                t_next = logits.argmax()
-            t_generated.append(self.id2token[t_next.item()])
-
-        return ' '.join(t_primer + t_generated)
+    def generate(self, primer_descr, model, nb_tokens):
+        results = autoregression(
+            model, self.batch_size,
+            nb_samples = 1, nb_tokens_to_generate = nb_tokens, primer = self.descr2tensor(primer_descr),
+            device = self.device
+        )
+        return ' '.join([ self.id2token[t.item()] for t in results.flatten() ])
 
-    def produce_results(self, n_epoch, model, nb_tokens = None):
-        if nb_tokens is None:
-            nb_tokens = self.height * self.width + 3
-        descr = [ ]
+    def produce_results(self, n_epoch, model):
+        nb_tokens = self.height * self.width + 3
+        result_descr = [ ]
         nb_per_primer = 8
 
-        for primer in [
+        for primer_descr in [
            'red above green green top blue right of red ',
            'there is red there is yellow there is blue ',
            'red below yellow yellow below green green below blue red right yellow left green right blue left ',
@@ -243,9 +229,10 @@ class TaskPicoCLVR(Task):
         ]:
             for k in range(nb_per_primer):
-                descr.append(self.generate(primer, model, nb_tokens))
+                result_descr.append(self.generate(primer_descr, model, nb_tokens))
 
-        img = [ picoclvr.descr2img(d, height = self.height, width = self.width) for d in descr ]
+        img = [ picoclvr.descr2img(d, height = self.height, width = self.width)
+                for d in result_descr ]
 
         img = torch.cat(img, 0)
         image_name = f'result_picoclvr_{n_epoch:04d}.png'
         torchvision.utils.save_image(
@@ -255,13 +242,13 @@ class TaskPicoCLVR(Task):
         log_string(f'wrote {image_name}')
 
         np = picoclvr.nb_properties(
-            descr,
+            result_descr,
             height = self.height, width = self.width
         )
 
         nb_requested_properties, _, nb_missing_properties = zip(*np)
 
-        log_string(f'nb_requested_properties {sum(nb_requested_properties) / len(descr):.02f} nb_missing_properties {sum(nb_missing_properties) / len(descr):.02f}')
+        log_string(f'nb_requested_properties {sum(nb_requested_properties) / len(result_descr):.02f} nb_missing_properties {sum(nb_missing_properties) / len(result_descr):.02f}')
 
 ######################################################################
 
@@ -324,7 +311,8 @@ class TaskWiki103(Task):
     def vocabulary_size(self):
         return len(self.vocab)
 
-    def produce_results(self, n_epoch, model, nb_tokens = 50):
+    def produce_results(self, n_epoch, model):
+        nb_tokens = 50
         file_name = f'result_wiki103_{n_epoch:04d}.txt'
 
         with open(file_name, 'w') as outfile:
@@ -385,8 +373,9 @@ class TaskMNIST(Task):
     def vocabulary_size(self):
         return 256
 
-    def produce_results(self, n_epoch, model, nb_samples = 64):
-        results = autoregression(model, nb_samples, 28 * 28, device = self.device)
+    def produce_results(self, n_epoch, model):
+        nb_samples = 64
+        results = autoregression(model, self.batch_size, nb_samples, 28 * 28, device = self.device)
         image_name = f'result_mnist_{n_epoch:04d}.png'
         torchvision.utils.save_image(1 - results.reshape(-1, 1, 28, 28) / 255.,
                                      image_name, nrow = 16, pad_value = 0.8)
@@ -470,9 +459,9 @@ token_count = 0
 for input in task.batches(split = 'train'):
     token_count += F.one_hot(input, num_classes = task.vocabulary_size()).sum((0, 1))
 token_probas = token_count / token_count.sum()
-h = -torch.xlogy(token_probas, token_probas).sum()
-train_set_perplexity = math.exp(h)
-log_string(f'train set perplexity {train_set_perplexity}')
+entropy = -torch.xlogy(token_probas, token_probas).sum()
+train_set_perplexity = math.exp(entropy)
+#log_string(f'train set perplexity {train_set_perplexity}')
 
 for k in range(nb_epochs_finished, nb_epochs):
 
@@ -507,7 +496,7 @@ for k in range(nb_epochs_finished, nb_epochs):
     train_perplexity = math.exp(min(100, acc_train_loss/nb_train_samples))
     test_perplexity = math.exp(min(100, acc_test_loss/nb_test_samples))
 
-    log_string(f'perplexity {k} train {train_perplexity} test {test_perplexity}')
+    log_string(f'perplexity {k} train_set {train_set_perplexity} train_prediction {train_perplexity} test_prediction {test_perplexity}')
 
     task.produce_results(k, model)
 
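The following is a minimal, self-contained sketch of how the generalized autoregression() above is meant to be driven: batch_size is now an explicit argument instead of being read from the global args, and the optional conditioning prefix is passed as primer. The DummyModel stub, the greedy-only decoding, and the toy numbers are illustrative assumptions, not code from the commit; the last lines spell out the entropy-to-perplexity identity behind the train_set_perplexity value that the epoch log line now reports.

import math
import torch

def autoregression(model, batch_size,
                   nb_samples, nb_tokens_to_generate, primer = None,
                   device = torch.device('cpu')):
    # Allocate the positions to generate, prepend the primer if there is one,
    # then fill the free positions left to right, one mini-batch at a time.
    results = torch.zeros(nb_samples, nb_tokens_to_generate,
                          dtype = torch.int64, device = device)
    first = 0 if primer is None else primer.size(1)
    if primer is not None:
        results = torch.cat((primer, results), 1)
    for input in results.split(batch_size):   # split() returns views, so writes land in results
        for s in range(first, input.size(1)):
            logits = model(input)[:, s]
            input[:, s] = logits.argmax(-1)    # greedy decoding only, for brevity
    return results

class DummyModel(torch.nn.Module):
    # Stand-in for mygpt.MyGPT: maps (N, T) token indices to (N, T, vocabulary_size) logits.
    def __init__(self, vocabulary_size = 16):
        super().__init__()
        self.vocabulary_size = vocabulary_size
    def forward(self, x):
        return torch.randn(x.size(0), x.size(1), self.vocabulary_size)

model = DummyModel()
primer = torch.randint(16, (4, 3))             # 4 samples conditioned on 3 tokens each
results = autoregression(model, batch_size = 2,
                         nb_samples = 4, nb_tokens_to_generate = 5, primer = primer)
print(results.size())                          # torch.Size([4, 8]) = primer + generated tokens

# exp(entropy of the empirical token distribution) is the perplexity a model
# that predicts exactly that unigram distribution would obtain.
token_probas = torch.tensor([0.5, 0.25, 0.25])
entropy = -torch.xlogy(token_probas, token_probas).sum()
print(math.exp(entropy))                       # ~2.83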