3 # @XREMOTE_HOST: elk.fleuret.org
4 # @XREMOTE_EXEC: python
5 # @XREMOTE_PRE: source ${HOME}/misc/venv/pytorch/bin/activate
6 # @XREMOTE_PRE: ln -sf ${HOME}/data/pytorch ./data
9 # Any copyright is dedicated to the Public Domain.
10 # https://creativecommons.org/publicdomain/zero/1.0/
12 # Written by Francois Fleuret <francois@fleuret.org>
14 import sys, os, argparse, time, math, itertools
16 import torch, torchvision
18 from torch import optim, nn
19 from torch.nn import functional as F
21 ######################################################################
# Run on GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
25 ######################################################################
# Command-line configuration. Defaults are sized for a quick teaching run.
parser = argparse.ArgumentParser(
    description="Very simple implementation of a VAE for teaching."
)

parser.add_argument("--nb_epochs", type=int, default=100)

parser.add_argument("--learning_rate", type=float, default=2e-4)

parser.add_argument("--batch_size", type=int, default=100)

parser.add_argument("--data_dir", type=str, default="./data/")

parser.add_argument("--log_filename", type=str, default="train.log")

parser.add_argument("--latent_dim", type=int, default=32)

parser.add_argument("--nb_channels", type=int, default=128)

# When set, train with a Monte-Carlo estimate of the ELBO instead of the
# closed-form DKL term (selects the branch taken in the training loop).
parser.add_argument("--no_dkl", action="store_true")

args = parser.parse_args()

log_file = open(args.log_filename, "w")
51 ######################################################################
def log_string(s):
    """Write *s*, prefixed with a timestamp, to the log file (if any) and stdout."""
    t = time.strftime("%Y-%m-%d_%H:%M:%S - ", time.localtime())

    if log_file is not None:
        log_file.write(t + s + "\n")
        # Flush so the log is usable while training is still running.
        log_file.flush()

    print(t + s)
    sys.stdout.flush()
65 ######################################################################
def sample_gaussian(mu, log_var):
    """Draw one reparameterized sample from N(mu, diag(exp(log_var))).

    The sample is mu + std * eps with eps ~ N(0, I), so gradients flow
    through mu and log_var (the reparameterization trick).
    """
    std = log_var.mul(0.5).exp()
    return torch.randn(mu.size(), device=mu.device) * std + mu
def log_p_gaussian(x, mu, log_var):
    """Log-density of x under a diagonal Gaussian N(mu, diag(exp(log_var))).

    All non-batch dimensions are flattened and summed, so the result is one
    scalar log-probability per sample in the batch.
    """
    var = log_var.exp()
    return (
        (-0.5 * ((x - mu).pow(2) / var) - 0.5 * log_var - 0.5 * math.log(2 * math.pi))
        .flatten(1)
        .sum(1)
    )
def dkl_gaussians(mean_a, log_var_a, mean_b, log_var_b):
    """Closed-form KL divergence DKL(N_a || N_b) for diagonal Gaussians.

    Inputs are flattened past the batch dimension; returns one scalar per
    sample.
    """
    mean_a, log_var_a = mean_a.flatten(1), log_var_a.flatten(1)
    mean_b, log_var_b = mean_b.flatten(1), log_var_b.flatten(1)
    var_a = log_var_a.exp()
    var_b = log_var_b.exp()
    return 0.5 * (
        log_var_b - log_var_a - 1 + (mean_a - mean_b).pow(2) / var_b + var_a / var_b
    ).sum(1)
92 ######################################################################
class LatentGivenImageNet(nn.Module):
    """Encoder q(Z | x): maps a 1x28x28 image to the mean and log-variance
    of a diagonal Gaussian over the latent space."""

    def __init__(self, nb_channels, latent_dim):
        super().__init__()

        self.model = nn.Sequential(
            nn.Conv2d(1, nb_channels, kernel_size=1),  # to 28x28
            nn.ReLU(inplace=True),
            nn.Conv2d(nb_channels, nb_channels, kernel_size=5),  # to 24x24
            nn.ReLU(inplace=True),
            nn.Conv2d(nb_channels, nb_channels, kernel_size=5),  # to 20x20
            nn.ReLU(inplace=True),
            nn.Conv2d(nb_channels, nb_channels, kernel_size=4, stride=2),  # to 9x9
            nn.ReLU(inplace=True),
            nn.Conv2d(nb_channels, nb_channels, kernel_size=3, stride=2),  # to 4x4
            nn.ReLU(inplace=True),
            # Final 4x4 convolution collapses the map to 2 * latent_dim
            # values per image: means then log-variances.
            nn.Conv2d(nb_channels, 2 * latent_dim, kernel_size=4),
        )

    def forward(self, x):
        # (N, 2 * latent_dim, 1, 1) -> (N, 2, latent_dim)
        output = self.model(x).view(x.size(0), 2, -1)
        mu, log_var = output[:, 0], output[:, 1]
        return mu, log_var
class ImageGivenLatentNet(nn.Module):
    """Decoder p(X | z): maps a latent vector to the per-pixel mean and
    log-variance of a Gaussian over 1x28x28 images."""

    def __init__(self, nb_channels, latent_dim):
        super().__init__()

        self.model = nn.Sequential(
            nn.ConvTranspose2d(latent_dim, nb_channels, kernel_size=4),  # from 1x1
            nn.ReLU(inplace=True),
            nn.ConvTranspose2d(
                nb_channels, nb_channels, kernel_size=3, stride=2
            ),  # from 4x4
            nn.ReLU(inplace=True),
            nn.ConvTranspose2d(
                nb_channels, nb_channels, kernel_size=4, stride=2
            ),  # from 9x9
            nn.ReLU(inplace=True),
            nn.ConvTranspose2d(nb_channels, nb_channels, kernel_size=5),  # from 20x20
            nn.ReLU(inplace=True),
            # Two output channels: per-pixel mean and per-pixel log-variance.
            nn.ConvTranspose2d(nb_channels, 2, kernel_size=5),  # from 24x24
        )

    def forward(self, z):
        # Latent vector -> (N, latent_dim, 1, 1) feature map for the first deconv.
        output = self.model(z.view(z.size(0), -1, 1, 1))
        # Channel 0 is the mean, channel 1 the log-variance, each (N, 1, 28, 28).
        mu, log_var = output[:, 0:1], output[:, 1:2]
        return mu, log_var
146 ######################################################################
data_dir = os.path.join(args.data_dir, "mnist")

# Load MNIST once as dense float tensors of shape (N, 1, 28, 28); the raw
# uint8 pixel values (0..255) are kept as-is here and normalized later.
train_set = torchvision.datasets.MNIST(data_dir, train=True, download=True)
train_input = train_set.data.view(-1, 1, 28, 28).float()

test_set = torchvision.datasets.MNIST(data_dir, train=False, download=True)
test_input = test_set.data.view(-1, 1, 28, 28).float()
156 ######################################################################
model_q_Z_given_x = LatentGivenImageNet(
    nb_channels=args.nb_channels, latent_dim=args.latent_dim
)

model_p_X_given_z = ImageGivenLatentNet(
    nb_channels=args.nb_channels, latent_dim=args.latent_dim
)

# A single optimizer over the parameters of both networks, so encoder and
# decoder are updated jointly.
optimizer = optim.Adam(
    itertools.chain(model_p_X_given_z.parameters(), model_q_Z_given_x.parameters()),
    lr=args.learning_rate,
)

model_p_X_given_z.to(device)
model_q_Z_given_x.to(device)
174 ######################################################################
train_input, test_input = train_input.to(device), test_input.to(device)

# Standardize with the training-set statistics; the same statistics are
# applied to the test set (the normalization is undone in save_image).
train_mu, train_std = train_input.mean(), train_input.std()
train_input.sub_(train_mu).div_(train_std)
test_input.sub_(train_mu).div_(train_std)
182 ######################################################################
# Parameters of the prior p(Z): mean 0 and log-variance 0, i.e. N(0, I).
mean_p_Z = train_input.new_zeros(1, args.latent_dim)
# NOTE(review): log_var_p_Z aliases mean_p_Z (same tensor object). Harmless
# while both stay all-zero and untouched, but fragile if either is ever
# modified in place.
log_var_p_Z = mean_p_Z
for epoch in range(args.nb_epochs):
    acc_loss = 0

    for x in train_input.split(args.batch_size):
        # Encode, then sample z with the reparameterization trick.
        mean_q_Z_given_x, log_var_q_Z_given_x = model_q_Z_given_x(x)
        z = sample_gaussian(mean_q_Z_given_x, log_var_q_Z_given_x)
        mean_p_X_given_z, log_var_p_X_given_z = model_p_X_given_z(z)

        if args.no_dkl:
            # Monte-Carlo estimate of the (negative) ELBO:
            # -E_q[ log p(x, z) - log q(z | x) ]
            log_q_z_given_x = log_p_gaussian(z, mean_q_Z_given_x, log_var_q_Z_given_x)
            log_p_x_z = log_p_gaussian(
                x, mean_p_X_given_z, log_var_p_X_given_z
            ) + log_p_gaussian(z, mean_p_Z, log_var_p_Z)
            loss = -(log_p_x_z - log_q_z_given_x).mean()
        else:
            # Standard ELBO with the KL term in closed form:
            # -E_q[ log p(x | z) ] + DKL(q(Z | x) || p(Z))
            log_p_x_given_z = log_p_gaussian(x, mean_p_X_given_z, log_var_p_X_given_z)
            dkl_q_Z_given_x_from_p_Z = dkl_gaussians(
                mean_q_Z_given_x, log_var_q_Z_given_x, mean_p_Z, log_var_p_Z
            )
            loss = (-log_p_x_given_z + dkl_q_Z_given_x_from_p_Z).mean()

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Accumulate the un-averaged loss to report a per-sample epoch mean.
        acc_loss += loss.item() * x.size(0)

    log_string(f"acc_loss {epoch} {acc_loss/train_input.size(0)}")
216 ######################################################################
def save_image(x, filename):
    """Un-normalize x back to pixel space, map to [0, 1], and save a grid.

    Images are inverted (1 - x) so digits appear dark on a light background;
    the grid has 16 images per row.
    """
    x = x * train_std + train_mu
    x = x.clamp(min=0, max=255) / 255
    torchvision.utils.save_image(1 - x, filename, nrow=16, pad_value=0.8)
# Save a bunch of test images

x = test_input[:256]
save_image(x, "input.png")

# Save the same images after encoding / decoding

mean_q_Z_given_x, log_var_q_Z_given_x = model_q_Z_given_x(x)
z = sample_gaussian(mean_q_Z_given_x, log_var_q_Z_given_x)
mean_p_X_given_z, log_var_p_X_given_z = model_p_X_given_z(z)
x = sample_gaussian(mean_p_X_given_z, log_var_p_X_given_z)
save_image(x, "output.png")

# Generate a bunch of images by sampling latents from the prior p(Z)

z = sample_gaussian(mean_p_Z.expand(x.size(0), -1), log_var_p_Z.expand(x.size(0), -1))
mean_p_X_given_z, log_var_p_X_given_z = model_p_X_given_z(z)
x = sample_gaussian(mean_p_X_given_z, log_var_p_X_given_z)
save_image(x, "synth.png")
245 ######################################################################