else:
return s
class DiscreteSampler2d(nn.Module):
    """Hard one-hot selection along dim -3 with a straight-through gradient.

    For every position over the remaining dimensions, the output is 1.0 at
    the index of the largest value along dim -3 and 0.0 elsewhere. Dim -3 is
    presumably the channel axis of an (N, C, H, W) tensor, as the "2d" in the
    name suggests -- confirm against the caller.

    When several entries tie for the maximum, only the first one (lowest
    index) is selected, so the result is always a proper one-hot code rather
    than a multi-hot mask.

    In training mode the returned tensor has the value of the hard one-hot
    code, but its gradient is that of ``softmax(x, dim=-3)``. In eval mode
    the plain one-hot code is returned.
    """

    def forward(self, x):
        """Return the one-hot code of ``x`` along dim -3.

        Args:
            x: tensor with at least three dimensions.

        Returns:
            A tensor of the same shape as ``x`` holding 0.0 / 1.0 values.
        """
        is_max = x >= x.max(-3, keepdim=True).values
        # A running count of maxima along dim -3 equals 1 exactly at the
        # first maximum, which discards any later tied entries.
        first_max = is_max & (is_max.cumsum(-3) == 1)
        s = first_max.float()

        if not self.training:
            return s

        # Straight-through: u - u.detach() is numerically zero but carries
        # the softmax gradient back to x.
        u = x.softmax(dim=-3)
        return s + u - u.detach()
+
def loss_H(binary_logits, h_threshold=1):
p = binary_logits.sigmoid().mean(0)
for input in tqdm.tqdm(train_input.split(batch_size), desc="vqae-train"):
input = input.to(device)
z = encoder(input)
- zq = z if k < 2 else quantizer(z)
+ zq = quantizer(z)
output = decoder(zq)
output = output.reshape(
train_loss = F.cross_entropy(output, input)
if lambda_entropy > 0:
- loss = loss + lambda_entropy * loss_H(z, h_threshold=0.5)
+ train_loss = train_loss + lambda_entropy * loss_H(z, h_threshold=0.5)
acc_train_loss += train_loss.item() * input.size(0)
for input in tqdm.tqdm(test_input.split(batch_size), desc="vqae-test"):
input = input.to(device)
z = encoder(input)
- zq = z if k < 1 else quantizer(z)
+ zq = quantizer(z)
output = decoder(zq)
output = output.reshape(
frame2seq,
seq2frame,
) = create_data_and_processors(
- # 10000, 1000,
- 100,
- 100,
- nb_epochs=2,
+ 25000, 1000,
+ nb_epochs=5,
mode="first_last",
nb_steps=20,
)
- input = test_input[:64]
+ input = test_input[:256]
seq = frame2seq(input)
-
- print(f"{seq.size()=} {seq.dtype=} {seq.min()=} {seq.max()=}")
-
output = seq2frame(seq)
torchvision.utils.save_image(
- input.float() / (Box.nb_rgb_levels - 1), "orig.png", nrow=8
+ input.float() / (Box.nb_rgb_levels - 1), "orig.png", nrow=16
)
torchvision.utils.save_image(
- output.float() / (Box.nb_rgb_levels - 1), "qtiz.png", nrow=8
+ output.float() / (Box.nb_rgb_levels - 1), "qtiz.png", nrow=16
)