3 # Any copyright is dedicated to the Public Domain.
4 # https://creativecommons.org/publicdomain/zero/1.0/
6 # Written by Francois Fleuret <francois@fleuret.org>
9 import torch, torchvision
# Optimization settings: Adam learning rate, number of passes over the
# training set, and mini-batch size.
lr = 2e-3
nb_epochs = 100
batch_size = 100

# MNIST is stored under $PYTORCH_DATA_DIR when that variable is set and
# non-empty, and under ./data/ otherwise.
data_dir = os.environ.get("PYTORCH_DATA_DIR") or "./data/"

# Run on the GPU when CUDA is available, on the CPU otherwise.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

######################################################################

# Training split: images are viewed as (N, 1, 28, 28) and cast to float,
# then images and labels are moved to the compute device.
train_set = torchvision.datasets.MNIST(root=data_dir, train=True, download=True)
train_input = train_set.data.view(-1, 1, 28, 28).float().to(device)
train_targets = train_set.targets.to(device)

# Test split, prepared exactly like the training split.
test_set = torchvision.datasets.MNIST(root=data_dir, train=False, download=True)
test_input = test_set.data.view(-1, 1, 28, 28).float().to(device)
test_targets = test_set.targets.to(device)

# Both splits are standardized in place with the mean and standard
# deviation of the *training* images only.
mu = train_input.mean()
std = train_input.std()

train_input.sub_(mu).div_(std)
test_input.sub_(mu).div_(std)
36 ######################################################################
class QLinear(nn.Module):
    """Linear layer whose weight and bias are ternarized on every call.

    The full-precision parameters ``w`` and ``b`` are kept for the
    optimizer; the forward pass only uses their quantized versions, whose
    entries are in {-1, 0, +1}.

    Args:
        dim_in: size of each input sample.
        dim_out: size of each output sample.
        epsilon: small constant added to the mean absolute value before
            dividing, so that an all-zero tensor does not divide by zero.
            NOTE(review): the value used by the original code is not
            visible in the reviewed source; 1e-3 is an assumption -- TODO
            confirm.
    """

    def __init__(self, dim_in, dim_out, epsilon=1e-3):
        # nn.Module must be initialized before any nn.Parameter is
        # assigned, otherwise the attribute assignment below raises.
        super().__init__()
        self.epsilon = epsilon
        self.w = nn.Parameter(torch.randn(dim_out, dim_in))
        self.b = nn.Parameter(torch.randn(dim_out) * 1e-1)

    def quantize(self, z):
        """Return ``z`` rounded to {-1, 0, +1}, differentiable w.r.t. ``z``.

        ``z`` is first rescaled by its mean absolute value; entries at or
        below -0.5 map to -1, entries at or above 0.5 map to +1, and the
        rest map to 0. The result has the dtype of ``z``.

        The gradient is passed through unchanged (straight-through
        estimator): the returned tensor has the quantized values in the
        forward pass and the gradient of the identity in the backward
        pass.
        """
        zr = z / (z.abs().mean() + self.epsilon)
        zq = (zr >= 0.5).to(z.dtype) - (zr <= -0.5).to(z.dtype)

        # `z - z.detach()` is exactly zero, so the forward value is exactly
        # zq. Writing it as `zq + z - z.detach()` instead would round the
        # +/-1 away when |z| is large.
        return zq + (z - z.detach())

    def forward(self, x):
        """Apply the affine map defined by the quantized ``w`` and ``b``."""
        return x @ self.quantize(self.w).t() + self.quantize(self.b)
58 ######################################################################
# Final test error of every trained model, stored as (nb_hidden, test_error)
# pairs and keyed by the linear layer class the model was built with.
# test_error is a plain Python float in [0, 1].
errors = {QLinear: [], nn.Linear: []}

for linear_layer in errors:
    for nb_hidden in [16, 32, 64, 128, 256, 512, 1024]:
        # One-hidden-layer MLP on flattened 28x28 images (784 inputs,
        # 10 classes).
        # NOTE(review): the non-linearity between the two layers is not
        # visible in the reviewed source; ReLU is assumed -- TODO confirm.
        model = nn.Sequential(
            nn.Flatten(),
            linear_layer(784, nb_hidden),
            nn.ReLU(),
            linear_layer(nb_hidden, 10),
        ).to(device)

        nb_parameters = sum(p.numel() for p in model.parameters())

        print(f"nb_parameters {nb_parameters}")

        optimizer = torch.optim.Adam(model.parameters(), lr=lr)

        for k in range(nb_epochs):
            # Train for one epoch, accumulating the sample-weighted loss so
            # that the reported value is the mean loss per training sample.
            model.train()

            acc_train_loss = 0.0

            for batch_input, batch_targets in zip(
                train_input.split(batch_size), train_targets.split(batch_size)
            ):
                output = model(batch_input)
                loss = torch.nn.functional.cross_entropy(output, batch_targets)
                acc_train_loss += loss.item() * batch_input.size(0)

                optimizer.zero_grad()
                loss.backward()
                optimizer.step()

            # Evaluate on the test set. No gradient is needed here, so the
            # autograd graph is not built.
            model.eval()

            nb_test_errors = 0

            with torch.no_grad():
                for batch_input, batch_targets in zip(
                    test_input.split(batch_size), test_targets.split(batch_size)
                ):
                    wta = model(batch_input).argmax(1)
                    nb_test_errors += (wta != batch_targets).sum().item()

            # Kept as a Python float so it can be stored and plotted without
            # holding on to a device tensor.
            test_error = nb_test_errors / test_input.size(0)

            if (k + 1) % 10 == 0:
                print(
                    f"loss {k+1} {acc_train_loss/train_input.size(0)} {test_error*100:.02f}%"
                )

        ######################################################################

        # Only the error after the last epoch is recorded.
        errors[linear_layer].append((nb_hidden, test_error))
121 import matplotlib.pyplot as plt
# Plot the final test error against the hidden layer size, one curve per
# linear layer type, and save the figure as a PDF.
# NOTE(review): the figure creation is not visible in the reviewed source;
# a default-sized figure is created here -- confirm against the full file.
fig = plt.figure()
ax = fig.add_subplot(1, 1, 1)

ax.spines.right.set_visible(False)
ax.spines.top.set_visible(False)

ax.set_xlabel("Nb hidden units")
ax.set_ylabel("Test error (%)")

for layer_class, color, label in (
    (nn.Linear, "gray", "nn.Linear"),
    (QLinear, "red", "QLinear"),
):
    X = [nb_hidden for nb_hidden, _ in errors[layer_class]]
    # The recorded errors are fractions in [0, 1]; the axis is labelled in
    # percent, so scale them. float() accepts both Python numbers and
    # 0-dim tensors, wherever they live.
    Y = [100.0 * float(test_error) for _, test_error in errors[layer_class]]
    ax.plot(X, Y, color=color, label=label)

ax.legend(frameon=False, loc=1)

filename = "bit_mlp.pdf"
print(f"saving {filename}")
fig.savefig(filename, bbox_inches="tight")