######################################################################
-def sigma_for_grids(input):
-    l = input.size(1) // 4 - 1
-    sigma = input.new(input.size())
-    r = sigma.view(sigma.size(0), 4, sigma.size(1) // 4)
-    r[:, 0] = 0 * l
-    r[:, 1] = 1 * l
-    r[:, 2] = 2 * l
-    r[:, 3] = 3 * l
-    r[:, :, 1:] += (
-        torch.rand(input.size(0), 4, l, device=input.device).sort(dim=2).indices
-    ) + 1
-    return sigma
-
-
def run_tests(model, quiz_machine, local_device=main_device):
    with torch.autograd.no_grad():
        model.eval().to(local_device)
        for input in tqdm.tqdm(src, dynamic_ncols=True, desc="test"):
            input = input.to(local_device)
-            sigma = sigma_for_grids(input)
+            # fresh per-sequence position permutation for this test batch
+            sigma = quiz_machine.sigma_for_grids(input)
            output = model(mygpt.BracketedSequence(input), sigma).x
            loss = F.cross_entropy(output.transpose(1, 2), input)
            acc_test_loss += loss.item() * input.size(0)
            targets = input
-            sigma = sigma_for_grids(input)
+            sigma = quiz_machine.sigma_for_grids(input)
            output = model(mygpt.BracketedSequence(input), sigma).x
            loss_per_token = F.cross_entropy(
                output.transpose(1, 2), targets, reduction="none"
def one_batch_masked_inplace_autoregression(
    model,
    input,
+    sigma,  # per-sequence position permutation (see sigma_for_grids)
    ar_mask,
    seq_logproba,
    deterministic_synthesis=False,
        BracketedSequence(input, 0, to_generate.min())
    )  # Needed to initialize the model's cache
    for s in range(to_generate.min(), to_generate.max() + 1):
-        output = model(BracketedSequence(input, s, 1)).x
+        output = model(BracketedSequence(input, s, 1), sigma).x
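+        # read the logits of the single position s computed in this pass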
        logits = output[:, s]
######################################################################
+    def sigma_for_grids(self, input):
+        # Builds one position permutation ("sigma") per sequence. The input
+        # is assumed to be four grids of identical length l concatenated
+        # along the sequence dimension; sigma maps every grid onto itself,
+        # keeps each grid's first position fixed, and permutes the rest.
+        l = input.size(1) // 4
+        sigma = input.new(input.size())
+        r = sigma.view(sigma.size(0), 4, l)
+        # base offset of each grid: 0, l, 2l, 3l
+        r[:, 0] = 0 * l
+        r[:, 1] = 1 * l
+        r[:, 2] = 2 * l
+        r[:, 3] = 3 * l
+        # add a random permutation of 1, ..., l - 1 at the non-initial slots
+        r[:, :, 1:] += (
+            torch.rand(input.size(0), 4, l - 1, device=input.device).sort(dim=2).indices
+        ) + 1
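+        # Illustrative sketch, assuming input.size(1) == 12 (so l == 3): one
+        # row of sigma could come out as
+        #     [0, 2, 1,  3, 4, 5,  6, 8, 7,  9, 10, 11]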
+        return sigma
+
    def autoregression(
        self,
        model,
        model.eval()
        for input, ar_mask, seq_logproba in batches:
+            sigma = self.sigma_for_grids(input)
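+            # the same permutation is reused for every generation step below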
            one_batch_masked_inplace_autoregression(
                model=model,
                input=input,
+                sigma=sigma,
                ar_mask=ar_mask,
                seq_logproba=seq_logproba,
                deterministic_synthesis=False,
    ):
        input = input.to(device)
        ar_mask = self.make_ar_mask(input, struct=struct, mask=mask)
-        output = model(mygpt.BracketedSequence(input)).x
+        sigma = self.sigma_for_grids(input)
+        output = model(mygpt.BracketedSequence(input), sigma).x
        l[:, model.id] = (
            -F.cross_entropy(
                output.transpose(1, 2), input, reduction="none"