-!/usr/bin/env python
+#!/usr/bin/env python
# Any copyright is dedicated to the Public Domain.
# https://creativecommons.org/publicdomain/zero/1.0/
)
parser.add_argument(
- "--task", type=str, default="picoclvr", help="picoclvr, mnist, maze, snake"
+ "--task", type=str, default="picoclvr", help="picoclvr, mnist, maze, snake, stack, expr"
)
parser.add_argument("--log_filename", type=str, default="train.log", help=" ")
-parser.add_argument("--result_dir", type=str, default="results_default")
+parser.add_argument("--result_dir", type=str, default=None)
parser.add_argument("--seed", type=int, default=0)
parser.add_argument("--batch_size", type=int, default=None)
-parser.add_argument("--nb_train_samples", type=int, default=250000)
+parser.add_argument("--nb_train_samples", type=int, default=None)
-parser.add_argument("--nb_test_samples", type=int, default=10000)
+parser.add_argument("--nb_test_samples", type=int, default=None)
parser.add_argument("--optim", type=str, default="adam")
parser.add_argument("--snake_length", type=int, default=200)
+##############################
+# Stack options
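+# (number of push/pop operations per sequence, number of parallel stacks,
+# number of digits per pushed value, and an optional fraction of the possible
+# values reserved for training, the remainder being used only at test time)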
+
+parser.add_argument("--stack_nb_steps", type=int, default=100)
+
+parser.add_argument("--stack_nb_stacks", type=int, default=1)
+
+parser.add_argument("--stack_nb_digits", type=int, default=3)
+
+parser.add_argument("--stack_fraction_values_for_train", type=float, default=None)
+
######################################################################
args = parser.parse_args()
assert args.picocvlr_prune_properties in {"none", "train+eval", "eval"}
-try:
- os.mkdir(args.result_dir)
-except FileExistsError:
- if not args.overwrite_results:
- print(f"result directory {args.result_dir} already exists")
- exit(1)
-
-log_file = open(os.path.join(args.result_dir, args.log_filename), "a")
-
-if args.seed >= 0:
- # torch.backends.cudnn.deterministic = True
- # torch.backends.cudnn.benchmark = False
- # torch.use_deterministic_algorithms(True)
- torch.manual_seed(args.seed)
- if torch.cuda.is_available():
- torch.cuda.manual_seed_all(args.seed)
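+# Default the result directory to the task name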
+if args.result_dir is None:
+ args.result_dir = f"results_{args.task}"
######################################################################
"picoclvr": {
"nb_epochs": 25,
"batch_size": 25,
+ "nb_train_samples": 250000,
+ "nb_test_samples": 10000,
},
"mnist": {
"nb_epochs": 25,
"batch_size": 10,
+ "nb_train_samples": 250000,
+ "nb_test_samples": 10000,
},
"maze": {
"nb_epochs": 25,
"batch_size": 25,
+ "nb_train_samples": 250000,
+ "nb_test_samples": 10000,
},
"snake": {
"nb_epochs": 5,
"batch_size": 25,
+ "nb_train_samples": 250000,
+ "nb_test_samples": 10000,
+ },
+ "stack": {
+ "nb_epochs": 5,
+ "batch_size": 25,
+ "nb_train_samples": 100000,
+ "nb_test_samples": 1000,
+ },
+ "expr": {
+ "nb_epochs": 5,
+ "batch_size": 25,
+ "nb_train_samples": 100000,
+ "nb_test_samples": 1000,
},
}
######################################################################
+try:
+ os.mkdir(args.result_dir)
+except FileExistsError:
+ if not args.overwrite_results:
+ print(f"result directory {args.result_dir} already exists")
+ exit(1)
+
+log_file = open(os.path.join(args.result_dir, args.log_filename), "a")
+
+if args.seed >= 0:
+ # torch.backends.cudnn.deterministic = True
+ # torch.backends.cudnn.benchmark = False
+ # torch.use_deterministic_algorithms(True)
+ torch.manual_seed(args.seed)
+ if torch.cuda.is_available():
+ torch.cuda.manual_seed_all(args.seed)
+
+######################################################################
+
def log_string(s):
t = time.strftime("%Y%m%d-%H:%M:%S ", time.localtime())
progress_bar_desc="autoregression",
device=torch.device("cpu"),
):
+
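+    # Process the sequences by mini-batches, pairing each batch with its mask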
batches = zip(input.split(batch_size), ar_mask.split(batch_size))
+
if progress_bar_desc is not None:
- tqdm.tqdm(
+ batches = tqdm.tqdm(
batches,
dynamic_ncols=True,
desc=progress_bar_desc,
total=input.size(0) // batch_size,
)
+
for input, ar_mask in batches:
i = (ar_mask.sum(0) > 0).nonzero()
if i.min() > 0:
image_name = os.path.join(args.result_dir, f"picoclvr_result_{n_epoch:04d}.png")
torchvision.utils.save_image(
- img / 255.0, image_name, nrow=nb_per_primer, padding=1, pad_value=1.0
+ img / 255.0, image_name, nrow=nb_per_primer, padding=1, pad_value=0.0
)
log_string(f"wrote {image_name}")
def compute_error(self, model, split="train", nb_to_use=-1):
nb_total, nb_correct = 0, 0
count = torch.zeros(
- self.width * self.height, self.width * self.height, device=self.device, dtype=torch.int64
+ self.width * self.height,
+ self.width * self.height,
+ device=self.device,
+ dtype=torch.int64,
)
- for input in task.batches(split, nb_to_use):
+ for input in tqdm.tqdm(
+ task.batches(split, nb_to_use),
+ dynamic_ncols=True,
+        desc="test-mazes",
+ ):
result = input.clone()
ar_mask = result.new_zeros(result.size())
ar_mask[:, self.height * self.width :] = 1
result *= 1 - ar_mask
masked_inplace_autoregression(
- model, self.batch_size, result, ar_mask, device=self.device
+ model,
+ self.batch_size,
+ result,
+ ar_mask,
+ progress_bar_desc=None,
+ device=self.device,
)
mazes, paths = self.seq2map(result)
path_correctness = maze.path_correctness(mazes, paths)
model, "train", nb_to_use=1000
)
log_string(
- f"accuracy_train nb_total {train_nb_total} nb_correct {train_nb_correct} accuracy {(100.0*train_nb_correct)/train_nb_total:.02f}%"
+ f"accuracy_train {n_epoch} nb_total {train_nb_total} nb_correct {train_nb_correct} accuracy {(100.0*train_nb_correct)/train_nb_total:.02f}%"
)
test_nb_total, test_nb_correct, count = self.compute_error(
model, "test", nb_to_use=1000
)
log_string(
- f"accuracy_test nb_total {test_nb_total} nb_correct {test_nb_correct} accuracy {(100.0*test_nb_correct)/test_nb_total:.02f}%"
+ f"accuracy_test {n_epoch} nb_total {test_nb_total} nb_correct {test_nb_correct} accuracy {(100.0*test_nb_correct)/test_nb_total:.02f}%"
)
if count is not None:
target_paths=paths,
predicted_paths=predicted_paths,
path_correct=maze.path_correctness(mazes, predicted_paths),
+ path_optimal=maze.path_optimality(paths, predicted_paths),
)
log_string(f"wrote {filename}")
)
log_string(
- f"accuracy_test nb_total {test_nb_total} nb_correct {test_nb_correct} accuracy {(100.0*test_nb_correct)/test_nb_total:.02f}%"
+ f"accuracy_test {n_epoch} nb_total {test_nb_total} nb_correct {test_nb_correct} accuracy {(100.0*test_nb_correct)/test_nb_total:.02f}%"
+ )
+
+ model.train(t)
+
+
+######################################################################
+
+
+import stack
+
+
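+# Sequences interleave push/pop operations on nb_stacks stacks holding
+# nb_digits-digit values; at evaluation time the popped values are erased
+# with stack.remove_popped_values and the model has to regenerate them.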
+class TaskStack(Task):
+ def __init__(
+ self,
+ nb_train_samples,
+ nb_test_samples,
+ batch_size,
+ nb_steps,
+ nb_stacks,
+ nb_digits,
+ fraction_values_for_train=None,
+ device=torch.device("cpu"),
+ ):
+ self.batch_size = batch_size
+ self.nb_steps = nb_steps
+ self.nb_stacks = nb_stacks
+ self.nb_digits = nb_digits
+ self.device = device
+
+ if fraction_values_for_train is None:
+ values_for_train = None
+ values_for_test = None
+ else:
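+            # Keep disjoint subsets of the 10**nb_digits possible values
+            # for train and test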
+            all_values = torch.randperm(10**nb_digits)
+            nb_for_train = int(all_values.size(0) * fraction_values_for_train)
+            values_for_train = all_values[:nb_for_train]
+            values_for_test = all_values[nb_for_train:]
+
+ self.train_input, self.train_stack_counts = stack.generate_sequences(
+ nb_train_samples,
+ nb_steps,
+ nb_stacks,
+ nb_digits,
+ values_for_train,
+ self.device,
+ )
+
+ self.test_input, self.test_stack_counts = stack.generate_sequences(
+ nb_test_samples,
+ nb_steps,
+ nb_stacks,
+ nb_digits,
+ values_for_test,
+ self.device,
+ )
+
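+        # Histogram of the stack counts at the positions of pop operations in
+        # the test sequences (assuming odd opcodes below 2 * nb_stacks are pops)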
+ i = torch.logical_and(self.test_input % 2 == 1, self.test_input < 2 * nb_stacks)
+ counts = self.test_stack_counts.flatten()[i.flatten()]
+ counts = F.one_hot(counts).sum(0)
+ log_string(f"test_pop_stack_counts {counts}")
+
+ self.nb_codes = max(self.train_input.max(), self.test_input.max()) + 1
+
+ def batches(self, split="train", nb_to_use=-1, desc=None):
+ assert split in {"train", "test"}
+ input = self.train_input if split == "train" else self.test_input
+ if nb_to_use > 0:
+ input = input[:nb_to_use]
+ if desc is None:
+ desc = f"epoch-{split}"
+ for batch in tqdm.tqdm(
+ input.split(self.batch_size), dynamic_ncols=True, desc=desc
+ ):
+ yield batch
+
+ def vocabulary_size(self):
+ return self.nb_codes
+
+ def produce_results(self, n_epoch, model):
+ with torch.autograd.no_grad():
+ t = model.training
+ model.eval()
+
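+            # Erase the popped values, let the model regenerate them, and
+            # count how many are fully recovered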
+ def compute_nb_correct(input):
+ result = input.clone()
+ stack.remove_popped_values(result, self.nb_stacks, self.nb_digits)
+ ar_mask = (result != input).long()
+ masked_inplace_autoregression(
+ model, self.batch_size, result, ar_mask, device=self.device
+ )
+
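+                # Tokens come in groups of 1 + nb_digits (marker + digits); a
+                # popped value is correct only if its whole group is correct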
+ errors = ((result != input).long() * ar_mask).reshape(
+ -1, 1 + self.nb_digits
+ )
+ ar_mask = ar_mask.reshape(-1, 1 + self.nb_digits)
+
+ nb_total = ar_mask.max(1).values.sum()
+ nb_correct = nb_total - errors.max(1).values.sum()
+
+ return nb_total, nb_correct
+
+ test_nb_total, test_nb_correct = compute_nb_correct(self.test_input[:1000])
+
+ log_string(
+ f"accuracy_test {n_epoch} nb_total {test_nb_total} nb_correct {test_nb_correct} accuracy {(100.0*test_nb_correct)/test_nb_total:.02f}%"
+ )
+
+ ##############################################################
+ # Log a few generated sequences
+ input = self.test_input[:10, : 12 * (1 + self.nb_digits)]
+ result = input.clone()
+ stack.remove_popped_values(result, self.nb_stacks, self.nb_digits)
+ ar_mask = (result != input).long()
+ for n in range(result.size(0)):
+ log_string(
+ f"test_before {stack.seq_to_str(result[n],nb_stacks=self.nb_stacks,nb_digits=self.nb_digits)}"
+ )
+ masked_inplace_autoregression(
+ model, self.batch_size, result, ar_mask, device=self.device
+ )
+ for n in range(result.size(0)):
+ log_string(
+ f"test_after {stack.seq_to_str(result[n],nb_stacks=self.nb_stacks,nb_digits=self.nb_digits)}"
+ )
+ ##############################################################
+
+ model.train(t)
+
+
+######################################################################
+
+
+import expr
+
+
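+# Character-level task: the sequences returned by expr.generate_sequences are
+# tokenized with a per-character vocabulary and padded to a common length.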
+class TaskExpr(Task):
+ def __init__(
+ self,
+ nb_train_samples,
+ nb_test_samples,
+ batch_size,
+ device=torch.device("cpu"),
+ ):
+ self.batch_size = batch_size
+ self.device = device
+
+ train_sequences = expr.generate_sequences(nb_train_samples)
+ test_sequences = expr.generate_sequences(nb_test_samples)
+        # " " is used below for padding and must be part of the vocabulary
+        symbols = sorted(set("".join(train_sequences + test_sequences) + " "))
+        self.char2id = dict((c, n) for n, c in enumerate(symbols))
+        self.id2char = dict((n, c) for c, n in self.char2id.items())
+        len_max = max(len(x) for x in train_sequences + test_sequences)
+        self.train_input = torch.tensor(
+            [[self.char2id[c] for c in s.ljust(len_max)] for s in train_sequences],
+            device=self.device,
+        )
+        self.test_input = torch.tensor(
+            [[self.char2id[c] for c in s.ljust(len_max)] for s in test_sequences],
+            device=self.device,
+        )
+ self.nb_codes = max(self.train_input.max(), self.test_input.max()) + 1
+
+ def batches(self, split="train", nb_to_use=-1, desc=None):
+ assert split in {"train", "test"}
+ input = self.train_input if split == "train" else self.test_input
+ if nb_to_use > 0:
+ input = input[:nb_to_use]
+ if desc is None:
+ desc = f"epoch-{split}"
+ for batch in tqdm.tqdm(
+ input.split(self.batch_size), dynamic_ncols=True, desc=desc
+ ):
+ yield batch
+
+ def vocabulary_size(self):
+ return self.nb_codes
+
+ def produce_results(self, n_epoch, model):
+        # FIXME: copied from TaskStack.produce_results; it still calls
+        # stack.remove_popped_values and uses self.nb_stacks / self.nb_digits,
+        # which TaskExpr does not define, and needs expr-specific logic
+ with torch.autograd.no_grad():
+ t = model.training
+ model.eval()
+
+ def compute_nb_correct(input):
+ result = input.clone()
+ stack.remove_popped_values(result, self.nb_stacks, self.nb_digits)
+ ar_mask = (result != input).long()
+ masked_inplace_autoregression(
+ model, self.batch_size, result, ar_mask, device=self.device
+ )
+
+ errors = ((result != input).long() * ar_mask).reshape(
+ -1, 1 + self.nb_digits
+ )
+ ar_mask = ar_mask.reshape(-1, 1 + self.nb_digits)
+
+ nb_total = ar_mask.max(1).values.sum()
+ nb_correct = nb_total - errors.max(1).values.sum()
+
+ return nb_total, nb_correct
+
+ test_nb_total, test_nb_correct = compute_nb_correct(self.test_input[:1000])
+
+ log_string(
+ f"accuracy_test {n_epoch} nb_total {test_nb_total} nb_correct {test_nb_correct} accuracy {(100.0*test_nb_correct)/test_nb_total:.02f}%"
)
+ ##############################################################
+ # Log a few generated sequences
+ input = self.test_input[:10, : 12 * (1 + self.nb_digits)]
+ result = input.clone()
+ stack.remove_popped_values(result, self.nb_stacks, self.nb_digits)
+ ar_mask = (result != input).long()
+ for n in range(result.size(0)):
+ log_string(
+ f"test_before {stack.seq_to_str(result[n],nb_stacks=self.nb_stacks,nb_digits=self.nb_digits)}"
+ )
+ masked_inplace_autoregression(
+ model, self.batch_size, result, ar_mask, device=self.device
+ )
+ for n in range(result.size(0)):
+ log_string(
+ f"test_after {stack.seq_to_str(result[n],nb_stacks=self.nb_stacks,nb_digits=self.nb_digits)}"
+ )
+ ##############################################################
+
model.train(t)
device=device,
)
+elif args.task == "stack":
+ task = TaskStack(
+ nb_train_samples=args.nb_train_samples,
+ nb_test_samples=args.nb_test_samples,
+ batch_size=args.batch_size,
+ nb_steps=args.stack_nb_steps,
+ nb_stacks=args.stack_nb_stacks,
+ nb_digits=args.stack_nb_digits,
+ fraction_values_for_train=args.stack_fraction_values_for_train,
+ device=device,
+ )
+
+elif args.task == "expr":
+ task = TaskExpr(
+ nb_train_samples=args.nb_train_samples,
+ nb_test_samples=args.nb_test_samples,
+ batch_size=args.batch_size,
+ device=device,
+ )
+
else:
raise ValueError(f"Unknown task {args.task}")