import ffutils
import mygpt
-import sky, wireworld, quizz_machine
+import sky, reasoning, quizz_machine
# world quizzes vs. culture quizzes
parser.add_argument("--deterministic_synthesis", action="store_true", default=False)
-parser.add_argument("--both_directions", action="store_true", default=False)
-
parser.add_argument("--problem", type=str, default="sky")
parser.add_argument("--nb_gpts", type=int, default=5)
-parser.add_argument("--min_to_validate", type=int, default=4)
+parser.add_argument("--min_to_validate", type=int, default=None)
-parser.add_argument("--max_to_validate", type=int, default=4)
+parser.add_argument("--max_to_validate", type=int, default=None)
parser.add_argument("--accuracy_to_make_c_quizzes", type=float, default=0.975)
-parser.add_argument("--dirty_debug", action="store_true", default=False)
+parser.add_argument("--generation_temperature", type=float, default=2.0)
+
+parser.add_argument("--deterministic_validation", action="store_true", default=False)
-parser.add_argument("--generation_temperature", type=float, default=1.0)
+parser.add_argument("--bidirectional_validation", action="store_true", default=False)
-parser.add_argument("--stochastic_validation", action="store_true", default=False)
+parser.add_argument("--dirty_debug", action="store_true", default=False)
######################################################################
args = parser.parse_args()
+if args.min_to_validate is None:
+ args.min_to_validate = args.nb_gpts - 1
+
+if args.max_to_validate is None:
+ args.max_to_validate = args.nb_gpts - 1
+
if args.result_dir is None:
args.result_dir = f"results_culture"
nb_iterations=args.sky_nb_iterations,
speed=args.sky_speed,
)
-elif args.problem == "wireworld":
- problem = wireworld.Wireworld(height=8, width=10, nb_iterations=2, speed=5)
+ back_accuracy = False
+elif args.problem == "reasoning":
+ problem = reasoning.Reasoning(device=device)
+ back_accuracy = True
else:
raise ValueError
problem=problem,
nb_train_samples=args.nb_train_samples,
nb_test_samples=args.nb_test_samples,
+ back_accuracy=back_accuracy,
batch_size=args.physical_batch_size,
result_dir=args.result_dir,
logger=log_string,
nb_correct, seq_logproba = quizz_machine.compute_correctness(
c_quizzes,
models,
- both_directions=args.both_directions,
- deterministic_validation=not args.stochastic_validation,
+ bidirectional_validation=args.bidirectional_validation,
+ deterministic_validation=args.deterministic_validation,
)
for n, l in zip(nb_correct, seq_logproba):
for n_epoch in range(args.nb_epochs):
log_string(f"--- epoch {n_epoch} ----------------------------------------")
+ cta = " ".join([f"{float(m.main_test_accuracy):.04f}" for m in models])
+ log_string(f"current_test_accuracies {cta}")
+
# Select, improve, and eval the worst model
weakest_model = min(models, key=lambda m: float(m.main_test_accuracy))
f"test_set_composition w_quizzes {quizz_machine.nb_batch_w_quizzes} c_quizzes {quizz_machine.nb_batch_c_quizzes}"
)
- cta = " ".join([f"{float(m.main_test_accuracy):.04f}" for m in models])
- log_string(f"current_test_accuracies {cta}")
-
# Replace a fraction of the w_quizzes with fresh ones
quizz_machine.renew_w_quizzes(args.nb_train_samples // args.nb_gpts)