projects
/
culture.git
/ blobdiff
commit
grep
author
committer
pickaxe
?
search:
re
summary
|
shortlog
|
log
|
commit
|
commitdiff
|
tree
raw
|
inline
| side by side
Update.
[culture.git]
/
main.py
diff --git a/main.py b/main.py
index 918f75d..43241dd 100755 (executable)
--- a/main.py
+++ b/main.py
@@ -13,7 +13,7 @@ from torch.nn import functional as F
import ffutils
import mygpt
import ffutils
import mygpt
-import sky, wireworld, quizz_machine
+import sky, reasoning, quizz_machine
# world quizzes vs. culture quizzes
# world quizzes vs. culture quizzes
@@ -79,23 +79,23 @@ parser.add_argument("--dropout", type=float, default=0.1)
parser.add_argument("--deterministic_synthesis", action="store_true", default=False)
parser.add_argument("--deterministic_synthesis", action="store_true", default=False)
-parser.add_argument("--both_directions", action="store_true", default=False)
-
parser.add_argument("--problem", type=str, default="sky")
parser.add_argument("--nb_gpts", type=int, default=5)
parser.add_argument("--problem", type=str, default="sky")
parser.add_argument("--nb_gpts", type=int, default=5)
-parser.add_argument("--min_to_validate", type=int, default=4)
+parser.add_argument("--min_to_validate", type=int, default=None)
-parser.add_argument("--max_to_validate", type=int, default=4)
+parser.add_argument("--max_to_validate", type=int, default=None)
parser.add_argument("--accuracy_to_make_c_quizzes", type=float, default=0.975)
parser.add_argument("--accuracy_to_make_c_quizzes", type=float, default=0.975)
-parser.add_argument("--dirty_debug", action="store_true", default=False)
+parser.add_argument("--generation_temperature", type=float, default=2.0)
+
+parser.add_argument("--deterministic_validation", action="store_true", default=False)
-parser.add_argument("--generation_temperature", type=float, default=1.0)
+parser.add_argument("--bidirectional_validation", action="store_true", default=False)
-parser.add_argument("--stochastic_validation", action="store_true", default=False)
+parser.add_argument("--dirty_debug", action="store_true", default=False)
######################################################################
######################################################################
@@ -113,6 +113,12 @@ parser.add_argument("--sky_speed", type=int, default=3)
args = parser.parse_args()
args = parser.parse_args()
+if args.min_to_validate is None:
+ args.min_to_validate = args.nb_gpts - 1
+
+if args.max_to_validate is None:
+ args.max_to_validate = args.nb_gpts - 1
+
if args.result_dir is None:
args.result_dir = f"results_culture"
if args.result_dir is None:
args.result_dir = f"results_culture"
@@ -243,8 +249,10 @@ if args.problem == "sky":
nb_iterations=args.sky_nb_iterations,
speed=args.sky_speed,
)
nb_iterations=args.sky_nb_iterations,
speed=args.sky_speed,
)
-elif args.problem == "wireworld":
- problem = wireworld.Wireworld(height=8, width=10, nb_iterations=2, speed=5)
+ back_accuracy = False
+elif args.problem == "reasoning":
+ problem = reasoning.Reasoning(device=device)
+ back_accuracy = True
else:
raise ValueError
else:
raise ValueError
@@ -252,6 +260,7 @@ quizz_machine = quizz_machine.QuizzMachine(
problem=problem,
nb_train_samples=args.nb_train_samples,
nb_test_samples=args.nb_test_samples,
problem=problem,
nb_train_samples=args.nb_train_samples,
nb_test_samples=args.nb_test_samples,
+ back_accuracy=back_accuracy,
batch_size=args.physical_batch_size,
result_dir=args.result_dir,
logger=log_string,
batch_size=args.physical_batch_size,
result_dir=args.result_dir,
logger=log_string,
@@ -423,8 +432,8 @@ def create_c_quizzes(
nb_correct, seq_logproba = quizz_machine.compute_correctness(
c_quizzes,
models,
nb_correct, seq_logproba = quizz_machine.compute_correctness(
c_quizzes,
models,
- both_directions=args.both_directions,
- deterministic_validation=not args.stochastic_validation,
+ bidirectional_validation=args.bidirectional_validation,
+ deterministic_validation=args.deterministic_validation,
)
for n, l in zip(nb_correct, seq_logproba):
)
for n, l in zip(nb_correct, seq_logproba):
@@ -502,6 +511,9 @@ log_string(f"nb_parameters {nb_parameters} ({int(nb_parameters/1e6)}M)")
for n_epoch in range(args.nb_epochs):
log_string(f"--- epoch {n_epoch} ----------------------------------------")
for n_epoch in range(args.nb_epochs):
log_string(f"--- epoch {n_epoch} ----------------------------------------")
+ cta = " ".join([f"{float(m.main_test_accuracy):.04f}" for m in models])
+ log_string(f"current_test_accuracies {cta}")
+
# Select, improve, and eval the worst model
weakest_model = min(models, key=lambda m: float(m.main_test_accuracy))
# Select, improve, and eval the worst model
weakest_model = min(models, key=lambda m: float(m.main_test_accuracy))
@@ -522,9 +534,6 @@ for n_epoch in range(args.nb_epochs):
f"test_set_composition w_quizzes {quizz_machine.nb_batch_w_quizzes} c_quizzes {quizz_machine.nb_batch_c_quizzes}"
)
f"test_set_composition w_quizzes {quizz_machine.nb_batch_w_quizzes} c_quizzes {quizz_machine.nb_batch_c_quizzes}"
)
- cta = " ".join([f"{float(m.main_test_accuracy):.04f}" for m in models])
- log_string(f"current_test_accuracies {cta}")
-
# Replace a fraction of the w_quizzes with fresh ones
quizz_machine.renew_w_quizzes(args.nb_train_samples // args.nb_gpts)
# Replace a fraction of the w_quizzes with fresh ones
quizz_machine.renew_w_quizzes(args.nb_train_samples // args.nb_gpts)