parser.add_argument("--nb_test_samples", type=int, default=None)
-parser.add_argument("--learning_rate", type=float, default=1e-4)
+parser.add_argument("--learning_rate", type=float, default=1e-3)
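# Hedged illustration (not part of this diff): the parsed value typically
# feeds the optimizer, so raising the default from 1e-4 to 1e-3 affects every
# run that does not pass --learning_rate explicitly.
import argparse

_p = argparse.ArgumentParser()
_p.add_argument("--learning_rate", type=float, default=1e-3)
_a = _p.parse_args([])            # [] -> fall back to the defaults
assert _a.learning_rate == 1e-3   # e.g. torch.optim.Adam(..., lr=_a.learning_rate)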
########################################
default_args = {
"model": "37M",
"batch_size": 100,
- "nb_train_samples": 250000,
+ "nb_train_samples": 100000,
"nb_test_samples": 10000,
}
)
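# Hedged sketch, assuming the usual pattern around dicts like default_args in
# this code base: each entry fills in an argparse option the user left at
# None. The merging loop itself is an assumption; only the dict appears in
# this diff.
for k, v in default_args.items():
    if getattr(args, k) is None:
        setattr(args, k, v)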
- # We keep the first average logits as a reference
- if min_ave_seq_logproba is None:
-     min_ave_seq_logproba = ave_seq_logproba
- else:
-     log_string(
-         f"min_ave_seq_logproba {min_ave_seq_logproba} ave_seq_logproba {ave_seq_logproba}"
-     )
+ # We kept the first average sequence log-proba as a reference (disabled below)
+ # if min_ave_seq_logproba is None:
+ #     min_ave_seq_logproba = ave_seq_logproba
+ # else:
+ #     log_string(
+ #         f"min_ave_seq_logproba {min_ave_seq_logproba} ave_seq_logproba {ave_seq_logproba}"
+ #     )
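# For reference, a standalone sketch of the behaviour the block above had
# before being disabled: despite its name, min_ave_seq_logproba was set once
# from the first observed value and never lowered afterwards; later values
# were only logged against it. The function name track_logproba is ours;
# log_string is the logging helper already called in the original code.
min_ave_seq_logproba = None

def track_logproba(ave_seq_logproba):
    global min_ave_seq_logproba
    if min_ave_seq_logproba is None:
        # the first observed value becomes the fixed reference
        min_ave_seq_logproba = ave_seq_logproba
    else:
        log_string(
            f"min_ave_seq_logproba {min_ave_seq_logproba} ave_seq_logproba {ave_seq_logproba}"
        )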
# We update everyone
for model in models: