20240715-00:09:15 argv ./main.py --result_dir=results_grids_v5/ --resume --nb_new_c_quizzes_for_train=1000 --nb_new_c_quizzes_for_test=100
20240715-00:09:15 args.log_filename train.log
20240715-00:09:15 args.result_dir results_grids_v5/
20240715-00:09:15 args.seed 0
20240715-00:09:15 args.resume True
20240715-00:09:15 args.max_percents_of_test_in_train -1
20240715-00:09:15 args.nb_epochs 10000
20240715-00:09:15 args.batch_size 25
20240715-00:09:15 args.physical_batch_size None
20240715-00:09:15 args.nb_train_samples 100000
20240715-00:09:15 args.nb_test_samples 10000
20240715-00:09:15 args.nb_new_c_quizzes_for_train 1000
20240715-00:09:15 args.nb_new_c_quizzes_for_test 100
20240715-00:09:15 args.learning_rate 0.0005
20240715-00:09:15 args.model 37M
20240715-00:09:15 args.dim_model 512
20240715-00:09:15 args.dim_keys 64
20240715-00:09:15 args.dim_hidden 2048
20240715-00:09:15 args.nb_heads 8
20240715-00:09:15 args.nb_blocks 12
20240715-00:09:15 args.dropout 0.1
20240715-00:09:15 args.deterministic_synthesis False
20240715-00:09:15 args.problem grids
20240715-00:09:15 args.nb_threads 1
20240715-00:09:15 args.gpus all
20240715-00:09:15 args.nb_gpts 5
20240715-00:09:15 args.accuracy_to_make_c_quizzes 0.9
20240715-00:09:15 args.proba_understands 0.9
20240715-00:09:15 args.proba_not_understands 0.5
20240715-00:09:15 args.generation_temperature 1.0
20240715-00:09:15 args.dirty_debug False
20240715-00:09:15 args.grids_tasks None
20240715-00:09:15 args.sky_height 6
20240715-00:09:15 args.sky_width 8
20240715-00:09:15 args.sky_nb_birds 3
20240715-00:09:15 args.sky_nb_iterations 2
20240715-00:09:15 args.sky_speed 3
20240715-00:09:26 main_device cuda:0 gpus ['cuda:0', 'cuda:1']
20240715-00:09:26 vocabulary_size 13
20240715-00:09:26 creating model 0 and its w_quizzes
20240715-00:11:16 creating model 1 and its w_quizzes
20240715-00:13:12 creating model 2 and its w_quizzes
20240715-00:15:09 creating model 3 and its w_quizzes
20240715-00:17:05 creating model 4 and its w_quizzes
20240715-00:18:59 successfully loaded gpt_000.pth
20240715-00:18:59 successfully loaded gpt_001.pth
20240715-00:18:59 successfully loaded gpt_002.pth
20240715-00:18:59 successfully loaded gpt_003.pth
20240715-00:18:59 successfully loaded gpt_004.pth
20240715-00:18:59 cannot find c_quizzes.pth
20240715-00:18:59 nb_parameters 37817357 (37M)
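
The 37,817,357 parameters reported above are consistent with a plain decoder-only transformer of the logged shape (dim_model 512, dim_hidden 2048, nb_blocks 12, nb_heads x dim_keys = 8 x 64 = 512). A rough back-of-the-envelope check; the exact module layout of main.py (biases, layer norms, positional encoding, tied or untied readout) is not visible in this log, so treat this only as a sanity estimate:

    # Rough parameter count from the logged hyperparameters. Assumes a vanilla
    # decoder-only transformer; main.py's exact layout is not shown in the log.
    dim_model, dim_hidden, nb_blocks, vocabulary_size = 512, 2048, 12, 13

    attention = 4 * dim_model * dim_model         # Q, K, V, O projection matrices
    mlp = 2 * dim_model * dim_hidden              # two feed-forward matrices
    embeddings = 2 * vocabulary_size * dim_model  # token embedding + readout (assumed untied)

    print(nb_blocks * (attention + mlp) + embeddings)
    # 37762048, close to the logged 37817357; the difference would be biases,
    # layer norms and positional parameters not counted here.
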
20240715-00:19:01 nb_new_c_quizzes_for_train 1000 nb_new_c_quizzes_for_test 100
20240715-00:19:01 --- epoch 0 ----------------------------------------
20240715-00:19:01 current_test_accuracies 0.9120 0.9050 0.9040 0.9040 0.9030
20240715-00:22:53 keep c_quizzes model 1 nb_accumulated 20 / 1100
20240715-00:26:50 keep c_quizzes model 0 nb_accumulated 37 / 1100
20240715-00:29:31 keep c_quizzes model 0 nb_accumulated 62 / 1100
20240715-00:32:06 keep c_quizzes model 0 nb_accumulated 87 / 1100
20240715-00:34:40 keep c_quizzes model 1 nb_accumulated 109 / 1100
20240715-00:37:13 keep c_quizzes model 4 nb_accumulated 126 / 1100
20240715-00:39:47 keep c_quizzes model 2 nb_accumulated 143 / 1100
20240715-00:42:21 keep c_quizzes model 1 nb_accumulated 160 / 1100
20240715-00:44:56 keep c_quizzes model 4 nb_accumulated 179 / 1100
20240715-00:47:31 keep c_quizzes model 4 nb_accumulated 199 / 1100
20240715-00:50:04 keep c_quizzes model 0 nb_accumulated 211 / 1100
20240715-00:52:38 keep c_quizzes model 3 nb_accumulated 236 / 1100
20240715-00:55:12 keep c_quizzes model 2 nb_accumulated 257 / 1100
20240715-00:57:46 keep c_quizzes model 0 nb_accumulated 275 / 1100
20240715-01:00:20 keep c_quizzes model 0 nb_accumulated 297 / 1100
20240715-01:02:53 keep c_quizzes model 4 nb_accumulated 325 / 1100
20240715-01:05:27 keep c_quizzes model 2 nb_accumulated 345 / 1100
20240715-01:08:01 keep c_quizzes model 1 nb_accumulated 363 / 1100
20240715-01:10:34 keep c_quizzes model 1 nb_accumulated 379 / 1100
20240715-01:13:08 keep c_quizzes model 4 nb_accumulated 394 / 1100
20240715-01:15:42 keep c_quizzes model 0 nb_accumulated 414 / 1100
20240715-01:18:17 keep c_quizzes model 0 nb_accumulated 428 / 1100
20240715-01:20:52 keep c_quizzes model 1 nb_accumulated 441 / 1100
20240715-01:23:27 keep c_quizzes model 0 nb_accumulated 465 / 1100
20240715-01:26:02 keep c_quizzes model 3 nb_accumulated 492 / 1100
20240715-01:28:37 keep c_quizzes model 0 nb_accumulated 505 / 1100
20240715-01:31:12 keep c_quizzes model 2 nb_accumulated 524 / 1100
20240715-01:33:48 keep c_quizzes model 4 nb_accumulated 549 / 1100
20240715-01:36:23 keep c_quizzes model 2 nb_accumulated 560 / 1100
20240715-01:38:58 keep c_quizzes model 3 nb_accumulated 576 / 1100
20240715-01:41:34 keep c_quizzes model 1 nb_accumulated 597 / 1100
20240715-01:44:09 keep c_quizzes model 2 nb_accumulated 615 / 1100
20240715-01:46:44 keep c_quizzes model 3 nb_accumulated 632 / 1100
20240715-01:49:19 keep c_quizzes model 4 nb_accumulated 653 / 1100
20240715-01:51:54 keep c_quizzes model 4 nb_accumulated 675 / 1100
20240715-01:54:29 keep c_quizzes model 0 nb_accumulated 697 / 1100
20240715-01:57:04 keep c_quizzes model 2 nb_accumulated 713 / 1100
20240715-01:59:40 keep c_quizzes model 3 nb_accumulated 730 / 1100
20240715-02:02:15 keep c_quizzes model 0 nb_accumulated 745 / 1100
20240715-02:04:50 keep c_quizzes model 1 nb_accumulated 756 / 1100
20240715-02:07:24 keep c_quizzes model 0 nb_accumulated 771 / 1100
20240715-02:09:59 keep c_quizzes model 0 nb_accumulated 790 / 1100
20240715-02:12:33 keep c_quizzes model 3 nb_accumulated 811 / 1100
20240715-02:15:08 keep c_quizzes model 2 nb_accumulated 826 / 1100
20240715-02:17:42 keep c_quizzes model 0 nb_accumulated 849 / 1100
20240715-02:20:17 keep c_quizzes model 0 nb_accumulated 878 / 1100
20240715-02:22:51 keep c_quizzes model 4 nb_accumulated 902 / 1100
20240715-02:25:26 keep c_quizzes model 2 nb_accumulated 924 / 1100
20240715-02:28:00 keep c_quizzes model 0 nb_accumulated 940 / 1100
20240715-02:30:35 keep c_quizzes model 2 nb_accumulated 959 / 1100
20240715-02:33:09 keep c_quizzes model 4 nb_accumulated 977 / 1100
20240715-02:35:43 keep c_quizzes model 2 nb_accumulated 996 / 1100
20240715-02:38:18 keep c_quizzes model 4 nb_accumulated 1020 / 1100
20240715-02:40:53 keep c_quizzes model 3 nb_accumulated 1036 / 1100
20240715-02:43:29 keep c_quizzes model 2 nb_accumulated 1058 / 1100
20240715-02:46:04 keep c_quizzes model 2 nb_accumulated 1080 / 1100
20240715-02:48:38 keep c_quizzes model 4 nb_accumulated 1096 / 1100
20240715-02:51:12 keep c_quizzes model 2 nb_accumulated 1116 / 1100
20240715-02:51:13 wrote c_quizzes.pth
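
Each keep c_quizzes line above reports how many freshly generated culture quizzes have been kept so far, out of a target of 1100 = nb_new_c_quizzes_for_train + nb_new_c_quizzes_for_test. The sketch below only mimics the counter behaviour visible in the log; the real generation and validation code in main.py (presumably driven by args.proba_understands and args.proba_not_understands) is not shown here, and keep_some_quizzes is a stand-in:

    import random

    # Hypothetical simulation of the accumulation phase, reconstructed from the
    # counters above; roughly 10-30 quizzes are kept per ~2.5-minute round.
    NB_FOR_TRAIN, NB_FOR_TEST = 1000, 100
    TARGET = NB_FOR_TRAIN + NB_FOR_TEST            # the "/ 1100" in the log

    def keep_some_quizzes(model_id):
        # stand-in for generating a batch with one GPT and filtering it
        return [f"quiz_from_model_{model_id}"] * random.randint(10, 30)

    nb_accumulated = 0
    while nb_accumulated < TARGET:
        model_id = random.randrange(5)             # args.nb_gpts models take turns
        nb_accumulated += len(keep_some_quizzes(model_id))
        print(f"keep c_quizzes model {model_id} nb_accumulated {nb_accumulated} / {TARGET}")

    # Whole batches are appended, hence the slight overshoot (1116 / 1100 above)
    # before c_quizzes.pth is written.
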
20240715-02:51:13 training model 0
20240715-02:51:13 training model 1
20240715-03:00:18 train_perplexity 0 model 0 1.1430834520386364
20240715-03:00:24 train_perplexity 0 model 1 1.1423646853781924
20240715-03:00:47 test_perplexity 0 model 0 1.1410397057938662
20240715-03:00:55 test_perplexity 0 model 1 1.1390125792310228
20240715-03:03:47 test_accuracy 0 model 0 forward 464 / 515 backward 395 / 485
20240715-03:03:47 main_test_accuracy 0 0.859000027179718
20240715-03:03:50 test_accuracy 0 model 1 forward 461 / 483 backward 430 / 517
20240715-03:03:50 main_test_accuracy 0 0.8910000324249268
20240715-03:03:50 wrote gpt_000.pth
20240715-03:03:51 wrote gpt_001.pth
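
For every epoch, the two models that were just trained are evaluated on 1000 test quizzes split into a forward and a backward direction, and the logged main_test_accuracy matches the sum of the two counters divided by 1000 (464 + 395 = 859 for model 0 above). The perplexities, under the usual definition exp(mean cross-entropy), put the ~1.14 values at roughly 0.13 nats per token. A small check of both readings; the interpretation of forward/backward as the two prediction directions of a quiz is an assumption:

    import math

    # main_test_accuracy appears to be (forward correct + backward correct) / 1000
    forward_ok, forward_n = 464, 515
    backward_ok, backward_n = 395, 485
    print((forward_ok + backward_ok) / (forward_n + backward_n))  # 0.859, as logged

    # assuming perplexity = exp(mean cross-entropy in nats)
    print(math.log(1.1430834520386364))  # ~0.134 nats/token for model 0's train_perplexity
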
20240715-03:03:58 --- epoch 1 ----------------------------------------
20240715-03:03:58 current_test_accuracies 0.8590 0.8910 0.0000 0.0000 0.0000
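
The 0.0000 entries in current_test_accuracies appear whenever a fresh batch of c_quizzes has just been written: only the models re-tested since the test material changed have a current figure, and the others read zero until their turn (the same pattern recurs after the later wrote c_quizzes.pth lines, at epochs 18, 24 and 29). A minimal sketch of that bookkeeping, as an assumption about what main.py tracks rather than a quote of its code:

    # Hypothetical bookkeeping behind current_test_accuracies: one slot per GPT,
    # cleared when a new c_quizzes batch changes the test set, refreshed only for
    # the models evaluated in a given epoch.
    NB_GPTS = 5
    current_test_accuracies = [0.0] * NB_GPTS

    def on_new_c_quizzes():
        for i in range(NB_GPTS):
            current_test_accuracies[i] = 0.0       # old figures no longer comparable

    def on_model_tested(model_id, accuracy):
        current_test_accuracies[model_id] = accuracy

    on_new_c_quizzes()
    on_model_tested(0, 0.8590)
    on_model_tested(1, 0.8910)
    print(" ".join(f"{a:.4f}" for a in current_test_accuracies))
    # 0.8590 0.8910 0.0000 0.0000 0.0000, as in the epoch 1 header above
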
20240715-03:03:58 training model 2
20240715-03:03:58 training model 3
20240715-03:13:03 train_perplexity 1 model 2 1.142580540166983
20240715-03:13:11 train_perplexity 1 model 3 1.1430704354958778
20240715-03:13:31 test_perplexity 1 model 2 1.141565987530598
20240715-03:13:40 test_perplexity 1 model 3 1.1399006541445829
20240715-03:16:35 test_accuracy 1 model 2 forward 472 / 507 backward 398 / 493
20240715-03:16:35 main_test_accuracy 1 0.8700000643730164
20240715-03:16:37 test_accuracy 1 model 3 forward 483 / 519 backward 388 / 481
20240715-03:16:37 main_test_accuracy 1 0.8710000514984131
20240715-03:16:38 wrote gpt_002.pth
20240715-03:16:38 wrote gpt_003.pth
20240715-03:16:45 --- epoch 2 ----------------------------------------
20240715-03:16:45 current_test_accuracies 0.8590 0.8910 0.8700 0.8710 0.0000
20240715-03:16:45 training model 4
20240715-03:16:45 training model 0
20240715-03:25:50 train_perplexity 2 model 4 1.1434123726349492
20240715-03:25:59 train_perplexity 2 model 0 1.1423119419628687
20240715-03:26:18 test_perplexity 2 model 4 1.141549120736477
20240715-03:26:28 test_perplexity 2 model 0 1.1397197784791686
20240715-03:29:22 test_accuracy 2 model 4 forward 467 / 510 backward 371 / 490
20240715-03:29:22 main_test_accuracy 2 0.8380000591278076
20240715-03:29:23 test_accuracy 2 model 0 forward 492 / 515 backward 411 / 485
20240715-03:29:23 main_test_accuracy 2 0.9030000567436218
20240715-03:29:24 wrote gpt_004.pth
20240715-03:29:25 wrote gpt_000.pth
20240715-03:29:32 --- epoch 3 ----------------------------------------
20240715-03:29:32 current_test_accuracies 0.9030 0.8910 0.8700 0.8710 0.8380
20240715-03:29:32 training model 4
20240715-03:29:32 training model 2
20240715-03:38:37 train_perplexity 3 model 4 1.1425609584674628
20240715-03:38:46 train_perplexity 3 model 2 1.1413207845307445
20240715-03:39:05 test_perplexity 3 model 4 1.140306918190884
20240715-03:39:14 test_perplexity 3 model 2 1.1407433065448789
20240715-03:42:08 test_accuracy 3 model 4 forward 484 / 510 backward 393 / 490
20240715-03:42:08 main_test_accuracy 3 0.8770000338554382
20240715-03:42:10 test_accuracy 3 model 2 forward 474 / 507 backward 400 / 493
20240715-03:42:10 main_test_accuracy 3 0.8740000128746033
20240715-03:42:11 wrote gpt_004.pth
20240715-03:42:11 wrote gpt_002.pth
20240715-03:42:19 --- epoch 4 ----------------------------------------
20240715-03:42:19 current_test_accuracies 0.9030 0.8910 0.8740 0.8710 0.8770
20240715-03:42:19 training model 3
20240715-03:42:19 training model 2
20240715-03:51:25 train_perplexity 4 model 3 1.1420777233151826
20240715-03:51:32 train_perplexity 4 model 2 1.1411286229307138
20240715-03:51:54 test_perplexity 4 model 3 1.1394201030008686
20240715-03:52:02 test_perplexity 4 model 2 1.1403540506521763
20240715-03:54:56 test_accuracy 4 model 3 forward 479 / 519 backward 383 / 481
20240715-03:54:56 main_test_accuracy 4 0.862000048160553
20240715-03:54:58 test_accuracy 4 model 2 forward 483 / 507 backward 402 / 493
20240715-03:54:58 main_test_accuracy 4 0.8850000500679016
20240715-03:54:59 wrote gpt_003.pth
20240715-03:54:59 wrote gpt_002.pth
20240715-03:55:07 --- epoch 5 ----------------------------------------
20240715-03:55:07 current_test_accuracies 0.9030 0.8910 0.8850 0.8620 0.8770
20240715-03:55:07 training model 3
20240715-03:55:07 training model 4
20240715-04:04:12 train_perplexity 5 model 3 1.1413780899841892
20240715-04:04:20 train_perplexity 5 model 4 1.1417037996758466
20240715-04:04:41 test_perplexity 5 model 3 1.1383335858298749
20240715-04:04:50 test_perplexity 5 model 4 1.1411197431113487
20240715-04:07:43 test_accuracy 5 model 3 forward 494 / 519 backward 397 / 481
20240715-04:07:43 main_test_accuracy 5 0.8910000324249268
20240715-04:07:45 test_accuracy 5 model 4 forward 480 / 510 backward 394 / 490
20240715-04:07:45 main_test_accuracy 5 0.8740000128746033
20240715-04:07:46 wrote gpt_003.pth
20240715-04:07:46 wrote gpt_004.pth
20240715-04:07:53 --- epoch 6 ----------------------------------------
20240715-04:07:53 current_test_accuracies 0.9030 0.8910 0.8850 0.8910 0.8740
20240715-04:07:53 training model 4
20240715-04:07:53 training model 2
20240715-04:16:58 train_perplexity 6 model 4 1.141848357545664
20240715-04:17:07 train_perplexity 6 model 2 1.1403548972110544
20240715-04:17:26 test_perplexity 6 model 4 1.140165441068691
20240715-04:17:36 test_perplexity 6 model 2 1.1407139970054425
20240715-04:20:28 test_accuracy 6 model 4 forward 485 / 510 backward 397 / 490
20240715-04:20:28 main_test_accuracy 6 0.8820000290870667
20240715-04:20:30 test_accuracy 6 model 2 forward 477 / 507 backward 411 / 493
20240715-04:20:30 main_test_accuracy 6 0.8880000710487366
20240715-04:20:31 wrote gpt_004.pth
20240715-04:20:31 wrote gpt_002.pth
20240715-04:20:38 --- epoch 7 ----------------------------------------
20240715-04:20:38 current_test_accuracies 0.9030 0.8910 0.8880 0.8910 0.8820
20240715-04:20:38 training model 4
20240715-04:20:38 training model 2
20240715-04:29:42 train_perplexity 7 model 4 1.141099370194611
20240715-04:29:51 train_perplexity 7 model 2 1.1402879471928298
20240715-04:30:10 test_perplexity 7 model 4 1.1396958640586232
20240715-04:30:20 test_perplexity 7 model 2 1.1404988565047403
20240715-04:33:11 test_accuracy 7 model 4 forward 485 / 510 backward 390 / 490
20240715-04:33:11 main_test_accuracy 7 0.8750000596046448
20240715-04:33:13 test_accuracy 7 model 2 forward 482 / 507 backward 419 / 493
20240715-04:33:13 main_test_accuracy 7 0.9010000228881836
20240715-04:33:14 wrote gpt_004.pth
20240715-04:33:14 wrote gpt_002.pth
20240715-04:33:21 --- epoch 8 ----------------------------------------
20240715-04:33:21 current_test_accuracies 0.9030 0.8910 0.9010 0.8910 0.8750
20240715-04:33:21 training model 4
20240715-04:33:21 training model 1
20240715-04:42:27 train_perplexity 8 model 4 1.1412269921545206
20240715-04:42:35 train_perplexity 8 model 1 1.141304065166524
20240715-04:42:55 test_perplexity 8 model 4 1.1400228701234447
20240715-04:43:04 test_perplexity 8 model 1 1.1390424919183684
20240715-04:45:57 test_accuracy 8 model 4 forward 489 / 510 backward 402 / 490
20240715-04:45:57 main_test_accuracy 8 0.8910000324249268
20240715-04:46:00 test_accuracy 8 model 1 forward 455 / 483 backward 419 / 517
20240715-04:46:00 main_test_accuracy 8 0.8740000128746033
20240715-04:46:01 wrote gpt_004.pth
20240715-04:46:01 wrote gpt_001.pth
20240715-04:46:09 --- epoch 9 ----------------------------------------
20240715-04:46:09 current_test_accuracies 0.9030 0.8740 0.9010 0.8910 0.8910
20240715-04:46:09 training model 1
20240715-04:46:09 training model 3
20240715-04:55:13 train_perplexity 9 model 1 1.1411031239813547
20240715-04:55:22 train_perplexity 9 model 3 1.1411224762604224
20240715-04:55:41 test_perplexity 9 model 1 1.1394873911460672
20240715-04:55:51 test_perplexity 9 model 3 1.1384879700104764
20240715-04:58:45 test_accuracy 9 model 1 forward 468 / 483 backward 432 / 517
20240715-04:58:45 main_test_accuracy 9 0.9000000357627869
20240715-04:58:47 test_accuracy 9 model 3 forward 491 / 519 backward 401 / 481
20240715-04:58:47 main_test_accuracy 9 0.8920000195503235
20240715-04:58:48 wrote gpt_001.pth
20240715-04:58:48 wrote gpt_003.pth
20240715-04:58:56 --- epoch 10 ----------------------------------------
20240715-04:58:56 current_test_accuracies 0.9030 0.9000 0.9010 0.8920 0.8910
20240715-04:58:56 training model 4
20240715-04:58:56 training model 3
20240715-05:08:00 train_perplexity 10 model 4 1.1407158474370827
20240715-05:08:10 train_perplexity 10 model 3 1.141061582390614
20240715-05:08:26 test_perplexity 10 model 4 1.1394520332404439
20240715-05:08:37 test_perplexity 10 model 3 1.1385543814556778
20240715-05:11:28 test_accuracy 10 model 4 forward 485 / 510 backward 404 / 490
20240715-05:11:28 main_test_accuracy 10 0.8890000581741333
20240715-05:11:31 test_accuracy 10 model 3 forward 496 / 519 backward 401 / 481
20240715-05:11:31 main_test_accuracy 10 0.8970000147819519
20240715-05:11:31 wrote gpt_004.pth
20240715-05:11:32 wrote gpt_003.pth
20240715-05:11:39 --- epoch 11 ----------------------------------------
20240715-05:11:39 current_test_accuracies 0.9030 0.9000 0.9010 0.8970 0.8890
20240715-05:11:39 training model 4
20240715-05:11:39 training model 3
20240715-05:20:44 train_perplexity 11 model 4 1.1404303914305693
20240715-05:20:53 train_perplexity 11 model 3 1.1409422589665232
20240715-05:21:12 test_perplexity 11 model 4 1.1396992382259177
20240715-05:21:21 test_perplexity 11 model 3 1.1388183553662639
20240715-05:24:16 test_accuracy 11 model 4 forward 499 / 510 backward 407 / 490
20240715-05:24:16 main_test_accuracy 11 0.906000018119812
20240715-05:24:18 test_accuracy 11 model 3 forward 489 / 519 backward 396 / 481
20240715-05:24:18 main_test_accuracy 11 0.8850000500679016
20240715-05:24:19 wrote gpt_004.pth
20240715-05:24:19 wrote gpt_003.pth
20240715-05:24:26 --- epoch 12 ----------------------------------------
20240715-05:24:26 current_test_accuracies 0.9030 0.9000 0.9010 0.8850 0.9060
20240715-05:24:26 training model 3
20240715-05:24:26 training model 1
20240715-05:33:32 train_perplexity 12 model 3 1.1404393161534925
20240715-05:33:40 train_perplexity 12 model 1 1.141049417771769
20240715-05:33:59 test_perplexity 12 model 3 1.1384021964927558
20240715-05:34:09 test_perplexity 12 model 1 1.1383997046612053
20240715-05:37:02 test_accuracy 12 model 3 forward 495 / 519 backward 396 / 481
20240715-05:37:02 main_test_accuracy 12 0.8910000324249268
20240715-05:37:05 test_accuracy 12 model 1 forward 464 / 483 backward 429 / 517
20240715-05:37:05 main_test_accuracy 12 0.893000066280365
20240715-05:37:06 wrote gpt_003.pth
20240715-05:37:06 wrote gpt_001.pth
20240715-05:37:13 --- epoch 13 ----------------------------------------
20240715-05:37:13 current_test_accuracies 0.9030 0.8930 0.9010 0.8910 0.9060
20240715-05:37:13 training model 3
20240715-05:37:13 training model 1
20240715-05:46:18 train_perplexity 13 model 3 1.140254493420507
20240715-05:46:27 train_perplexity 13 model 1 1.1409373942978345
20240715-05:46:45 test_perplexity 13 model 3 1.138570643783188
20240715-05:46:56 test_perplexity 13 model 1 1.1379135273751115
20240715-05:49:48 test_accuracy 13 model 3 forward 492 / 519 backward 397 / 481
20240715-05:49:48 main_test_accuracy 13 0.8890000581741333
20240715-05:49:51 test_accuracy 13 model 1 forward 466 / 483 backward 434 / 517
20240715-05:49:51 main_test_accuracy 13 0.9000000357627869
20240715-05:49:52 wrote gpt_003.pth
20240715-05:49:52 wrote gpt_001.pth
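
For the ordinary epochs, the pair of models being trained is always the two with the lowest entries in the preceding current_test_accuracies line, apparently with ties broken by model index; immediately after a new c_quizzes batch the schedule restarts from models 0 and 1, which is consistent with the per-model records being cleared when the test set changes (see the 0.0000 entries discussed above). This is an observation about the log, not a quote of main.py's selection code; a sketch of the rule it is consistent with:

    # Observed rule: each epoch trains the two weakest models, i.e. the two
    # lowest current_test_accuracies, ties broken by model index. For epoch 13
    # above this picks models 3 (0.8910) and 1 (0.8930).
    def models_to_train(current_test_accuracies, nb_to_train=2):
        ranked = sorted(range(len(current_test_accuracies)),
                        key=lambda i: current_test_accuracies[i])
        return ranked[:nb_to_train]

    print(models_to_train([0.9030, 0.8930, 0.9010, 0.8910, 0.9060]))  # [3, 1]
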
20240715-05:49:59 --- epoch 14 ----------------------------------------
20240715-05:49:59 current_test_accuracies 0.9030 0.9000 0.9010 0.8890 0.9060
20240715-05:49:59 training model 3
20240715-05:49:59 training model 1
20240715-05:59:04 train_perplexity 14 model 3 1.1401741535889671
20240715-05:59:12 train_perplexity 14 model 1 1.14082753534466
20240715-05:59:31 test_perplexity 14 model 3 1.1379958360546287
20240715-05:59:41 test_perplexity 14 model 1 1.1385415379431019
20240715-06:02:33 test_accuracy 14 model 3 forward 498 / 519 backward 404 / 481
20240715-06:02:33 main_test_accuracy 14 0.9020000696182251
20240715-06:02:36 test_accuracy 14 model 1 forward 450 / 483 backward 436 / 517
20240715-06:02:36 main_test_accuracy 14 0.8860000371932983
20240715-06:02:37 wrote gpt_003.pth
20240715-06:02:37 wrote gpt_001.pth
20240715-06:02:44 --- epoch 15 ----------------------------------------
20240715-06:02:44 current_test_accuracies 0.9030 0.8860 0.9010 0.9020 0.9060
20240715-06:02:44 training model 1
20240715-06:02:44 training model 2
20240715-06:11:49 train_perplexity 15 model 1 1.140248030156348
20240715-06:11:57 train_perplexity 15 model 2 1.140294969982541
20240715-06:12:18 test_perplexity 15 model 1 1.1377089289102393
20240715-06:12:27 test_perplexity 15 model 2 1.1400891400583186
20240715-06:15:21 test_accuracy 15 model 1 forward 461 / 483 backward 433 / 517
20240715-06:15:21 main_test_accuracy 15 0.8940000534057617
20240715-06:15:22 test_accuracy 15 model 2 forward 482 / 507 backward 410 / 493
20240715-06:15:22 main_test_accuracy 15 0.8920000195503235
20240715-06:15:23 wrote gpt_001.pth
20240715-06:15:23 wrote gpt_002.pth
20240715-06:15:31 --- epoch 16 ----------------------------------------
20240715-06:15:31 current_test_accuracies 0.9030 0.8940 0.8920 0.9020 0.9060
20240715-06:15:31 training model 2
20240715-06:15:31 training model 1
20240715-06:24:36 train_perplexity 16 model 2 1.1402865663956003
20240715-06:24:44 train_perplexity 16 model 1 1.1398054284860866
20240715-06:25:05 test_perplexity 16 model 2 1.1398058158074589
20240715-06:25:14 test_perplexity 16 model 1 1.1379584777950427
20240715-06:28:07 test_accuracy 16 model 2 forward 489 / 507 backward 419 / 493
20240715-06:28:07 main_test_accuracy 16 0.9080000519752502
20240715-06:28:09 test_accuracy 16 model 1 forward 467 / 483 backward 439 / 517
20240715-06:28:09 main_test_accuracy 16 0.906000018119812
20240715-06:28:10 wrote gpt_002.pth
20240715-06:28:10 wrote gpt_001.pth
20240715-06:28:18 --- epoch 17 ----------------------------------------
20240715-06:28:18 current_test_accuracies 0.9030 0.9060 0.9080 0.9020 0.9060
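
A c_quizzes generation phase only starts after epoch headers in which every model is at or above args.accuracy_to_make_c_quizzes = 0.9 (epochs 0 and 17 so far, and later 23, 28 and 32); it never starts while some model is still below that threshold. A sketch of that gate, again inferred from the log rather than quoted from main.py:

    # Inferred gate for generating new c_quizzes: only when the weakest model
    # has reached args.accuracy_to_make_c_quizzes.
    ACCURACY_TO_MAKE_C_QUIZZES = 0.9

    def should_make_c_quizzes(current_test_accuracies):
        return min(current_test_accuracies) >= ACCURACY_TO_MAKE_C_QUIZZES

    print(should_make_c_quizzes([0.9030, 0.9060, 0.9080, 0.9020, 0.9060]))  # epoch 17: True
    print(should_make_c_quizzes([0.9030, 0.8940, 0.8920, 0.9020, 0.9060]))  # epoch 16: False
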
20240715-06:32:09 keep c_quizzes model 4 nb_accumulated 10 / 1100
20240715-06:36:01 keep c_quizzes model 0 nb_accumulated 28 / 1100
20240715-06:38:41 keep c_quizzes model 4 nb_accumulated 47 / 1100
20240715-06:41:16 keep c_quizzes model 0 nb_accumulated 61 / 1100
20240715-06:43:50 keep c_quizzes model 4 nb_accumulated 86 / 1100
20240715-06:46:24 keep c_quizzes model 2 nb_accumulated 99 / 1100
20240715-06:48:58 keep c_quizzes model 2 nb_accumulated 108 / 1100
20240715-06:51:33 keep c_quizzes model 3 nb_accumulated 121 / 1100
20240715-06:54:07 keep c_quizzes model 1 nb_accumulated 134 / 1100
20240715-06:56:42 keep c_quizzes model 3 nb_accumulated 148 / 1100
20240715-06:59:16 keep c_quizzes model 2 nb_accumulated 161 / 1100
20240715-07:01:50 keep c_quizzes model 1 nb_accumulated 180 / 1100
20240715-07:04:24 keep c_quizzes model 2 nb_accumulated 192 / 1100
20240715-07:06:58 keep c_quizzes model 4 nb_accumulated 204 / 1100
20240715-07:09:32 keep c_quizzes model 3 nb_accumulated 229 / 1100
20240715-07:12:07 keep c_quizzes model 3 nb_accumulated 247 / 1100
20240715-07:14:41 keep c_quizzes model 3 nb_accumulated 266 / 1100
20240715-07:17:14 keep c_quizzes model 4 nb_accumulated 281 / 1100
20240715-07:19:48 keep c_quizzes model 4 nb_accumulated 304 / 1100
20240715-07:22:23 keep c_quizzes model 1 nb_accumulated 318 / 1100
20240715-07:24:57 keep c_quizzes model 1 nb_accumulated 332 / 1100
20240715-07:27:31 keep c_quizzes model 1 nb_accumulated 353 / 1100
20240715-07:30:05 keep c_quizzes model 0 nb_accumulated 372 / 1100
20240715-07:32:39 keep c_quizzes model 3 nb_accumulated 391 / 1100
20240715-07:35:14 keep c_quizzes model 1 nb_accumulated 413 / 1100
20240715-07:37:48 keep c_quizzes model 0 nb_accumulated 428 / 1100
20240715-07:40:22 keep c_quizzes model 3 nb_accumulated 452 / 1100
20240715-07:42:56 keep c_quizzes model 1 nb_accumulated 465 / 1100
20240715-07:45:30 keep c_quizzes model 2 nb_accumulated 481 / 1100
20240715-07:48:05 keep c_quizzes model 4 nb_accumulated 508 / 1100
20240715-07:50:40 keep c_quizzes model 3 nb_accumulated 523 / 1100
20240715-07:53:14 keep c_quizzes model 3 nb_accumulated 544 / 1100
20240715-07:55:48 keep c_quizzes model 0 nb_accumulated 555 / 1100
20240715-07:58:23 keep c_quizzes model 2 nb_accumulated 564 / 1100
20240715-08:00:57 keep c_quizzes model 2 nb_accumulated 575 / 1100
20240715-08:03:31 keep c_quizzes model 2 nb_accumulated 586 / 1100
20240715-08:06:05 keep c_quizzes model 2 nb_accumulated 599 / 1100
20240715-08:08:39 keep c_quizzes model 1 nb_accumulated 613 / 1100
20240715-08:11:13 keep c_quizzes model 4 nb_accumulated 630 / 1100
20240715-08:13:48 keep c_quizzes model 1 nb_accumulated 644 / 1100
20240715-08:16:22 keep c_quizzes model 3 nb_accumulated 663 / 1100
20240715-08:18:56 keep c_quizzes model 1 nb_accumulated 681 / 1100
20240715-08:21:30 keep c_quizzes model 4 nb_accumulated 703 / 1100
20240715-08:24:04 keep c_quizzes model 3 nb_accumulated 714 / 1100
20240715-08:26:38 keep c_quizzes model 4 nb_accumulated 730 / 1100
20240715-08:29:12 keep c_quizzes model 3 nb_accumulated 752 / 1100
20240715-08:31:46 keep c_quizzes model 3 nb_accumulated 764 / 1100
20240715-08:34:20 keep c_quizzes model 0 nb_accumulated 785 / 1100
20240715-08:36:54 keep c_quizzes model 1 nb_accumulated 797 / 1100
20240715-08:39:28 keep c_quizzes model 0 nb_accumulated 809 / 1100
20240715-08:42:02 keep c_quizzes model 3 nb_accumulated 828 / 1100
20240715-08:44:36 keep c_quizzes model 1 nb_accumulated 844 / 1100
20240715-08:47:10 keep c_quizzes model 3 nb_accumulated 857 / 1100
20240715-08:49:43 keep c_quizzes model 2 nb_accumulated 876 / 1100
20240715-08:52:18 keep c_quizzes model 1 nb_accumulated 890 / 1100
20240715-08:54:52 keep c_quizzes model 0 nb_accumulated 907 / 1100
20240715-08:57:26 keep c_quizzes model 4 nb_accumulated 922 / 1100
20240715-09:00:00 keep c_quizzes model 1 nb_accumulated 941 / 1100
20240715-09:02:34 keep c_quizzes model 3 nb_accumulated 960 / 1100
20240715-09:05:07 keep c_quizzes model 0 nb_accumulated 982 / 1100
20240715-09:07:41 keep c_quizzes model 2 nb_accumulated 994 / 1100
20240715-09:10:16 keep c_quizzes model 4 nb_accumulated 1007 / 1100
20240715-09:12:51 keep c_quizzes model 0 nb_accumulated 1017 / 1100
20240715-09:15:25 keep c_quizzes model 0 nb_accumulated 1038 / 1100
20240715-09:17:59 keep c_quizzes model 4 nb_accumulated 1062 / 1100
20240715-09:20:33 keep c_quizzes model 0 nb_accumulated 1077 / 1100
20240715-09:23:07 keep c_quizzes model 1 nb_accumulated 1094 / 1100
20240715-09:25:41 keep c_quizzes model 4 nb_accumulated 1105 / 1100
20240715-09:25:41 wrote c_quizzes.pth
20240715-09:25:41 training model 0
20240715-09:25:41 training model 1
20240715-09:34:46 train_perplexity 17 model 0 1.1420382538862284
20240715-09:34:52 train_perplexity 17 model 1 1.1405164690448726
20240715-09:35:15 test_perplexity 17 model 0 1.1397529236339123
20240715-09:35:23 test_perplexity 17 model 1 1.137770830208441
20240715-09:38:17 test_accuracy 17 model 0 forward 479 / 515 backward 400 / 485
20240715-09:38:17 main_test_accuracy 17 0.8790000677108765
20240715-09:38:19 test_accuracy 17 model 1 forward 474 / 483 backward 436 / 517
20240715-09:38:19 main_test_accuracy 17 0.9100000262260437
20240715-09:38:20 wrote gpt_000.pth
20240715-09:38:21 wrote gpt_001.pth
20240715-09:38:28 --- epoch 18 ----------------------------------------
20240715-09:38:28 current_test_accuracies 0.8790 0.9100 0.0000 0.0000 0.0000
20240715-09:38:28 training model 2
20240715-09:38:28 training model 3
20240715-09:47:33 train_perplexity 18 model 2 1.1406417698537406
20240715-09:47:42 train_perplexity 18 model 3 1.14090382150312
20240715-09:48:01 test_perplexity 18 model 2 1.1399993518150808
20240715-09:48:11 test_perplexity 18 model 3 1.1384082173265173
20240715-09:51:04 test_accuracy 18 model 2 forward 484 / 507 backward 419 / 493
20240715-09:51:04 main_test_accuracy 18 0.9030000567436218
20240715-09:51:06 test_accuracy 18 model 3 forward 484 / 519 backward 403 / 481
20240715-09:51:06 main_test_accuracy 18 0.8870000243186951
20240715-09:51:07 wrote gpt_002.pth
20240715-09:51:07 wrote gpt_003.pth
20240715-09:51:14 --- epoch 19 ----------------------------------------
20240715-09:51:14 current_test_accuracies 0.8790 0.9100 0.9030 0.8870 0.0000
20240715-09:51:14 training model 4
20240715-09:51:14 training model 0
20240715-10:00:19 train_perplexity 19 model 4 1.1410932914646732
20240715-10:00:28 train_perplexity 19 model 0 1.1421513371060277
20240715-10:00:46 test_perplexity 19 model 4 1.1397439581743798
20240715-10:00:56 test_perplexity 19 model 0 1.140210664430285
20240715-10:03:50 test_accuracy 19 model 4 forward 487 / 510 backward 391 / 490
20240715-10:03:50 main_test_accuracy 19 0.878000020980835
20240715-10:03:52 test_accuracy 19 model 0 forward 493 / 515 backward 386 / 485
20240715-10:03:52 main_test_accuracy 19 0.8790000677108765
20240715-10:03:53 wrote gpt_004.pth
20240715-10:03:53 wrote gpt_000.pth
20240715-10:04:01 --- epoch 20 ----------------------------------------
20240715-10:04:01 current_test_accuracies 0.8790 0.9100 0.9030 0.8870 0.8780
20240715-10:04:01 training model 4
20240715-10:04:01 training model 0
20240715-10:13:06 train_perplexity 20 model 4 1.1403125777131202
20240715-10:13:14 train_perplexity 20 model 0 1.142107705949692
20240715-10:13:33 test_perplexity 20 model 4 1.140449973195165
20240715-10:13:43 test_perplexity 20 model 0 1.1397995159175618
20240715-10:16:36 test_accuracy 20 model 4 forward 497 / 510 backward 413 / 490
20240715-10:16:36 main_test_accuracy 20 0.9100000262260437
20240715-10:16:38 test_accuracy 20 model 0 forward 476 / 515 backward 401 / 485
20240715-10:16:38 main_test_accuracy 20 0.8770000338554382
20240715-10:16:39 wrote gpt_004.pth
20240715-10:16:39 wrote gpt_000.pth
20240715-10:16:46 --- epoch 21 ----------------------------------------
20240715-10:16:46 current_test_accuracies 0.8770 0.9100 0.9030 0.8870 0.9100
20240715-10:16:46 training model 0
20240715-10:16:46 training model 3
20240715-10:25:52 train_perplexity 21 model 0 1.1409899202462825
20240715-10:25:54 train_perplexity 21 model 3 1.14035783035209
20240715-10:26:27 test_perplexity 21 model 0 1.139051804923632
20240715-10:26:29 test_perplexity 21 model 3 1.1383023791096543
20240715-10:29:24 test_accuracy 21 model 0 forward 491 / 515 backward 396 / 485
20240715-10:29:24 main_test_accuracy 21 0.8870000243186951
20240715-10:29:25 test_accuracy 21 model 3 forward 498 / 519 backward 405 / 481
20240715-10:29:25 main_test_accuracy 21 0.9030000567436218
20240715-10:29:26 wrote gpt_000.pth
20240715-10:29:26 wrote gpt_003.pth
20240715-10:29:33 --- epoch 22 ----------------------------------------
20240715-10:29:33 current_test_accuracies 0.8870 0.9100 0.9030 0.9030 0.9100
20240715-10:29:33 training model 0
20240715-10:29:33 training model 2
20240715-10:38:38 train_perplexity 22 model 0 1.1410476297608874
20240715-10:38:47 train_perplexity 22 model 2 1.1402067580725739
20240715-10:39:05 test_perplexity 22 model 0 1.1387551941383638
20240715-10:39:15 test_perplexity 22 model 2 1.140165930459669
20240715-10:42:07 test_accuracy 22 model 0 forward 494 / 515 backward 414 / 485
20240715-10:42:07 main_test_accuracy 22 0.9080000519752502
20240715-10:42:09 test_accuracy 22 model 2 forward 483 / 507 backward 419 / 493
20240715-10:42:09 main_test_accuracy 22 0.9020000696182251
20240715-10:42:10 wrote gpt_000.pth
20240715-10:42:11 wrote gpt_002.pth
20240715-10:42:18 --- epoch 23 ----------------------------------------
20240715-10:42:18 current_test_accuracies 0.9080 0.9100 0.9020 0.9030 0.9100
20240715-10:46:09 keep c_quizzes model 4 nb_accumulated 22 / 1100
20240715-10:50:03 keep c_quizzes model 3 nb_accumulated 33 / 1100
20240715-10:52:44 keep c_quizzes model 1 nb_accumulated 51 / 1100
20240715-10:55:20 keep c_quizzes model 1 nb_accumulated 68 / 1100
20240715-10:57:55 keep c_quizzes model 2 nb_accumulated 83 / 1100
20240715-11:00:30 keep c_quizzes model 3 nb_accumulated 99 / 1100
20240715-11:03:05 keep c_quizzes model 1 nb_accumulated 111 / 1100
20240715-11:05:40 keep c_quizzes model 2 nb_accumulated 124 / 1100
20240715-11:08:15 keep c_quizzes model 2 nb_accumulated 133 / 1100
20240715-11:10:50 keep c_quizzes model 1 nb_accumulated 154 / 1100
20240715-11:13:26 keep c_quizzes model 3 nb_accumulated 169 / 1100
20240715-11:16:01 keep c_quizzes model 1 nb_accumulated 187 / 1100
20240715-11:18:36 keep c_quizzes model 0 nb_accumulated 199 / 1100
20240715-11:21:12 keep c_quizzes model 1 nb_accumulated 220 / 1100
20240715-11:23:46 keep c_quizzes model 3 nb_accumulated 239 / 1100
20240715-11:26:22 keep c_quizzes model 0 nb_accumulated 246 / 1100
20240715-11:28:58 keep c_quizzes model 2 nb_accumulated 264 / 1100
20240715-11:31:34 keep c_quizzes model 4 nb_accumulated 285 / 1100
20240715-11:34:09 keep c_quizzes model 1 nb_accumulated 304 / 1100
20240715-11:36:44 keep c_quizzes model 1 nb_accumulated 316 / 1100
20240715-11:39:19 keep c_quizzes model 2 nb_accumulated 329 / 1100
20240715-11:41:54 keep c_quizzes model 4 nb_accumulated 341 / 1100
20240715-11:44:29 keep c_quizzes model 4 nb_accumulated 356 / 1100
20240715-11:47:04 keep c_quizzes model 0 nb_accumulated 374 / 1100
20240715-11:49:39 keep c_quizzes model 1 nb_accumulated 392 / 1100
20240715-11:52:14 keep c_quizzes model 3 nb_accumulated 408 / 1100
20240715-11:54:49 keep c_quizzes model 1 nb_accumulated 430 / 1100
20240715-11:57:24 keep c_quizzes model 4 nb_accumulated 445 / 1100
20240715-11:59:59 keep c_quizzes model 4 nb_accumulated 462 / 1100
20240715-12:02:34 keep c_quizzes model 2 nb_accumulated 477 / 1100
20240715-12:05:09 keep c_quizzes model 3 nb_accumulated 495 / 1100
20240715-12:07:45 keep c_quizzes model 4 nb_accumulated 518 / 1100
20240715-12:10:20 keep c_quizzes model 4 nb_accumulated 532 / 1100
20240715-12:12:56 keep c_quizzes model 0 nb_accumulated 545 / 1100
20240715-12:15:32 keep c_quizzes model 0 nb_accumulated 561 / 1100
20240715-12:18:07 keep c_quizzes model 1 nb_accumulated 574 / 1100
20240715-12:20:42 keep c_quizzes model 3 nb_accumulated 591 / 1100
20240715-12:23:18 keep c_quizzes model 0 nb_accumulated 603 / 1100
20240715-12:25:53 keep c_quizzes model 4 nb_accumulated 614 / 1100
20240715-12:28:28 keep c_quizzes model 2 nb_accumulated 628 / 1100
20240715-12:31:03 keep c_quizzes model 3 nb_accumulated 641 / 1100
20240715-12:33:38 keep c_quizzes model 2 nb_accumulated 664 / 1100
20240715-12:36:13 keep c_quizzes model 4 nb_accumulated 697 / 1100
20240715-12:38:48 keep c_quizzes model 3 nb_accumulated 709 / 1100
20240715-12:41:23 keep c_quizzes model 2 nb_accumulated 723 / 1100
20240715-12:43:59 keep c_quizzes model 0 nb_accumulated 734 / 1100
20240715-12:46:34 keep c_quizzes model 2 nb_accumulated 742 / 1100
20240715-12:49:09 keep c_quizzes model 0 nb_accumulated 755 / 1100
20240715-12:51:44 keep c_quizzes model 3 nb_accumulated 770 / 1100
20240715-12:54:19 keep c_quizzes model 2 nb_accumulated 786 / 1100
20240715-12:56:54 keep c_quizzes model 4 nb_accumulated 805 / 1100
20240715-12:59:29 keep c_quizzes model 2 nb_accumulated 824 / 1100
20240715-13:02:03 keep c_quizzes model 4 nb_accumulated 842 / 1100
20240715-13:04:37 keep c_quizzes model 2 nb_accumulated 861 / 1100
20240715-13:07:11 keep c_quizzes model 4 nb_accumulated 883 / 1100
20240715-13:09:45 keep c_quizzes model 1 nb_accumulated 901 / 1100
20240715-13:12:19 keep c_quizzes model 2 nb_accumulated 910 / 1100
20240715-13:14:54 keep c_quizzes model 3 nb_accumulated 926 / 1100
20240715-13:17:28 keep c_quizzes model 0 nb_accumulated 944 / 1100
20240715-13:20:02 keep c_quizzes model 2 nb_accumulated 960 / 1100
20240715-13:22:36 keep c_quizzes model 4 nb_accumulated 977 / 1100
20240715-13:25:11 keep c_quizzes model 0 nb_accumulated 988 / 1100
20240715-13:27:46 keep c_quizzes model 0 nb_accumulated 1001 / 1100
20240715-13:30:21 keep c_quizzes model 4 nb_accumulated 1017 / 1100
20240715-13:32:55 keep c_quizzes model 0 nb_accumulated 1031 / 1100
20240715-13:35:30 keep c_quizzes model 3 nb_accumulated 1047 / 1100
20240715-13:38:04 keep c_quizzes model 3 nb_accumulated 1063 / 1100
20240715-13:40:38 keep c_quizzes model 2 nb_accumulated 1078 / 1100
20240715-13:43:12 keep c_quizzes model 2 nb_accumulated 1094 / 1100
20240715-13:45:47 keep c_quizzes model 4 nb_accumulated 1118 / 1100
20240715-13:45:47 wrote c_quizzes.pth
20240715-13:45:47 training model 0
20240715-13:45:47 training model 1
20240715-13:54:51 train_perplexity 23 model 0 1.141629334151315
20240715-13:54:59 train_perplexity 23 model 1 1.1409981379290308
20240715-13:55:20 test_perplexity 23 model 0 1.1401129834348427
20240715-13:55:29 test_perplexity 23 model 1 1.138946329720106
20240715-13:58:21 test_accuracy 23 model 0 forward 494 / 515 backward 405 / 485
20240715-13:58:21 main_test_accuracy 23 0.8990000486373901
20240715-13:58:24 test_accuracy 23 model 1 forward 463 / 483 backward 433 / 517
20240715-13:58:24 main_test_accuracy 23 0.8960000276565552
20240715-13:58:25 wrote gpt_000.pth
20240715-13:58:25 wrote gpt_001.pth
20240715-13:58:33 --- epoch 24 ----------------------------------------
20240715-13:58:33 current_test_accuracies 0.8990 0.8960 0.0000 0.0000 0.0000
20240715-13:58:33 training model 2
20240715-13:58:33 training model 3
20240715-14:07:38 train_perplexity 24 model 2 1.1407010592703906
20240715-14:07:47 train_perplexity 24 model 3 1.1406886137280574
20240715-14:08:05 test_perplexity 24 model 2 1.140829990522475
20240715-14:08:15 test_perplexity 24 model 3 1.138595529353599
20240715-14:11:08 test_accuracy 24 model 2 forward 490 / 507 backward 418 / 493
20240715-14:11:08 main_test_accuracy 24 0.9080000519752502
20240715-14:11:10 test_accuracy 24 model 3 forward 506 / 519 backward 409 / 481
20240715-14:11:10 main_test_accuracy 24 0.9150000214576721
20240715-14:11:11 wrote gpt_002.pth
20240715-14:11:11 wrote gpt_003.pth
20240715-14:11:19 --- epoch 25 ----------------------------------------
20240715-14:11:19 current_test_accuracies 0.8990 0.8960 0.9080 0.9150 0.0000
20240715-14:11:19 training model 4
20240715-14:11:19 training model 1
20240715-14:20:25 train_perplexity 25 model 4 1.1412237938719103
20240715-14:20:32 train_perplexity 25 model 1 1.1404324922726314
20240715-14:20:54 test_perplexity 25 model 4 1.139679003158181
20240715-14:21:02 test_perplexity 25 model 1 1.138460043632155
20240715-14:23:56 test_accuracy 25 model 4 forward 490 / 510 backward 408 / 490
20240715-14:23:56 main_test_accuracy 25 0.8980000615119934
20240715-14:23:58 test_accuracy 25 model 1 forward 472 / 483 backward 427 / 517
20240715-14:23:58 main_test_accuracy 25 0.8990000486373901
20240715-14:23:59 wrote gpt_004.pth
20240715-14:23:59 wrote gpt_001.pth
20240715-14:24:07 --- epoch 26 ----------------------------------------
20240715-14:24:07 current_test_accuracies 0.8990 0.8990 0.9080 0.9150 0.8980
20240715-14:24:07 training model 4
20240715-14:24:07 training model 0
20240715-14:33:13 train_perplexity 26 model 4 1.1411662010528674
20240715-14:33:15 train_perplexity 26 model 0 1.1415513807286912
20240715-14:33:48 test_perplexity 26 model 4 1.1396176128427278
20240715-14:33:50 test_perplexity 26 model 0 1.1397696665708799
20240715-14:36:46 test_accuracy 26 model 4 forward 497 / 510 backward 420 / 490
20240715-14:36:46 main_test_accuracy 26 0.9170000553131104
20240715-14:36:47 test_accuracy 26 model 0 forward 491 / 515 backward 406 / 485
20240715-14:36:47 main_test_accuracy 26 0.8970000147819519
20240715-14:36:48 wrote gpt_004.pth
20240715-14:36:48 wrote gpt_000.pth
20240715-14:36:55 --- epoch 27 ----------------------------------------
20240715-14:36:55 current_test_accuracies 0.8970 0.8990 0.9080 0.9150 0.9170
20240715-14:36:55 training model 0
20240715-14:36:55 training model 1
20240715-14:46:00 train_perplexity 27 model 0 1.141038385173114
20240715-14:46:08 train_perplexity 27 model 1 1.140706593033312
20240715-14:46:29 test_perplexity 27 model 0 1.1394758287182323
20240715-14:46:37 test_perplexity 27 model 1 1.138198827760137
20240715-14:49:31 test_accuracy 27 model 0 forward 503 / 515 backward 422 / 485
20240715-14:49:31 main_test_accuracy 27 0.9250000715255737
20240715-14:49:33 test_accuracy 27 model 1 forward 467 / 483 backward 448 / 517
20240715-14:49:33 main_test_accuracy 27 0.9150000214576721
20240715-14:49:34 wrote gpt_000.pth
20240715-14:49:34 wrote gpt_001.pth
20240715-14:49:42 --- epoch 28 ----------------------------------------
20240715-14:49:42 current_test_accuracies 0.9250 0.9150 0.9080 0.9150 0.9170
20240715-14:53:34 keep c_quizzes model 0 nb_accumulated 22 / 1100
20240715-14:57:27 keep c_quizzes model 1 nb_accumulated 36 / 1100
20240715-15:00:09 keep c_quizzes model 1 nb_accumulated 53 / 1100
20240715-15:02:44 keep c_quizzes model 4 nb_accumulated 80 / 1100
20240715-15:05:19 keep c_quizzes model 2 nb_accumulated 100 / 1100
20240715-15:07:54 keep c_quizzes model 1 nb_accumulated 126 / 1100
20240715-15:10:29 keep c_quizzes model 1 nb_accumulated 148 / 1100
20240715-15:13:04 keep c_quizzes model 4 nb_accumulated 162 / 1100
20240715-15:15:39 keep c_quizzes model 0 nb_accumulated 170 / 1100
20240715-15:18:13 keep c_quizzes model 2 nb_accumulated 187 / 1100
20240715-15:20:48 keep c_quizzes model 2 nb_accumulated 210 / 1100
20240715-15:23:23 keep c_quizzes model 1 nb_accumulated 226 / 1100
20240715-15:25:58 keep c_quizzes model 2 nb_accumulated 240 / 1100
20240715-15:28:33 keep c_quizzes model 0 nb_accumulated 259 / 1100
20240715-15:31:07 keep c_quizzes model 2 nb_accumulated 280 / 1100
20240715-15:33:42 keep c_quizzes model 2 nb_accumulated 298 / 1100
20240715-15:36:17 keep c_quizzes model 1 nb_accumulated 319 / 1100
20240715-15:38:51 keep c_quizzes model 1 nb_accumulated 331 / 1100
20240715-15:41:26 keep c_quizzes model 1 nb_accumulated 345 / 1100
20240715-15:44:01 keep c_quizzes model 4 nb_accumulated 359 / 1100
20240715-15:46:35 keep c_quizzes model 4 nb_accumulated 380 / 1100
20240715-15:49:09 keep c_quizzes model 4 nb_accumulated 397 / 1100
20240715-15:51:44 keep c_quizzes model 1 nb_accumulated 419 / 1100
20240715-15:54:19 keep c_quizzes model 1 nb_accumulated 441 / 1100
20240715-15:56:54 keep c_quizzes model 0 nb_accumulated 462 / 1100
20240715-15:59:28 keep c_quizzes model 3 nb_accumulated 487 / 1100
20240715-16:02:03 keep c_quizzes model 0 nb_accumulated 507 / 1100
20240715-16:04:37 keep c_quizzes model 2 nb_accumulated 522 / 1100
20240715-16:07:11 keep c_quizzes model 0 nb_accumulated 540 / 1100
20240715-16:09:46 keep c_quizzes model 0 nb_accumulated 550 / 1100
20240715-16:12:21 keep c_quizzes model 0 nb_accumulated 567 / 1100
20240715-16:14:55 keep c_quizzes model 1 nb_accumulated 578 / 1100
20240715-16:17:29 keep c_quizzes model 2 nb_accumulated 593 / 1100
20240715-16:20:04 keep c_quizzes model 2 nb_accumulated 614 / 1100
20240715-16:22:38 keep c_quizzes model 0 nb_accumulated 629 / 1100
20240715-16:25:12 keep c_quizzes model 0 nb_accumulated 639 / 1100
20240715-16:27:47 keep c_quizzes model 2 nb_accumulated 653 / 1100
20240715-16:30:22 keep c_quizzes model 1 nb_accumulated 675 / 1100
20240715-16:32:56 keep c_quizzes model 0 nb_accumulated 686 / 1100
20240715-16:35:31 keep c_quizzes model 0 nb_accumulated 707 / 1100
20240715-16:38:05 keep c_quizzes model 2 nb_accumulated 719 / 1100
20240715-16:40:40 keep c_quizzes model 2 nb_accumulated 733 / 1100
20240715-16:43:14 keep c_quizzes model 2 nb_accumulated 754 / 1100
20240715-16:45:49 keep c_quizzes model 0 nb_accumulated 772 / 1100
20240715-16:48:23 keep c_quizzes model 2 nb_accumulated 790 / 1100
20240715-16:50:57 keep c_quizzes model 3 nb_accumulated 805 / 1100
20240715-16:53:32 keep c_quizzes model 1 nb_accumulated 819 / 1100
20240715-16:56:06 keep c_quizzes model 1 nb_accumulated 838 / 1100
20240715-16:58:41 keep c_quizzes model 2 nb_accumulated 854 / 1100
20240715-17:01:15 keep c_quizzes model 3 nb_accumulated 872 / 1100
20240715-17:03:50 keep c_quizzes model 4 nb_accumulated 885 / 1100
20240715-17:06:24 keep c_quizzes model 4 nb_accumulated 896 / 1100
20240715-17:08:59 keep c_quizzes model 1 nb_accumulated 912 / 1100
20240715-17:11:33 keep c_quizzes model 3 nb_accumulated 926 / 1100
20240715-17:14:07 keep c_quizzes model 1 nb_accumulated 944 / 1100
20240715-17:16:43 keep c_quizzes model 1 nb_accumulated 959 / 1100
20240715-17:19:17 keep c_quizzes model 1 nb_accumulated 973 / 1100
20240715-17:21:51 keep c_quizzes model 3 nb_accumulated 991 / 1100
20240715-17:24:26 keep c_quizzes model 2 nb_accumulated 1003 / 1100
20240715-17:27:01 keep c_quizzes model 1 nb_accumulated 1015 / 1100
20240715-17:29:35 keep c_quizzes model 2 nb_accumulated 1027 / 1100
20240715-17:32:10 keep c_quizzes model 4 nb_accumulated 1044 / 1100
20240715-17:34:44 keep c_quizzes model 2 nb_accumulated 1064 / 1100
20240715-17:37:18 keep c_quizzes model 1 nb_accumulated 1088 / 1100
20240715-17:39:53 keep c_quizzes model 1 nb_accumulated 1103 / 1100
20240715-17:39:53 wrote c_quizzes.pth
20240715-17:39:53 training model 0
20240715-17:39:53 training model 1
20240715-17:48:57 train_perplexity 28 model 0 1.141529048359982
20240715-17:49:05 train_perplexity 28 model 1 1.1410729057087212
20240715-17:49:26 test_perplexity 28 model 0 1.1402880518187009
20240715-17:49:35 test_perplexity 28 model 1 1.1391810049089703
20240715-17:52:28 test_accuracy 28 model 0 forward 486 / 515 backward 412 / 485
20240715-17:52:28 main_test_accuracy 28 0.8980000615119934
20240715-17:52:31 test_accuracy 28 model 1 forward 464 / 483 backward 426 / 517
20240715-17:52:31 main_test_accuracy 28 0.89000004529953
20240715-17:52:32 wrote gpt_000.pth
20240715-17:52:32 wrote gpt_001.pth
20240715-17:52:40 --- epoch 29 ----------------------------------------
20240715-17:52:40 current_test_accuracies 0.8980 0.8900 0.0000 0.0000 0.0000
20240715-17:52:40 training model 2
20240715-17:52:40 training model 3
20240715-18:01:45 train_perplexity 29 model 2 1.140900560923804
20240715-18:01:53 train_perplexity 29 model 3 1.1420250911719303
20240715-18:02:13 test_perplexity 29 model 2 1.1411921523270714
20240715-18:02:23 test_perplexity 29 model 3 1.1397511022883662
20240715-18:05:14 test_accuracy 29 model 2 forward 489 / 507 backward 428 / 493
20240715-18:05:14 main_test_accuracy 29 0.9170000553131104
20240715-18:05:16 test_accuracy 29 model 3 forward 489 / 519 backward 399 / 481
20240715-18:05:16 main_test_accuracy 29 0.8880000710487366
20240715-18:05:17 wrote gpt_002.pth
20240715-18:05:18 wrote gpt_003.pth
20240715-18:05:25 --- epoch 30 ----------------------------------------
20240715-18:05:25 current_test_accuracies 0.8980 0.8900 0.9170 0.8880 0.0000
20240715-18:05:25 training model 4
20240715-18:05:25 training model 3
20240715-18:14:30 train_perplexity 30 model 4 1.141918029536582
20240715-18:14:39 train_perplexity 30 model 3 1.1411291648932187
20240715-18:14:57 test_perplexity 30 model 4 1.1410123611504701
20240715-18:15:08 test_perplexity 30 model 3 1.139630826258643
20240715-18:18:02 test_accuracy 30 model 4 forward 498 / 510 backward 410 / 490
20240715-18:18:02 main_test_accuracy 30 0.9080000519752502
20240715-18:18:04 test_accuracy 30 model 3 forward 496 / 519 backward 414 / 481
20240715-18:18:04 main_test_accuracy 30 0.9100000262260437
20240715-18:18:05 wrote gpt_004.pth
20240715-18:18:05 wrote gpt_003.pth
20240715-18:18:13 --- epoch 31 ----------------------------------------
20240715-18:18:13 current_test_accuracies 0.8980 0.8900 0.9170 0.9100 0.9080
20240715-18:18:13 training model 1
20240715-18:18:13 training model 0
20240715-18:27:18 train_perplexity 31 model 1 1.1409716008278576
20240715-18:27:27 train_perplexity 31 model 0 1.1418746873703007
20240715-18:27:46 test_perplexity 31 model 1 1.1393602137575325
20240715-18:27:55 test_perplexity 31 model 0 1.1398557390665576
20240715-18:30:49 test_accuracy 31 model 1 forward 467 / 483 backward 450 / 517
20240715-18:30:49 main_test_accuracy 31 0.9170000553131104
20240715-18:30:51 test_accuracy 31 model 0 forward 502 / 515 backward 420 / 485
20240715-18:30:51 main_test_accuracy 31 0.9220000505447388
20240715-18:30:52 wrote gpt_001.pth
20240715-18:30:52 wrote gpt_000.pth
20240715-18:31:00 --- epoch 32 ----------------------------------------
20240715-18:31:00 current_test_accuracies 0.9220 0.9170 0.9170 0.9100 0.9080
20240715-18:34:53 keep c_quizzes model 0 nb_accumulated 22 / 1100
20240715-18:38:45 keep c_quizzes model 1 nb_accumulated 44 / 1100
20240715-18:41:31 keep c_quizzes model 1 nb_accumulated 60 / 1100
20240715-18:44:06 keep c_quizzes model 0 nb_accumulated 73 / 1100
20240715-18:46:40 keep c_quizzes model 0 nb_accumulated 94 / 1100
20240715-18:49:15 keep c_quizzes model 4 nb_accumulated 115 / 1100
20240715-18:51:49 keep c_quizzes model 2 nb_accumulated 138 / 1100
20240715-18:54:23 keep c_quizzes model 2 nb_accumulated 150 / 1100
20240715-18:56:58 keep c_quizzes model 1 nb_accumulated 167 / 1100
20240715-18:59:32 keep c_quizzes model 1 nb_accumulated 183 / 1100
20240715-19:02:07 keep c_quizzes model 2 nb_accumulated 194 / 1100
20240715-19:04:42 keep c_quizzes model 2 nb_accumulated 215 / 1100
20240715-19:07:16 keep c_quizzes model 4 nb_accumulated 230 / 1100
20240715-19:09:51 keep c_quizzes model 0 nb_accumulated 247 / 1100
20240715-19:12:26 keep c_quizzes model 0 nb_accumulated 276 / 1100
20240715-19:15:00 keep c_quizzes model 2 nb_accumulated 294 / 1100