20240808-20:01:51 argv ./main.py --result_dir=results_noise_10 --nb_train_samples=40000 --nb_test_samples=2000 --grids_world_tasks=replace_color,translate,grow,half_fill,frame,detect,corners,contact --accuracy_to_make_c_quizzes=0.95 --prompt_noise=0.05 --nb_gpts=10 --max_fail_to_validate=5 --proba_understands=0.75 20240808-20:01:51 args.log_filename train.log 20240808-20:01:51 args.result_dir results_noise_10 20240808-20:01:51 args.seed 0 20240808-20:01:51 args.resume False 20240808-20:01:51 args.max_percents_of_test_in_train -1 20240808-20:01:51 args.log_command None 20240808-20:01:51 args.nb_epochs 10000 20240808-20:01:51 args.batch_size 25 20240808-20:01:51 args.physical_batch_size None 20240808-20:01:51 args.inference_batch_size 50 20240808-20:01:51 args.nb_train_samples 40000 20240808-20:01:51 args.nb_test_samples 2000 20240808-20:01:51 args.nb_new_c_quizzes_for_train None 20240808-20:01:51 args.nb_new_c_quizzes_for_test None 20240808-20:01:51 args.learning_rate 0.0005 20240808-20:01:51 args.schedule_free False 20240808-20:01:51 args.model 37M 20240808-20:01:51 args.dim_model 512 20240808-20:01:51 args.dim_keys 64 20240808-20:01:51 args.dim_hidden 2048 20240808-20:01:51 args.nb_heads 8 20240808-20:01:51 args.nb_blocks 12 20240808-20:01:51 args.dropout 0.1 20240808-20:01:51 args.deterministic_synthesis False 20240808-20:01:51 args.problem grids 20240808-20:01:51 args.nb_threads 1 20240808-20:01:51 args.gpus all 20240808-20:01:51 args.nb_gpts 10 20240808-20:01:51 args.max_fail_to_validate 5 20240808-20:01:51 args.accuracy_to_make_c_quizzes 0.95 20240808-20:01:51 args.proba_understands 0.75 20240808-20:01:51 args.proba_not_understands 0.5 20240808-20:01:51 args.temperature_hot 1.5 20240808-20:01:51 args.temperature_cold 1 20240808-20:01:51 args.prompt_noise 0.05 20240808-20:01:51 args.nb_averaging_rounds 3 20240808-20:01:51 args.dirty_debug False 20240808-20:01:51 args.test None 20240808-20:01:51 args.grids_world_tasks replace_color,translate,grow,half_fill,frame,detect,corners,contact 20240808-20:01:51 args.grids_science_tasks None 20240808-20:01:51 args.sky_height 6 20240808-20:01:51 args.sky_width 8 20240808-20:01:51 args.sky_nb_birds 3 20240808-20:01:51 args.sky_nb_iterations 2 20240808-20:01:51 args.sky_speed 3 20240808-20:02:03 main_device cuda:0 gpus ['cuda:0', 'cuda:1'] 20240808-20:02:03 vocabulary_size 15 20240808-20:02:03 creating model 0 and its w_quizzes 20240808-20:02:11 creating model 1 and its w_quizzes 20240808-20:02:24 creating model 2 and its w_quizzes 20240808-20:02:37 creating model 3 and its w_quizzes 20240808-20:02:50 creating model 4 and its w_quizzes 20240808-20:03:04 creating model 5 and its w_quizzes 20240808-20:03:17 creating model 6 and its w_quizzes 20240808-20:03:30 creating model 7 and its w_quizzes 20240808-20:03:44 creating model 8 and its w_quizzes 20240808-20:03:57 creating model 9 and its w_quizzes 20240808-20:04:11 nb_parameters 37819407 (37M) 20240808-20:04:11 nb_new_c_quizzes_for_train 400 nb_new_c_quizzes_for_test 20 20240808-20:04:11 wrote state.pth 20240808-20:04:11 --- epoch 0 ---------------------------------------- 20240808-20:04:11 current_test_accuracies 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 20240808-20:04:11 training model 0 20240808-20:04:11 training model 1 20240808-20:07:47 train_perplexity 0 model 1 2.61185214462735 20240808-20:07:52 test_perplexity 0 model 1 1.4017766871759398 20240808-20:07:55 train_perplexity 0 model 0 2.700652719773651 20240808-20:08:00 test_perplexity 0 model 0 1.3946038601766884 20240808-20:09:18 test_accuracy 0 model 1 val 4 / 1592 20240808-20:09:20 test_accuracy 0 model 0 val 21 / 1556 20240808-20:09:22 wrote gpt_000.pth 20240808-20:09:22 wrote gpt_001.pth 20240808-20:09:36 wrote non_validated_0000_00.png 20240808-20:09:48 wrote non_validated_0000_01.png 20240808-20:09:49 wrote state.pth 20240808-20:09:49 --- epoch 1 ---------------------------------------- 20240808-20:09:49 current_test_accuracies 0.0135 0.0025 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 20240808-20:09:49 training model 2 20240808-20:09:49 training model 3 20240808-20:13:24 train_perplexity 1 model 3 2.7707583942609775 20240808-20:13:30 test_perplexity 1 model 3 1.4160060367148157 20240808-20:13:31 train_perplexity 1 model 2 2.9365085694167994 20240808-20:13:35 test_perplexity 1 model 2 1.413584565515257 20240808-20:14:54 test_accuracy 1 model 2 val 1 / 1610 20240808-20:14:56 test_accuracy 1 model 3 val 2 / 1593 20240808-20:14:57 wrote gpt_002.pth 20240808-20:14:58 wrote gpt_003.pth 20240808-20:15:11 wrote non_validated_0001_02.png 20240808-20:15:24 wrote non_validated_0001_03.png 20240808-20:15:24 wrote state.pth 20240808-20:15:24 --- epoch 2 ---------------------------------------- 20240808-20:15:24 current_test_accuracies 0.0135 0.0025 0.0006 0.0013 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 20240808-20:15:24 training model 4 20240808-20:15:24 training model 5 20240808-20:18:58 train_perplexity 2 model 5 2.6786714558051083 20240808-20:19:04 test_perplexity 2 model 5 1.3771682657113629 20240808-20:19:06 train_perplexity 2 model 4 2.749189877650507 20240808-20:19:11 test_perplexity 2 model 4 1.404325344818232 20240808-20:20:29 test_accuracy 2 model 4 val 3 / 1625 20240808-20:20:31 test_accuracy 2 model 5 val 10 / 1597 20240808-20:20:32 wrote gpt_004.pth 20240808-20:20:33 wrote gpt_005.pth 20240808-20:20:46 wrote non_validated_0002_04.png 20240808-20:20:59 wrote non_validated_0002_05.png 20240808-20:20:59 wrote state.pth 20240808-20:20:59 --- epoch 3 ---------------------------------------- 20240808-20:20:59 current_test_accuracies 0.0135 0.0025 0.0006 0.0013 0.0018 0.0063 0.0000 0.0000 0.0000 0.0000 20240808-20:20:59 training model 6 20240808-20:20:59 training model 7 20240808-20:24:33 train_perplexity 3 model 7 2.664890680246267 20240808-20:24:39 test_perplexity 3 model 7 1.3794044972523056 20240808-20:24:40 train_perplexity 3 model 6 2.7287347296341924 20240808-20:24:45 test_perplexity 3 model 6 1.384667108612619 20240808-20:26:01 test_accuracy 3 model 7 val 8 / 1609 20240808-20:26:03 test_accuracy 3 model 6 val 17 / 1600 20240808-20:26:05 wrote gpt_006.pth 20240808-20:26:05 wrote gpt_007.pth 20240808-20:26:18 wrote non_validated_0003_06.png 20240808-20:26:31 wrote non_validated_0003_07.png 20240808-20:26:31 wrote state.pth 20240808-20:26:31 --- epoch 4 ---------------------------------------- 20240808-20:26:31 current_test_accuracies 0.0135 0.0025 0.0006 0.0013 0.0018 0.0063 0.0106 0.0050 0.0000 0.0000 20240808-20:26:31 training model 8 20240808-20:26:31 training model 9 20240808-20:30:05 train_perplexity 4 model 9 2.592162814495052 20240808-20:30:11 test_perplexity 4 model 9 1.4034157751893466 20240808-20:30:13 train_perplexity 4 model 8 2.6270142513942476 20240808-20:30:18 test_perplexity 4 model 8 1.3794365342245858 20240808-20:31:37 test_accuracy 4 model 9 val 4 / 1594 20240808-20:31:39 test_accuracy 4 model 8 val 28 / 1598 20240808-20:31:40 wrote gpt_008.pth 20240808-20:31:41 wrote gpt_009.pth 20240808-20:31:54 wrote non_validated_0004_08.png 20240808-20:32:07 wrote non_validated_0004_09.png 20240808-20:32:07 wrote state.pth 20240808-20:32:07 --- epoch 5 ---------------------------------------- 20240808-20:32:07 current_test_accuracies 0.0135 0.0025 0.0006 0.0013 0.0018 0.0063 0.0106 0.0050 0.0175 0.0025 20240808-20:32:07 training model 2 20240808-20:32:07 training model 3 20240808-20:35:48 train_perplexity 5 model 3 1.3610495779879945 20240808-20:35:48 train_perplexity 5 model 2 1.3698576387958052 20240808-20:35:55 test_perplexity 5 model 3 1.2940013570123976 20240808-20:35:55 test_perplexity 5 model 2 1.2977402787328907 20240808-20:37:14 test_accuracy 5 model 2 val 61 / 1623 20240808-20:37:16 test_accuracy 5 model 3 val 89 / 1594 20240808-20:37:18 wrote gpt_002.pth 20240808-20:37:19 wrote gpt_003.pth 20240808-20:37:32 wrote non_validated_0005_02.png 20240808-20:37:45 wrote non_validated_0005_03.png 20240808-20:37:45 wrote state.pth 20240808-20:37:45 --- epoch 6 ---------------------------------------- 20240808-20:37:45 current_test_accuracies 0.0135 0.0025 0.0376 0.0558 0.0018 0.0063 0.0106 0.0050 0.0175 0.0025 20240808-20:37:45 training model 4 20240808-20:37:45 training model 9 20240808-20:41:26 train_perplexity 6 model 9 1.3572233074034958 20240808-20:41:26 train_perplexity 6 model 4 1.357255656882486 20240808-20:41:33 test_perplexity 6 model 4 1.289048790338565 20240808-20:41:33 test_perplexity 6 model 9 1.287123267424336 20240808-20:42:57 test_accuracy 6 model 9 val 97 / 1575 20240808-20:42:58 test_accuracy 6 model 4 val 77 / 1586 20240808-20:43:00 wrote gpt_004.pth 20240808-20:43:00 wrote gpt_009.pth 20240808-20:43:13 wrote non_validated_0006_04.png 20240808-20:43:26 wrote non_validated_0006_09.png 20240808-20:43:26 wrote state.pth 20240808-20:43:26 --- epoch 7 ---------------------------------------- 20240808-20:43:26 current_test_accuracies 0.0135 0.0025 0.0376 0.0558 0.0485 0.0063 0.0106 0.0050 0.0175 0.0616 20240808-20:43:26 training model 1 20240808-20:43:26 training model 7 20240808-20:47:08 train_perplexity 7 model 7 1.3356032734131544 20240808-20:47:08 train_perplexity 7 model 1 1.3601610830483932 20240808-20:47:15 test_perplexity 7 model 7 1.2747707877472505 20240808-20:47:15 test_perplexity 7 model 1 1.292098621885316 20240808-20:48:36 test_accuracy 7 model 7 val 126 / 1583 20240808-20:48:38 test_accuracy 7 model 1 val 65 / 1564 20240808-20:48:39 wrote gpt_001.pth 20240808-20:48:40 wrote gpt_007.pth 20240808-20:48:53 wrote non_validated_0007_01.png 20240808-20:49:06 wrote non_validated_0007_07.png 20240808-20:49:06 wrote state.pth 20240808-20:49:06 --- epoch 8 ---------------------------------------- 20240808-20:49:06 current_test_accuracies 0.0135 0.0416 0.0376 0.0558 0.0485 0.0063 0.0106 0.0796 0.0175 0.0616 20240808-20:49:06 training model 5 20240808-20:49:06 training model 6 20240808-20:52:47 train_perplexity 8 model 5 1.3338110327350232 20240808-20:52:47 train_perplexity 8 model 6 1.349509511937042 20240808-20:52:54 test_perplexity 8 model 5 1.267571156587023 20240808-20:52:54 test_perplexity 8 model 6 1.2897465704665156 20240808-20:54:16 test_accuracy 8 model 5 val 88 / 1583 20240808-20:54:16 test_accuracy 8 model 6 val 83 / 1581 20240808-20:54:18 wrote gpt_005.pth 20240808-20:54:19 wrote gpt_006.pth 20240808-20:54:32 wrote non_validated_0008_05.png 20240808-20:54:45 wrote non_validated_0008_06.png 20240808-20:54:45 wrote state.pth 20240808-20:54:45 --- epoch 9 ---------------------------------------- 20240808-20:54:45 current_test_accuracies 0.0135 0.0416 0.0376 0.0558 0.0485 0.0556 0.0525 0.0796 0.0175 0.0616 20240808-20:54:45 training model 0 20240808-20:54:45 training model 8 20240808-20:58:26 train_perplexity 9 model 0 1.3541394845532158 20240808-20:58:26 train_perplexity 9 model 8 1.3554187663455746 20240808-20:58:33 test_perplexity 9 model 0 1.2886897534607176 20240808-20:58:34 test_perplexity 9 model 8 1.287709235171357 20240808-20:59:55 test_accuracy 9 model 8 val 88 / 1589 20240808-20:59:56 test_accuracy 9 model 0 val 72 / 1594 20240808-20:59:58 wrote gpt_000.pth 20240808-20:59:59 wrote gpt_008.pth 20240808-21:00:12 wrote non_validated_0009_00.png 20240808-21:00:25 wrote non_validated_0009_08.png 20240808-21:00:25 wrote state.pth 20240808-21:00:25 --- epoch 10 ---------------------------------------- 20240808-21:00:25 current_test_accuracies 0.0452 0.0416 0.0376 0.0558 0.0485 0.0556 0.0525 0.0796 0.0554 0.0616 20240808-21:00:25 training model 2 20240808-21:00:25 training model 1 20240808-21:04:05 train_perplexity 10 model 2 1.2851339392600476 20240808-21:04:05 train_perplexity 10 model 1 1.2817965314697912 20240808-21:04:13 test_perplexity 10 model 2 1.2397895617664867 20240808-21:04:13 test_perplexity 10 model 1 1.2519072636691144 20240808-21:05:33 test_accuracy 10 model 2 val 180 / 1621 20240808-21:05:36 test_accuracy 10 model 1 val 165 / 1583 20240808-21:05:38 wrote gpt_002.pth 20240808-21:05:38 wrote gpt_001.pth 20240808-21:05:51 wrote non_validated_0010_02.png 20240808-21:06:04 wrote non_validated_0010_01.png 20240808-21:06:04 wrote state.pth 20240808-21:06:04 --- epoch 11 ---------------------------------------- 20240808-21:06:04 current_test_accuracies 0.0452 0.1042 0.1110 0.0558 0.0485 0.0556 0.0525 0.0796 0.0554 0.0616 20240808-21:06:04 training model 0 20240808-21:06:04 training model 4 20240808-21:09:45 train_perplexity 11 model 0 1.2752923579612525 20240808-21:09:45 train_perplexity 11 model 4 1.2757911313024461 20240808-21:09:52 test_perplexity 11 model 0 1.230123391531438 20240808-21:09:53 test_perplexity 11 model 4 1.2299655393391882 20240808-21:11:13 test_accuracy 11 model 0 val 207 / 1609 20240808-21:11:15 test_accuracy 11 model 4 val 252 / 1584 20240808-21:11:17 wrote gpt_000.pth 20240808-21:11:17 wrote gpt_004.pth 20240808-21:11:30 wrote non_validated_0011_00.png 20240808-21:11:43 wrote non_validated_0011_04.png 20240808-21:11:44 wrote state.pth 20240808-21:11:44 --- epoch 12 ---------------------------------------- 20240808-21:11:44 current_test_accuracies 0.1287 0.1042 0.1110 0.0558 0.1591 0.0556 0.0525 0.0796 0.0554 0.0616 20240808-21:11:44 training model 6 20240808-21:11:44 training model 8 20240808-21:15:25 train_perplexity 12 model 6 1.2789026438996836 20240808-21:15:25 train_perplexity 12 model 8 1.279486333288754 20240808-21:15:32 test_perplexity 12 model 6 1.2396137189535177 20240808-21:15:32 test_perplexity 12 model 8 1.2374924233528155 20240808-21:16:51 test_accuracy 12 model 6 val 217 / 1636 20240808-21:16:53 test_accuracy 12 model 8 val 177 / 1628 20240808-21:16:55 wrote gpt_006.pth 20240808-21:16:55 wrote gpt_008.pth 20240808-21:17:08 wrote non_validated_0012_06.png 20240808-21:17:21 wrote non_validated_0012_08.png 20240808-21:17:21 wrote state.pth 20240808-21:17:21 --- epoch 13 ---------------------------------------- 20240808-21:17:21 current_test_accuracies 0.1287 0.1042 0.1110 0.0558 0.1591 0.0556 0.1326 0.0796 0.1087 0.0616 20240808-21:17:21 training model 5 20240808-21:17:21 training model 3 20240808-21:21:02 train_perplexity 13 model 5 1.2591441849797873 20240808-21:21:02 train_perplexity 13 model 3 1.2850618704567038 20240808-21:21:09 test_perplexity 13 model 5 1.219867262969668 20240808-21:21:09 test_perplexity 13 model 3 1.243886603622235 20240808-21:22:30 test_accuracy 13 model 3 val 197 / 1622 20240808-21:22:31 test_accuracy 13 model 5 val 265 / 1592 20240808-21:22:33 wrote gpt_005.pth 20240808-21:22:34 wrote gpt_003.pth 20240808-21:22:46 wrote non_validated_0013_05.png 20240808-21:22:59 wrote non_validated_0013_03.png 20240808-21:22:59 wrote state.pth 20240808-21:22:59 --- epoch 14 ---------------------------------------- 20240808-21:22:59 current_test_accuracies 0.1287 0.1042 0.1110 0.1215 0.1591 0.1665 0.1326 0.0796 0.1087 0.0616 20240808-21:22:59 training model 9 20240808-21:22:59 training model 7 20240808-21:26:40 train_perplexity 14 model 9 1.2797367432018718 20240808-21:26:40 train_perplexity 14 model 7 1.2611361694954857 20240808-21:26:47 test_perplexity 14 model 9 1.2367455499170024 20240808-21:26:47 test_perplexity 14 model 7 1.2211465488820086 20240808-21:28:06 test_accuracy 14 model 7 val 291 / 1605 20240808-21:28:09 test_accuracy 14 model 9 val 203 / 1599 20240808-21:28:11 wrote gpt_009.pth 20240808-21:28:11 wrote gpt_007.pth 20240808-21:28:25 wrote non_validated_0014_09.png 20240808-21:28:38 wrote non_validated_0014_07.png 20240808-21:28:38 wrote state.pth 20240808-21:28:38 --- epoch 15 ---------------------------------------- 20240808-21:28:38 current_test_accuracies 0.1287 0.1042 0.1110 0.1215 0.1591 0.1665 0.1326 0.1813 0.1087 0.1270 20240808-21:28:38 training model 1 20240808-21:28:38 training model 8 20240808-21:32:19 train_perplexity 15 model 8 1.2319350417080375 20240808-21:32:19 train_perplexity 15 model 1 1.2373209840536166 20240808-21:32:26 test_perplexity 15 model 8 1.2013018086131884 20240808-21:32:27 test_perplexity 15 model 1 1.2076785860181498 20240808-21:33:44 test_accuracy 15 model 1 val 405 / 1602 20240808-21:33:46 test_accuracy 15 model 8 val 468 / 1600 20240808-21:33:49 wrote gpt_001.pth 20240808-21:33:49 wrote gpt_008.pth 20240808-21:34:02 wrote non_validated_0015_01.png 20240808-21:34:15 wrote non_validated_0015_08.png 20240808-21:34:15 wrote state.pth 20240808-21:34:15 --- epoch 16 ---------------------------------------- 20240808-21:34:15 current_test_accuracies 0.1287 0.2528 0.1110 0.1215 0.1591 0.1665 0.1326 0.1813 0.2925 0.1270 20240808-21:34:15 training model 2 20240808-21:34:15 training model 3 20240808-21:37:56 train_perplexity 16 model 2 1.2364805653371935 20240808-21:37:56 train_perplexity 16 model 3 1.239036428800135 20240808-21:38:03 test_perplexity 16 model 2 1.2052662505890657 20240808-21:38:03 test_perplexity 16 model 3 1.2058800595588126 20240808-21:39:22 test_accuracy 16 model 2 val 396 / 1625 20240808-21:39:22 test_accuracy 16 model 3 val 409 / 1605 20240808-21:39:24 wrote gpt_002.pth 20240808-21:39:25 wrote gpt_003.pth 20240808-21:39:38 wrote non_validated_0016_02.png 20240808-21:39:51 wrote non_validated_0016_03.png 20240808-21:39:51 wrote state.pth 20240808-21:39:51 --- epoch 17 ---------------------------------------- 20240808-21:39:51 current_test_accuracies 0.1287 0.2528 0.2437 0.2548 0.1591 0.1665 0.1326 0.1813 0.2925 0.1270 20240808-21:39:51 training model 9 20240808-21:39:51 training model 0 20240808-21:43:32 train_perplexity 17 model 9 1.2328078342107522 20240808-21:43:32 train_perplexity 17 model 0 1.2290247698312984 20240808-21:43:39 test_perplexity 17 model 9 1.2030796435286035 20240808-21:43:39 test_perplexity 17 model 0 1.1998440824276106 20240808-21:44:59 test_accuracy 17 model 0 val 424 / 1606 20240808-21:45:01 test_accuracy 17 model 9 val 455 / 1582 20240808-21:45:03 wrote gpt_009.pth 20240808-21:45:03 wrote gpt_000.pth 20240808-21:45:16 wrote non_validated_0017_09.png 20240808-21:45:29 wrote non_validated_0017_00.png 20240808-21:45:29 wrote state.pth 20240808-21:45:29 --- epoch 18 ---------------------------------------- 20240808-21:45:29 current_test_accuracies 0.2640 0.2528 0.2437 0.2548 0.1591 0.1665 0.1326 0.1813 0.2925 0.2876 20240808-21:45:29 training model 6 20240808-21:45:29 training model 4 20240808-21:49:11 train_perplexity 18 model 6 1.2337712019636415 20240808-21:49:11 train_perplexity 18 model 4 1.228093455685787 20240808-21:49:18 test_perplexity 18 model 6 1.2068826162597333 20240808-21:49:18 test_perplexity 18 model 4 1.2042576406148133 20240808-21:50:39 test_accuracy 18 model 6 val 391 / 1624 20240808-21:50:40 test_accuracy 18 model 4 val 459 / 1582 20240808-21:50:42 wrote gpt_006.pth 20240808-21:50:42 wrote gpt_004.pth 20240808-21:50:55 wrote non_validated_0018_06.png 20240808-21:51:08 wrote non_validated_0018_04.png 20240808-21:51:08 wrote state.pth 20240808-21:51:08 --- epoch 19 ---------------------------------------- 20240808-21:51:08 current_test_accuracies 0.2640 0.2528 0.2437 0.2548 0.2901 0.1665 0.2408 0.1813 0.2925 0.2876 20240808-21:51:08 training model 5 20240808-21:51:08 training model 7 20240808-21:54:49 train_perplexity 19 model 5 1.2178327034084397 20240808-21:54:49 train_perplexity 19 model 7 1.2192461880282626 20240808-21:54:56 test_perplexity 19 model 5 1.1950007698896115 20240808-21:54:56 test_perplexity 19 model 7 1.1978779768966923 20240808-21:56:16 test_accuracy 19 model 7 val 489 / 1612 20240808-21:56:17 test_accuracy 19 model 5 val 587 / 1611 20240808-21:56:19 wrote gpt_005.pth 20240808-21:56:19 wrote gpt_007.pth 20240808-21:56:32 wrote non_validated_0019_05.png 20240808-21:56:45 wrote non_validated_0019_07.png 20240808-21:56:45 wrote state.pth 20240808-21:56:45 --- epoch 20 ---------------------------------------- 20240808-21:56:45 current_test_accuracies 0.2640 0.2528 0.2437 0.2548 0.2901 0.3644 0.2408 0.3033 0.2925 0.2876 20240808-21:56:45 training model 6 20240808-21:56:45 training model 2 20240808-22:00:27 train_perplexity 20 model 2 1.20682259406318 20240808-22:00:29 train_perplexity 20 model 6 1.2081547409367852 20240808-22:00:34 test_perplexity 20 model 2 1.1893894057268564 20240808-22:00:36 test_perplexity 20 model 6 1.1907377894376843 20240808-22:01:54 test_accuracy 20 model 2 val 582 / 1627 20240808-22:01:57 test_accuracy 20 model 6 val 697 / 1597 20240808-22:01:59 wrote gpt_006.pth 20240808-22:01:59 wrote gpt_002.pth 20240808-22:02:12 wrote non_validated_0020_06.png 20240808-22:02:25 wrote non_validated_0020_02.png 20240808-22:02:25 wrote state.pth 20240808-22:02:25 --- epoch 21 ---------------------------------------- 20240808-22:02:25 current_test_accuracies 0.2640 0.2528 0.3577 0.2548 0.2901 0.3644 0.4364 0.3033 0.2925 0.2876 20240808-22:02:25 training model 1 20240808-22:02:25 training model 3 20240808-22:06:06 train_perplexity 21 model 1 1.2079573018344634 20240808-22:06:06 train_perplexity 21 model 3 1.2097607065294678 20240808-22:06:14 test_perplexity 21 model 1 1.1890506909079746 20240808-22:06:14 test_perplexity 21 model 3 1.190037373042743 20240808-22:07:35 test_accuracy 21 model 1 val 606 / 1595 20240808-22:07:37 test_accuracy 21 model 3 val 627 / 1578 20240808-22:07:39 wrote gpt_001.pth 20240808-22:07:39 wrote gpt_003.pth 20240808-22:07:53 wrote non_validated_0021_01.png 20240808-22:08:06 wrote non_validated_0021_03.png 20240808-22:08:06 wrote state.pth 20240808-22:08:06 --- epoch 22 ---------------------------------------- 20240808-22:08:06 current_test_accuracies 0.2640 0.3799 0.3577 0.3973 0.2901 0.3644 0.4364 0.3033 0.2925 0.2876 20240808-22:08:06 training model 0 20240808-22:08:06 training model 9 20240808-22:11:47 train_perplexity 22 model 0 1.2055984664127275 20240808-22:11:47 train_perplexity 22 model 9 1.204432806785485 20240808-22:11:54 test_perplexity 22 model 0 1.1888515789707246 20240808-22:11:54 test_perplexity 22 model 9 1.187744581457125 20240808-22:13:15 test_accuracy 22 model 0 val 702 / 1624 20240808-22:13:17 test_accuracy 22 model 9 val 725 / 1592 20240808-22:13:19 wrote gpt_000.pth 20240808-22:13:19 wrote gpt_009.pth 20240808-22:13:32 wrote non_validated_0022_00.png 20240808-22:13:45 wrote non_validated_0022_09.png 20240808-22:13:45 wrote state.pth 20240808-22:13:45 --- epoch 23 ---------------------------------------- 20240808-22:13:45 current_test_accuracies 0.4323 0.3799 0.3577 0.3973 0.2901 0.3644 0.4364 0.3033 0.2925 0.4554 20240808-22:13:45 training model 4 20240808-22:13:45 training model 8 20240808-22:17:27 train_perplexity 23 model 8 1.208068365889341 20240808-22:17:27 train_perplexity 23 model 4 1.2024673276152715 20240808-22:17:34 test_perplexity 23 model 8 1.1901778164509385 20240808-22:17:34 test_perplexity 23 model 4 1.1844103026482937 20240808-22:18:55 test_accuracy 23 model 4 val 770 / 1604 20240808-22:18:57 test_accuracy 23 model 8 val 651 / 1592 20240808-22:18:59 wrote gpt_004.pth 20240808-22:18:59 wrote gpt_008.pth 20240808-22:19:12 wrote non_validated_0023_04.png 20240808-22:19:25 wrote non_validated_0023_08.png 20240808-22:19:25 wrote state.pth 20240808-22:19:25 --- epoch 24 ---------------------------------------- 20240808-22:19:25 current_test_accuracies 0.4323 0.3799 0.3577 0.3973 0.4800 0.3644 0.4364 0.3033 0.4089 0.4554 20240808-22:19:25 training model 7 20240808-22:19:25 training model 2 20240808-22:23:07 train_perplexity 24 model 2 1.1902562708462536 20240808-22:23:07 train_perplexity 24 model 7 1.1999876832963134 20240808-22:23:14 test_perplexity 24 model 2 1.175208159490558 20240808-22:23:14 test_perplexity 24 model 7 1.1836547845837397 20240808-22:24:36 test_accuracy 24 model 2 val 847 / 1580 20240808-22:24:36 test_accuracy 24 model 7 val 698 / 1581 20240808-22:24:38 wrote gpt_007.pth 20240808-22:24:39 wrote gpt_002.pth 20240808-22:24:52 wrote non_validated_0024_07.png 20240808-22:25:05 wrote non_validated_0024_02.png 20240808-22:25:05 wrote state.pth 20240808-22:25:05 --- epoch 25 ---------------------------------------- 20240808-22:25:05 current_test_accuracies 0.4323 0.3799 0.5361 0.3973 0.4800 0.3644 0.4364 0.4415 0.4089 0.4554 20240808-22:25:05 training model 5 20240808-22:25:05 training model 1 20240808-22:28:46 train_perplexity 25 model 5 1.199100802119133 20240808-22:28:46 train_perplexity 25 model 1 1.191330055355661 20240808-22:28:53 test_perplexity 25 model 5 1.1839985067310443 20240808-22:28:54 test_perplexity 25 model 1 1.1787980589816942 20240808-22:30:13 test_accuracy 25 model 5 val 785 / 1609 20240808-22:30:13 test_accuracy 25 model 1 val 807 / 1600 20240808-22:30:15 wrote gpt_005.pth 20240808-22:30:16 wrote gpt_001.pth 20240808-22:30:29 wrote non_validated_0025_05.png 20240808-22:30:42 wrote non_validated_0025_01.png 20240808-22:30:42 wrote state.pth 20240808-22:30:42 --- epoch 26 ---------------------------------------- 20240808-22:30:42 current_test_accuracies 0.4323 0.5044 0.5361 0.3973 0.4800 0.4879 0.4364 0.4415 0.4089 0.4554 20240808-22:30:42 training model 3 20240808-22:30:42 training model 8 20240808-22:34:23 train_perplexity 26 model 8 1.1926430113721724 20240808-22:34:24 train_perplexity 26 model 3 1.1920018507003418 20240808-22:34:31 test_perplexity 26 model 8 1.1785800643377207 20240808-22:34:31 test_perplexity 26 model 3 1.1796970382311673 20240808-22:35:49 test_accuracy 26 model 8 val 874 / 1613 20240808-22:35:50 test_accuracy 26 model 3 val 920 / 1609 20240808-22:35:52 wrote gpt_003.pth 20240808-22:35:52 wrote gpt_008.pth 20240808-22:36:05 wrote non_validated_0026_03.png 20240808-22:36:18 wrote non_validated_0026_08.png 20240808-22:36:18 wrote state.pth 20240808-22:36:18 --- epoch 27 ---------------------------------------- 20240808-22:36:18 current_test_accuracies 0.4323 0.5044 0.5361 0.5718 0.4800 0.4879 0.4364 0.4415 0.5418 0.4554 20240808-22:36:18 training model 0 20240808-22:36:18 training model 6 20240808-22:40:00 train_perplexity 27 model 6 1.1940889622389106 20240808-22:40:00 train_perplexity 27 model 0 1.1915290568636943 20240808-22:40:07 test_perplexity 27 model 6 1.1801704968612423 20240808-22:40:07 test_perplexity 27 model 0 1.176007107724744 20240808-22:41:27 test_accuracy 27 model 0 val 895 / 1581 20240808-22:41:28 test_accuracy 27 model 6 val 883 / 1586 20240808-22:41:30 wrote gpt_000.pth 20240808-22:41:31 wrote gpt_006.pth 20240808-22:41:44 wrote non_validated_0027_00.png 20240808-22:41:57 wrote non_validated_0027_06.png 20240808-22:41:57 wrote state.pth 20240808-22:41:57 --- epoch 28 ---------------------------------------- 20240808-22:41:57 current_test_accuracies 0.5661 0.5044 0.5361 0.5718 0.4800 0.4879 0.5567 0.4415 0.5418 0.4554 20240808-22:41:57 training model 7 20240808-22:41:57 training model 9 20240808-22:45:39 train_perplexity 28 model 9 1.1895890665405178 20240808-22:45:40 train_perplexity 28 model 7 1.1874838242658898 20240808-22:45:46 test_perplexity 28 model 9 1.176406972960885 20240808-22:45:47 test_perplexity 28 model 7 1.178068115255994 20240808-22:47:08 test_accuracy 28 model 9 val 867 / 1589 20240808-22:47:08 test_accuracy 28 model 7 val 852 / 1554 20240808-22:47:11 wrote gpt_007.pth 20240808-22:47:11 wrote gpt_009.pth 20240808-22:47:24 wrote non_validated_0028_07.png 20240808-22:47:37 wrote non_validated_0028_09.png 20240808-22:47:37 wrote state.pth 20240808-22:47:37 --- epoch 29 ---------------------------------------- 20240808-22:47:37 current_test_accuracies 0.5661 0.5044 0.5361 0.5718 0.4800 0.4879 0.5567 0.5483 0.5418 0.5456 20240808-22:47:37 training model 4 20240808-22:47:37 training model 5 20240808-22:51:20 train_perplexity 29 model 5 1.1865079895175685 20240808-22:51:21 train_perplexity 29 model 4 1.189115343928614 20240808-22:51:26 test_perplexity 29 model 5 1.173558918100353 20240808-22:51:28 test_perplexity 29 model 4 1.174458219294476 20240808-22:52:49 test_accuracy 29 model 5 val 978 / 1580 20240808-22:52:50 test_accuracy 29 model 4 val 978 / 1576 20240808-22:52:52 wrote gpt_004.pth 20240808-22:52:53 wrote gpt_005.pth 20240808-22:53:06 wrote non_validated_0029_04.png 20240808-22:53:19 wrote non_validated_0029_05.png 20240808-22:53:19 wrote state.pth 20240808-22:53:19 --- epoch 30 ---------------------------------------- 20240808-22:53:19 current_test_accuracies 0.5661 0.5044 0.5361 0.5718 0.6206 0.6190 0.5567 0.5483 0.5418 0.5456 20240808-22:53:19 training model 1 20240808-22:53:19 training model 2 20240808-22:57:00 train_perplexity 30 model 2 1.1805104999388703 20240808-22:57:03 train_perplexity 30 model 1 1.181418650816051 20240808-22:57:07 test_perplexity 30 model 2 1.1688139112611104 20240808-22:57:09 test_perplexity 30 model 1 1.1734612303888778 20240808-22:58:27 test_accuracy 30 model 2 val 1000 / 1606 20240808-22:58:28 test_accuracy 30 model 1 val 940 / 1611 20240808-22:58:30 wrote gpt_001.pth 20240808-22:58:31 wrote gpt_002.pth 20240808-22:58:44 wrote non_validated_0030_01.png 20240808-22:58:57 wrote non_validated_0030_02.png 20240808-22:58:57 wrote state.pth 20240808-22:58:57 --- epoch 31 ---------------------------------------- 20240808-22:58:57 current_test_accuracies 0.5661 0.5835 0.6227 0.5718 0.6206 0.6190 0.5567 0.5483 0.5418 0.5456 20240808-22:58:57 training model 8 20240808-22:58:57 training model 9 20240808-23:02:38 train_perplexity 31 model 9 1.1803782227506152 20240808-23:02:43 train_perplexity 31 model 8 1.1822841547163823 20240808-23:02:44 test_perplexity 31 model 9 1.1707916027243714 20240808-23:02:48 test_perplexity 31 model 8 1.1745281182220997 20240808-23:04:06 test_accuracy 31 model 9 val 1091 / 1606 20240808-23:04:09 test_accuracy 31 model 8 val 987 / 1593 20240808-23:04:11 wrote gpt_008.pth 20240808-23:04:12 wrote gpt_009.pth 20240808-23:04:25 wrote non_validated_0031_08.png 20240808-23:04:38 wrote non_validated_0031_09.png 20240808-23:04:38 wrote state.pth 20240808-23:04:38 --- epoch 32 ---------------------------------------- 20240808-23:04:38 current_test_accuracies 0.5661 0.5835 0.6227 0.5718 0.6206 0.6190 0.5567 0.5483 0.6196 0.6793 20240808-23:04:38 training model 7 20240808-23:04:38 training model 6 20240808-23:08:20 train_perplexity 32 model 7 1.180299482562428 20240808-23:08:20 train_perplexity 32 model 6 1.1831075778159958 20240808-23:08:27 test_perplexity 32 model 7 1.170802658186532 20240808-23:08:27 test_perplexity 32 model 6 1.1728431465194018 20240808-23:09:47 test_accuracy 32 model 7 val 1089 / 1600 20240808-23:09:47 test_accuracy 32 model 6 val 1044 / 1612 20240808-23:09:50 wrote gpt_007.pth 20240808-23:09:50 wrote gpt_006.pth 20240808-23:10:03 wrote non_validated_0032_07.png 20240808-23:10:16 wrote non_validated_0032_06.png 20240808-23:10:16 wrote state.pth 20240808-23:10:16 --- epoch 33 ---------------------------------------- 20240808-23:10:16 current_test_accuracies 0.5661 0.5835 0.6227 0.5718 0.6206 0.6190 0.6476 0.6806 0.6196 0.6793 20240808-23:10:16 training model 0 20240808-23:10:16 training model 3 20240808-23:13:58 train_perplexity 33 model 3 1.1823554410993515 20240808-23:13:59 train_perplexity 33 model 0 1.1822793882264813 20240808-23:14:06 test_perplexity 33 model 3 1.1707989955556375 20240808-23:14:06 test_perplexity 33 model 0 1.173727047139595 20240808-23:15:25 test_accuracy 33 model 0 val 1008 / 1605 20240808-23:15:28 test_accuracy 33 model 3 val 1036 / 1595 20240808-23:15:30 wrote gpt_000.pth 20240808-23:15:30 wrote gpt_003.pth 20240808-23:15:44 wrote non_validated_0033_00.png 20240808-23:15:57 wrote non_validated_0033_03.png 20240808-23:15:57 wrote state.pth 20240808-23:15:57 --- epoch 34 ---------------------------------------- 20240808-23:15:57 current_test_accuracies 0.6280 0.5835 0.6227 0.6495 0.6206 0.6190 0.6476 0.6806 0.6196 0.6793 20240808-23:15:57 training model 1 20240808-23:15:57 training model 5 20240808-23:19:39 train_perplexity 34 model 5 1.1796353836539086 20240808-23:19:40 train_perplexity 34 model 1 1.1767890685324736 20240808-23:19:44 test_perplexity 34 model 1 1.1668764829379294 20240808-23:20:27 test_accuracy 34 model 1 val 1114 / 1583 20240808-23:20:29 wrote gpt_001.pth 20240808-23:20:29 wrote gpt_005.pth 20240808-23:20:43 wrote non_validated_0034_01.png 20240808-23:20:56 wrote non_validated_0034_05.png 20240808-23:20:56 wrote state.pth 20240808-23:20:56 --- epoch 35 ---------------------------------------- 20240808-23:20:56 current_test_accuracies 0.6280 0.7037 0.6227 0.6495 0.6206 0.6190 0.6476 0.6806 0.6196 0.6793 20240808-23:20:56 training model 5 20240808-23:20:56 training model 8 20240808-23:24:38 train_perplexity 35 model 8 1.1764537521763772 20240808-23:24:39 train_perplexity 35 model 5 1.1748903653689882 20240808-23:24:45 test_perplexity 35 model 8 1.1674882770290689 20240808-23:24:46 test_perplexity 35 model 5 1.1672504865064803 20240808-23:26:06 test_accuracy 35 model 5 val 1180 / 1605 20240808-23:26:08 test_accuracy 35 model 8 val 1141 / 1594 20240808-23:26:10 wrote gpt_005.pth 20240808-23:26:11 wrote gpt_008.pth 20240808-23:26:24 wrote non_validated_0035_05.png 20240808-23:26:37 wrote non_validated_0035_08.png 20240808-23:26:37 wrote state.pth 20240808-23:26:37 --- epoch 36 ---------------------------------------- 20240808-23:26:37 current_test_accuracies 0.6280 0.7037 0.6227 0.6495 0.6206 0.7352 0.6476 0.6806 0.7158 0.6793 20240808-23:26:37 training model 4 20240808-23:26:37 training model 2 20240808-23:30:19 train_perplexity 36 model 2 1.174313732025028 20240808-23:30:20 train_perplexity 36 model 4 1.1803050246224855 20240808-23:30:26 test_perplexity 36 model 2 1.1691197941373663 20240808-23:30:27 test_perplexity 36 model 4 1.1726951581451641 20240808-23:31:49 test_accuracy 36 model 4 val 1042 / 1593 20240808-23:31:50 test_accuracy 36 model 2 val 1087 / 1571 20240808-23:31:52 wrote gpt_004.pth 20240808-23:31:52 wrote gpt_002.pth 20240808-23:32:05 wrote non_validated_0036_04.png 20240808-23:32:19 wrote non_validated_0036_02.png 20240808-23:32:19 wrote state.pth 20240808-23:32:19 --- epoch 37 ---------------------------------------- 20240808-23:32:19 current_test_accuracies 0.6280 0.7037 0.6919 0.6495 0.6541 0.7352 0.6476 0.6806 0.7158 0.6793 20240808-23:32:19 training model 0 20240808-23:32:19 training model 6 20240808-23:36:01 train_perplexity 37 model 6 1.1768025237591309 20240808-23:36:02 train_perplexity 37 model 0 1.1774161516949555 20240808-23:36:08 test_perplexity 37 model 6 1.1688507755614368 20240808-23:36:09 test_perplexity 37 model 0 1.1675181949146523 20240808-23:37:28 test_accuracy 37 model 0 val 1148 / 1622 20240808-23:37:30 test_accuracy 37 model 6 val 1113 / 1576 20240808-23:37:32 wrote gpt_000.pth 20240808-23:37:32 wrote gpt_006.pth 20240808-23:37:46 wrote non_validated_0037_00.png 20240808-23:37:59 wrote non_validated_0037_06.png 20240808-23:37:59 wrote state.pth 20240808-23:37:59 --- epoch 38 ---------------------------------------- 20240808-23:37:59 current_test_accuracies 0.7078 0.7037 0.6919 0.6495 0.6541 0.7352 0.7062 0.6806 0.7158 0.6793 20240808-23:37:59 training model 3 20240808-23:37:59 training model 4 20240808-23:41:40 train_perplexity 38 model 4 1.1750984563894262 20240808-23:41:44 train_perplexity 38 model 3 1.1753496769669616 20240808-23:41:47 test_perplexity 38 model 4 1.167961945170015 20240808-23:41:49 test_perplexity 38 model 3 1.1679425011600182 20240808-23:43:08 test_accuracy 38 model 4 val 1223 / 1614 20240808-23:43:12 test_accuracy 38 model 3 val 1165 / 1598 20240808-23:43:13 wrote gpt_003.pth 20240808-23:43:14 wrote gpt_004.pth 20240808-23:43:27 wrote non_validated_0038_03.png 20240808-23:43:40 wrote non_validated_0038_04.png 20240808-23:43:40 wrote state.pth 20240808-23:43:40 --- epoch 39 ---------------------------------------- 20240808-23:43:40 current_test_accuracies 0.7078 0.7037 0.6919 0.7290 0.7577 0.7352 0.7062 0.6806 0.7158 0.6793 20240808-23:43:40 training model 9 20240808-23:43:40 training model 7 20240808-23:47:22 train_perplexity 39 model 7 1.1750902567784867 20240808-23:47:24 train_perplexity 39 model 9 1.1747775061050014 20240808-23:47:29 test_perplexity 39 model 7 1.169620238646557 20240808-23:47:30 test_perplexity 39 model 9 1.1673067106816255 20240808-23:48:49 test_accuracy 39 model 7 val 1114 / 1609 20240808-23:48:52 test_accuracy 39 model 9 val 1144 / 1587 20240808-23:48:54 wrote gpt_009.pth 20240808-23:48:54 wrote gpt_007.pth 20240808-23:49:07 wrote non_validated_0039_09.png 20240808-23:49:20 wrote non_validated_0039_07.png 20240808-23:49:20 wrote state.pth 20240808-23:49:20 --- epoch 40 ---------------------------------------- 20240808-23:49:20 current_test_accuracies 0.7078 0.7037 0.6919 0.7290 0.7577 0.7352 0.7062 0.6924 0.7158 0.7209 20240808-23:49:20 training model 2 20240808-23:49:20 training model 7 20240808-23:53:03 train_perplexity 40 model 7 1.1716512885652879 20240808-23:53:04 train_perplexity 40 model 2 1.1709468030958243 20240808-23:53:10 test_perplexity 40 model 7 1.1644445601591638 20240808-23:53:11 test_perplexity 40 model 2 1.1659562774682124 20240808-23:54:30 test_accuracy 40 model 2 val 1187 / 1602 20240808-23:54:32 test_accuracy 40 model 7 val 1191 / 1570 20240808-23:54:34 wrote gpt_002.pth 20240808-23:54:34 wrote gpt_007.pth 20240808-23:54:48 wrote non_validated_0040_02.png 20240808-23:55:01 wrote non_validated_0040_07.png 20240808-23:55:01 wrote state.pth 20240808-23:55:01 --- epoch 41 ---------------------------------------- 20240808-23:55:01 current_test_accuracies 0.7078 0.7037 0.7409 0.7290 0.7577 0.7352 0.7062 0.7586 0.7158 0.7209 20240808-23:55:01 training model 1 20240808-23:55:01 training model 6 20240808-23:58:44 train_perplexity 41 model 6 1.173059476753474 20240808-23:58:44 train_perplexity 41 model 1 1.1710539504297313 20240808-23:58:51 test_perplexity 41 model 6 1.1662193649555601 20240808-23:58:51 test_perplexity 41 model 1 1.1651618604733192 20240809-00:00:12 test_accuracy 41 model 1 val 1238 / 1616 20240809-00:00:14 test_accuracy 41 model 6 val 1206 / 1585 20240809-00:00:16 wrote gpt_001.pth 20240809-00:00:16 wrote gpt_006.pth 20240809-00:00:29 wrote non_validated_0041_01.png 20240809-00:00:43 wrote non_validated_0041_06.png 20240809-00:00:43 wrote state.pth 20240809-00:00:43 --- epoch 42 ---------------------------------------- 20240809-00:00:43 current_test_accuracies 0.7078 0.7661 0.7409 0.7290 0.7577 0.7352 0.7609 0.7586 0.7158 0.7209 20240809-00:00:43 training model 0 20240809-00:00:43 training model 8 20240809-00:04:25 train_perplexity 42 model 8 1.1716247896592207 20240809-00:04:28 train_perplexity 42 model 0 1.1727723443385603 20240809-00:04:31 test_perplexity 42 model 8 1.164606763360764 20240809-00:04:34 test_perplexity 42 model 0 1.1664222217017648 20240809-00:05:53 test_accuracy 42 model 8 val 1251 / 1612 20240809-00:05:54 test_accuracy 42 model 0 val 1234 / 1622 20240809-00:05:56 wrote gpt_000.pth 20240809-00:05:56 wrote gpt_008.pth 20240809-00:06:09 wrote non_validated_0042_00.png 20240809-00:06:23 wrote non_validated_0042_08.png 20240809-00:06:23 wrote state.pth 20240809-00:06:23 --- epoch 43 ---------------------------------------- 20240809-00:06:23 current_test_accuracies 0.7608 0.7661 0.7409 0.7290 0.7577 0.7352 0.7609 0.7586 0.7761 0.7209 20240809-00:06:23 training model 9 20240809-00:06:23 training model 3 20240809-00:10:03 train_perplexity 43 model 3 1.1716307022261394 20240809-00:10:10 test_perplexity 43 model 3 1.1676979186470555 20240809-00:10:11 train_perplexity 43 model 9 1.1710564026024977 20240809-00:10:16 test_perplexity 43 model 9 1.1645177171357286 20240809-00:11:34 test_accuracy 43 model 3 val 1276 / 1630 20240809-00:11:34 test_accuracy 43 model 9 val 1302 / 1602 20240809-00:11:36 wrote gpt_009.pth 20240809-00:11:37 wrote gpt_003.pth 20240809-00:11:50 wrote non_validated_0043_09.png 20240809-00:12:03 wrote non_validated_0043_03.png 20240809-00:12:03 wrote state.pth 20240809-00:12:03 --- epoch 44 ---------------------------------------- 20240809-00:12:03 current_test_accuracies 0.7608 0.7661 0.7409 0.7828 0.7577 0.7352 0.7609 0.7586 0.7761 0.8127 20240809-00:12:03 training model 5 20240809-00:12:03 training model 2 20240809-00:15:45 train_perplexity 44 model 2 1.168005489718492 20240809-00:15:50 train_perplexity 44 model 5 1.171088561495658 20240809-00:15:52 test_perplexity 44 model 2 1.1628380389292006 20240809-00:15:55 test_perplexity 44 model 5 1.1643755025003486 20240809-00:17:15 test_accuracy 44 model 2 val 1304 / 1594 20240809-00:17:16 test_accuracy 44 model 5 val 1201 / 1592 20240809-00:17:18 wrote gpt_005.pth 20240809-00:17:19 wrote gpt_002.pth 20240809-00:17:32 wrote non_validated_0044_05.png 20240809-00:17:45 wrote non_validated_0044_02.png 20240809-00:17:45 wrote state.pth 20240809-00:17:45 --- epoch 45 ---------------------------------------- 20240809-00:17:45 current_test_accuracies 0.7608 0.7661 0.8181 0.7828 0.7577 0.7544 0.7609 0.7586 0.7761 0.8127 20240809-00:17:45 training model 5 20240809-00:17:45 training model 4 20240809-00:21:25 train_perplexity 45 model 4 1.1716618790574687 20240809-00:21:31 test_perplexity 45 model 4 1.164041803879029 20240809-00:21:33 wrote gpt_005.pth 20240809-00:21:34 wrote gpt_004.pth 20240809-06:14:32 argv ./main.py --result_dir=results_noise_10 --nb_train_samples=40000 --nb_test_samples=2000 --grids_world_tasks=replace_color,translate,grow,half_fill,frame,detect,corners,contact --accuracy_to_make_c_quizzes=0.95 --prompt_noise=0.05 --nb_gpts=10 --max_fail_to_validate=5 --proba_understands=0.75 --seed 1232 --resume --inference_batch_size=25 20240809-06:14:32 args.log_filename train.log 20240809-06:14:32 args.result_dir results_noise_10 20240809-06:14:32 args.seed 1232 20240809-06:14:32 args.resume True 20240809-06:14:32 args.max_percents_of_test_in_train -1 20240809-06:14:32 args.log_command None 20240809-06:14:32 args.nb_epochs 10000 20240809-06:14:32 args.batch_size 25 20240809-06:14:32 args.physical_batch_size None 20240809-06:14:32 args.inference_batch_size 25 20240809-06:14:32 args.nb_train_samples 40000 20240809-06:14:32 args.nb_test_samples 2000 20240809-06:14:32 args.nb_new_c_quizzes_for_train None 20240809-06:14:32 args.nb_new_c_quizzes_for_test None 20240809-06:14:32 args.learning_rate 0.0005 20240809-06:14:32 args.schedule_free False 20240809-06:14:32 args.model 37M 20240809-06:14:32 args.dim_model 512 20240809-06:14:32 args.dim_keys 64 20240809-06:14:32 args.dim_hidden 2048 20240809-06:14:32 args.nb_heads 8 20240809-06:14:32 args.nb_blocks 12 20240809-06:14:32 args.dropout 0.1 20240809-06:14:32 args.deterministic_synthesis False 20240809-06:14:32 args.problem grids 20240809-06:14:32 args.nb_threads 1 20240809-06:14:32 args.gpus all 20240809-06:14:32 args.nb_gpts 10 20240809-06:14:32 args.max_fail_to_validate 5 20240809-06:14:32 args.accuracy_to_make_c_quizzes 0.95 20240809-06:14:32 args.proba_understands 0.75 20240809-06:14:32 args.proba_not_understands 0.5 20240809-06:14:32 args.temperature_hot 1.5 20240809-06:14:32 args.temperature_cold 1 20240809-06:14:32 args.prompt_noise 0.05 20240809-06:14:32 args.nb_averaging_rounds 3 20240809-06:14:32 args.dirty_debug False 20240809-06:14:32 args.test None 20240809-06:14:32 args.grids_world_tasks replace_color,translate,grow,half_fill,frame,detect,corners,contact 20240809-06:14:32 args.grids_science_tasks None 20240809-06:14:32 args.sky_height 6 20240809-06:14:32 args.sky_width 8 20240809-06:14:32 args.sky_nb_birds 3 20240809-06:14:32 args.sky_nb_iterations 2 20240809-06:14:32 args.sky_speed 3 20240809-06:14:32 main_device cuda:0 gpus ['cuda:0', 'cuda:1'] 20240809-06:14:32 vocabulary_size 15 20240809-06:14:32 creating model 0 and its w_quizzes 20240809-06:14:45 creating model 1 and its w_quizzes 20240809-06:14:58 creating model 2 and its w_quizzes 20240809-06:15:12 creating model 3 and its w_quizzes 20240809-06:15:26 creating model 4 and its w_quizzes 20240809-06:15:39 creating model 5 and its w_quizzes 20240809-06:15:52 creating model 6 and its w_quizzes 20240809-06:16:05 creating model 7 and its w_quizzes 20240809-06:16:18 creating model 8 and its w_quizzes 20240809-06:16:31 creating model 9 and its w_quizzes 20240809-06:16:44 successfully loaded gpt_000.pth 20240809-06:16:45 successfully loaded gpt_001.pth 20240809-06:16:45 successfully loaded gpt_002.pth 20240809-06:16:45 successfully loaded gpt_003.pth 20240809-06:16:46 successfully loaded gpt_004.pth 20240809-06:16:46 successfully loaded gpt_005.pth 20240809-06:16:46 successfully loaded gpt_006.pth 20240809-06:16:46 successfully loaded gpt_007.pth 20240809-06:16:46 successfully loaded gpt_008.pth 20240809-06:16:47 successfully loaded gpt_009.pth 20240809-06:16:47 cannot find c_quizzes.pth 20240809-06:16:47 successfully loaded state.pth 20240809-06:16:47 nb_parameters 37819407 (37M) 20240809-06:16:47 nb_new_c_quizzes_for_train 400 nb_new_c_quizzes_for_test 20 20240809-06:16:47 wrote state.pth 20240809-06:16:47 --- epoch 45 ---------------------------------------- 20240809-06:16:47 current_test_accuracies 0.7608 0.7661 0.8181 0.7828 0.7577 0.7544 0.7609 0.7586 0.7761 0.8127 20240809-06:16:47 training model 5 20240809-06:16:47 training model 4 20240809-06:20:29 train_perplexity 45 model 4 1.168051925867842 20240809-06:20:32 train_perplexity 45 model 5 1.1665203412982612 20240809-06:20:36 test_perplexity 45 model 4 1.1646048743801463 20240809-06:20:38 test_perplexity 45 model 5 1.1614118881915179 20240809-06:23:04 test_accuracy 45 model 4 val 1280 / 1605 20240809-06:23:05 test_accuracy 45 model 5 val 1345 / 1616 20240809-06:23:07 wrote gpt_005.pth 20240809-06:23:08 wrote gpt_004.pth 20240809-06:23:25 wrote non_validated_0045_05.png 20240809-06:23:42 wrote non_validated_0045_04.png 20240809-06:23:42 wrote state.pth 20240809-06:23:42 --- epoch 46 ---------------------------------------- 20240809-06:23:42 current_test_accuracies 0.7608 0.7661 0.8181 0.7828 0.7975 0.8323 0.7609 0.7586 0.7761 0.8127 20240809-06:23:42 training model 7 20240809-06:23:42 training model 0 20240809-06:27:25 train_perplexity 46 model 0 1.169200241887764 20240809-06:27:27 train_perplexity 46 model 7 1.1690835063939415 20240809-06:27:33 test_perplexity 46 model 0 1.1641647258801475 20240809-06:27:34 test_perplexity 46 model 7 1.1619035178008854 20240809-06:29:57 test_accuracy 46 model 0 val 1274 / 1629 20240809-06:30:01 test_accuracy 46 model 7 val 1291 / 1592 20240809-06:30:02 wrote gpt_007.pth 20240809-06:30:03 wrote gpt_000.pth 20240809-06:30:20 wrote non_validated_0046_07.png 20240809-06:30:37 wrote non_validated_0046_00.png 20240809-06:30:37 wrote state.pth 20240809-06:30:37 --- epoch 47 ---------------------------------------- 20240809-06:30:37 current_test_accuracies 0.7821 0.7661 0.8181 0.7828 0.7975 0.8323 0.7609 0.8109 0.7761 0.8127 20240809-06:30:37 training model 6 20240809-06:30:37 training model 1 20240809-06:34:19 train_perplexity 47 model 1 1.1681599002214 20240809-06:34:22 train_perplexity 47 model 6 1.1690473870757183 20240809-06:34:26 test_perplexity 47 model 1 1.163113566700364 20240809-06:34:28 test_perplexity 47 model 6 1.1633805761587777 20240809-06:36:55 test_accuracy 47 model 1 val 1313 / 1614 20240809-06:36:57 test_accuracy 47 model 6 val 1274 / 1584 20240809-06:36:58 wrote gpt_006.pth 20240809-06:36:59 wrote gpt_001.pth 20240809-06:37:17 wrote non_validated_0047_06.png 20240809-06:37:34 wrote non_validated_0047_01.png 20240809-06:37:34 wrote state.pth 20240809-06:37:34 --- epoch 48 ---------------------------------------- 20240809-06:37:34 current_test_accuracies 0.7821 0.8135 0.8181 0.7828 0.7975 0.8323 0.8043 0.8109 0.7761 0.8127 20240809-06:37:34 training model 8 20240809-06:37:34 training model 0 20240809-06:41:16 train_perplexity 48 model 0 1.1669986272951303 20240809-06:41:19 train_perplexity 48 model 8 1.1693143813923144 20240809-06:41:22 test_perplexity 48 model 0 1.1622223389955644 20240809-06:41:25 test_perplexity 48 model 8 1.1621626680333315 20240809-06:43:54 test_accuracy 48 model 8 val 1336 / 1579 20240809-06:43:55 test_accuracy 48 model 0 val 1297 / 1590 20240809-06:43:57 wrote gpt_008.pth 20240809-06:43:58 wrote gpt_000.pth 20240809-06:44:15 wrote non_validated_0048_08.png 20240809-06:44:32 wrote non_validated_0048_00.png 20240809-06:44:32 wrote state.pth 20240809-06:44:32 --- epoch 49 ---------------------------------------- 20240809-06:44:32 current_test_accuracies 0.8157 0.8135 0.8181 0.7828 0.7975 0.8323 0.8043 0.8109 0.8461 0.8127 20240809-06:44:32 training model 3 20240809-06:44:32 training model 4 20240809-06:48:15 train_perplexity 49 model 4 1.1666587085719127 20240809-06:48:15 train_perplexity 49 model 3 1.1688294016105527 20240809-06:48:22 test_perplexity 49 model 4 1.1628349797389015 20240809-06:48:22 test_perplexity 49 model 3 1.1652004910675136 20240809-06:50:49 test_accuracy 49 model 3 val 1277 / 1610 20240809-06:50:51 test_accuracy 49 model 4 val 1311 / 1588 20240809-06:50:53 wrote gpt_003.pth 20240809-06:50:53 wrote gpt_004.pth 20240809-06:51:10 wrote non_validated_0049_03.png 20240809-06:51:27 wrote non_validated_0049_04.png 20240809-06:51:27 wrote state.pth 20240809-06:51:27 --- epoch 50 ---------------------------------------- 20240809-06:51:27 current_test_accuracies 0.8157 0.8135 0.8181 0.7932 0.8256 0.8323 0.8043 0.8109 0.8461 0.8127 20240809-06:51:27 training model 3 20240809-06:51:27 training model 6 20240809-06:55:10 train_perplexity 50 model 3 1.1667628366414307 20240809-06:55:10 train_perplexity 50 model 6 1.16740389048063 20240809-06:55:17 test_perplexity 50 model 3 1.164015321824315 20240809-06:55:17 test_perplexity 50 model 6 1.1627393316857892 20240809-06:57:44 test_accuracy 50 model 6 val 1329 / 1608 20240809-06:57:47 test_accuracy 50 model 3 val 1375 / 1598 20240809-06:57:48 wrote gpt_003.pth 20240809-06:57:49 wrote gpt_006.pth 20240809-06:58:06 wrote non_validated_0050_03.png 20240809-06:58:24 wrote non_validated_0050_06.png 20240809-06:58:24 wrote state.pth 20240809-06:58:24 --- epoch 51 ---------------------------------------- 20240809-06:58:24 current_test_accuracies 0.8157 0.8135 0.8181 0.8605 0.8256 0.8323 0.8265 0.8109 0.8461 0.8127 20240809-06:58:24 training model 7 20240809-06:58:24 training model 9 20240809-07:02:05 train_perplexity 51 model 9 1.168168346185757 20240809-07:02:09 train_perplexity 51 model 7 1.166648983988508 20240809-07:02:13 test_perplexity 51 model 9 1.1605819309265921 20240809-07:02:15 test_perplexity 51 model 7 1.1646466760595802 20240809-07:04:39 test_accuracy 51 model 7 val 1334 / 1625 20240809-07:04:43 test_accuracy 51 model 9 val 1281 / 1565 20240809-07:04:45 wrote gpt_007.pth 20240809-07:04:46 wrote gpt_009.pth 20240809-07:05:03 wrote non_validated_0051_07.png 20240809-07:05:20 wrote non_validated_0051_09.png 20240809-07:05:20 wrote state.pth 20240809-07:05:20 --- epoch 52 ---------------------------------------- 20240809-07:05:20 current_test_accuracies 0.8157 0.8135 0.8181 0.8605 0.8256 0.8323 0.8265 0.8209 0.8461 0.8185 20240809-07:05:20 training model 1 20240809-07:05:20 training model 0 20240809-07:09:03 train_perplexity 52 model 0 1.1656881947487636 20240809-07:09:03 train_perplexity 52 model 1 1.1660634626570314 20240809-07:09:10 test_perplexity 52 model 0 1.1593544244095642 20240809-07:09:10 test_perplexity 52 model 1 1.1620401600200305 20240809-07:11:39 test_accuracy 52 model 1 val 1321 / 1578 20240809-07:11:41 test_accuracy 52 model 0 val 1319 / 1564 20240809-07:11:43 wrote gpt_001.pth 20240809-07:11:44 wrote gpt_000.pth 20240809-07:12:01 wrote non_validated_0052_01.png 20240809-07:12:18 wrote non_validated_0052_00.png 20240809-07:12:18 wrote state.pth 20240809-07:12:18 --- epoch 53 ---------------------------------------- 20240809-07:12:18 current_test_accuracies 0.8434 0.8371 0.8181 0.8605 0.8256 0.8323 0.8265 0.8209 0.8461 0.8185 20240809-07:12:18 training model 2 20240809-07:12:18 training model 9 20240809-07:16:00 train_perplexity 53 model 2 1.1657574727104867 20240809-07:16:00 train_perplexity 53 model 9 1.1658024766713047 20240809-07:16:08 test_perplexity 53 model 2 1.1627436443931336 20240809-07:16:08 test_perplexity 53 model 9 1.1624117974162669 20240809-07:18:37 test_accuracy 53 model 2 val 1373 / 1594 20240809-07:18:37 test_accuracy 53 model 9 val 1330 / 1583 20240809-07:18:39 wrote gpt_002.pth 20240809-07:18:40 wrote gpt_009.pth 20240809-07:18:57 wrote non_validated_0053_02.png 20240809-07:19:14 wrote non_validated_0053_09.png 20240809-07:19:14 wrote state.pth 20240809-07:19:14 --- epoch 54 ---------------------------------------- 20240809-07:19:14 current_test_accuracies 0.8434 0.8371 0.8614 0.8605 0.8256 0.8323 0.8265 0.8209 0.8461 0.8402 20240809-07:19:14 training model 7 20240809-07:19:14 training model 4 20240809-07:22:57 train_perplexity 54 model 4 1.165215777911608 20240809-07:22:57 train_perplexity 54 model 7 1.1650827357780997 20240809-07:23:04 test_perplexity 54 model 4 1.1595503137992147 20240809-07:23:04 test_perplexity 54 model 7 1.1614080864174487 20240809-07:25:30 test_accuracy 54 model 7 val 1393 / 1604 20240809-07:25:33 test_accuracy 54 model 4 val 1361 / 1579 20240809-07:25:35 wrote gpt_007.pth 20240809-07:25:36 wrote gpt_004.pth 20240809-07:25:53 wrote non_validated_0054_07.png 20240809-07:26:10 wrote non_validated_0054_04.png 20240809-07:26:10 wrote state.pth 20240809-07:26:10 --- epoch 55 ---------------------------------------- 20240809-07:26:10 current_test_accuracies 0.8434 0.8371 0.8614 0.8605 0.8619 0.8323 0.8265 0.8685 0.8461 0.8402 20240809-07:26:10 training model 6 20240809-07:26:10 training model 5 20240809-07:29:52 train_perplexity 55 model 5 1.1647771385080925 20240809-07:29:54 train_perplexity 55 model 6 1.1655259875985353 20240809-07:29:59 test_perplexity 55 model 5 1.1614214729401602 20240809-07:30:01 test_perplexity 55 model 6 1.1626125779808676 20240809-07:32:28 test_accuracy 55 model 6 val 1394 / 1608 20240809-07:32:30 test_accuracy 55 model 5 val 1343 / 1575 20240809-07:32:32 wrote gpt_006.pth 20240809-07:32:33 wrote gpt_005.pth 20240809-07:32:50 wrote non_validated_0055_06.png 20240809-07:33:07 wrote non_validated_0055_05.png 20240809-07:33:07 wrote state.pth 20240809-07:33:07 --- epoch 56 ---------------------------------------- 20240809-07:33:07 current_test_accuracies 0.8434 0.8371 0.8614 0.8605 0.8619 0.8527 0.8669 0.8685 0.8461 0.8402 20240809-07:33:07 training model 1 20240809-07:33:07 training model 9 20240809-07:36:49 train_perplexity 56 model 9 1.1649379247432994 20240809-07:36:50 train_perplexity 56 model 1 1.1643672121240018 20240809-07:36:56 test_perplexity 56 model 9 1.1615666870373402 20240809-07:36:57 test_perplexity 56 model 1 1.1616098516074653 20240809-07:39:21 test_accuracy 56 model 9 val 1393 / 1603 20240809-07:39:23 test_accuracy 56 model 1 val 1341 / 1551 20240809-07:39:25 wrote gpt_001.pth 20240809-07:39:26 wrote gpt_009.pth 20240809-07:39:43 wrote non_validated_0056_01.png 20240809-07:40:00 wrote non_validated_0056_09.png 20240809-07:40:00 wrote state.pth 20240809-07:40:00 --- epoch 57 ---------------------------------------- 20240809-07:40:00 current_test_accuracies 0.8434 0.8646 0.8614 0.8605 0.8619 0.8527 0.8669 0.8685 0.8461 0.8690 20240809-07:40:00 training model 0 20240809-07:40:00 training model 8 20240809-07:43:42 train_perplexity 57 model 8 1.1666055467157859 20240809-07:43:45 train_perplexity 57 model 0 1.1632583532287368 20240809-07:43:48 test_perplexity 57 model 8 1.1635651971709409 20240809-07:43:51 test_perplexity 57 model 0 1.1595648612146179 20240809-07:46:17 test_accuracy 57 model 8 val 1385 / 1621 20240809-07:46:18 test_accuracy 57 model 0 val 1408 / 1605 20240809-07:46:20 wrote gpt_000.pth 20240809-07:46:21 wrote gpt_008.pth 20240809-07:46:38 wrote non_validated_0057_00.png 20240809-07:46:55 wrote non_validated_0057_08.png 20240809-07:46:55 wrote state.pth 20240809-07:46:55 --- epoch 58 ---------------------------------------- 20240809-07:46:55 current_test_accuracies 0.8773 0.8646 0.8614 0.8605 0.8619 0.8527 0.8669 0.8685 0.8544 0.8690 20240809-07:46:55 training model 5 20240809-07:46:55 training model 8 20240809-07:50:36 train_perplexity 58 model 8 1.1647426946110033 20240809-07:50:42 test_perplexity 58 model 8 1.1612321140690676 20240809-07:50:42 train_perplexity 58 model 5 1.1631845564298393 20240809-07:50:46 test_perplexity 58 model 5 1.1604182694988114 20240809-07:53:11 test_accuracy 58 model 8 val 1426 / 1604 20240809-07:53:12 test_accuracy 58 model 5 val 1412 / 1610 20240809-07:53:14 wrote gpt_005.pth 20240809-07:53:15 wrote gpt_008.pth 20240809-07:53:32 wrote non_validated_0058_05.png 20240809-07:53:49 wrote non_validated_0058_08.png 20240809-07:53:49 wrote state.pth 20240809-07:53:49 --- epoch 59 ---------------------------------------- 20240809-07:53:49 current_test_accuracies 0.8773 0.8646 0.8614 0.8605 0.8619 0.8770 0.8669 0.8685 0.8890 0.8690 20240809-07:53:49 training model 3 20240809-07:53:49 training model 2 20240809-07:57:31 train_perplexity 59 model 2 1.1639808671641652 20240809-07:57:33 train_perplexity 59 model 3 1.1649447207074097 20240809-07:57:38 test_perplexity 59 model 2 1.161536466453471 20240809-07:57:39 test_perplexity 59 model 3 1.160686009374683 20240809-08:00:05 test_accuracy 59 model 2 val 1380 / 1606 20240809-08:00:09 test_accuracy 59 model 3 val 1346 / 1573 20240809-08:00:11 wrote gpt_003.pth 20240809-08:00:12 wrote gpt_002.pth 20240809-08:00:29 wrote non_validated_0059_03.png 20240809-08:00:46 wrote non_validated_0059_02.png 20240809-08:00:46 wrote state.pth 20240809-08:00:46 --- epoch 60 ---------------------------------------- 20240809-08:00:46 current_test_accuracies 0.8773 0.8646 0.8593 0.8557 0.8619 0.8770 0.8669 0.8685 0.8890 0.8690 20240809-08:00:46 training model 3 20240809-08:00:46 training model 2 20240809-08:04:29 train_perplexity 60 model 2 1.1631366142252515 20240809-08:04:29 train_perplexity 60 model 3 1.1637382457833172 20240809-08:04:36 test_perplexity 60 model 2 1.1602947480541856 20240809-08:04:36 test_perplexity 60 model 3 1.1609722881835898 20240809-08:07:03 test_accuracy 60 model 3 val 1409 / 1601 20240809-08:07:06 test_accuracy 60 model 2 val 1409 / 1572 20240809-08:07:08 wrote gpt_003.pth 20240809-08:07:09 wrote gpt_002.pth 20240809-08:07:26 wrote non_validated_0060_03.png 20240809-08:07:43 wrote non_validated_0060_02.png 20240809-08:07:43 wrote state.pth 20240809-08:07:43 --- epoch 61 ---------------------------------------- 20240809-08:07:43 current_test_accuracies 0.8773 0.8646 0.8963 0.8801 0.8619 0.8770 0.8669 0.8685 0.8890 0.8690 20240809-08:07:43 training model 4 20240809-08:07:43 training model 1 20240809-08:11:26 train_perplexity 61 model 1 1.1634509770285804 20240809-08:11:26 train_perplexity 61 model 4 1.1637598577404966 20240809-08:11:33 test_perplexity 61 model 1 1.1603024372588728 20240809-08:11:34 test_perplexity 61 model 4 1.1605649801020397 20240809-08:14:03 test_accuracy 61 model 1 val 1370 / 1590 20240809-08:14:04 test_accuracy 61 model 4 val 1410 / 1599 20240809-08:14:06 wrote gpt_004.pth 20240809-08:14:07 wrote gpt_001.pth 20240809-08:14:24 wrote non_validated_0061_04.png 20240809-08:14:41 wrote non_validated_0061_01.png 20240809-08:14:41 wrote state.pth 20240809-08:14:41 --- epoch 62 ---------------------------------------- 20240809-08:14:41 current_test_accuracies 0.8773 0.8616 0.8963 0.8801 0.8818 0.8770 0.8669 0.8685 0.8890 0.8690 20240809-08:14:41 training model 1 20240809-08:14:41 training model 6 20240809-08:18:23 train_perplexity 62 model 6 1.1634648510948986 20240809-08:18:24 train_perplexity 62 model 1 1.162464457774639 20240809-08:18:31 test_perplexity 62 model 6 1.1595354909754014 20240809-08:18:31 test_perplexity 62 model 1 1.1600246534732221 20240809-08:20:57 test_accuracy 62 model 6 val 1463 / 1605 20240809-08:21:02 test_accuracy 62 model 1 val 1420 / 1567 20240809-08:21:04 wrote gpt_001.pth 20240809-08:21:04 wrote gpt_006.pth 20240809-08:21:22 wrote non_validated_0062_01.png 20240809-08:21:39 wrote non_validated_0062_06.png 20240809-08:21:39 wrote state.pth 20240809-08:21:39 --- epoch 63 ---------------------------------------- 20240809-08:21:39 current_test_accuracies 0.8773 0.9062 0.8963 0.8801 0.8818 0.8770 0.9115 0.8685 0.8890 0.8690 20240809-08:21:39 training model 7 20240809-08:21:39 training model 9 20240809-08:25:20 train_perplexity 63 model 9 1.1634124416394727 20240809-08:25:25 train_perplexity 63 model 7 1.1639348838437984 20240809-08:25:27 test_perplexity 63 model 9 1.1614469717939546 20240809-08:25:30 test_perplexity 63 model 7 1.1602874245760697 20240809-08:27:55 test_accuracy 63 model 9 val 1424 / 1617 20240809-08:28:00 test_accuracy 63 model 7 val 1331 / 1544 20240809-08:28:02 wrote gpt_007.pth 20240809-08:28:03 wrote gpt_009.pth 20240809-08:28:20 wrote non_validated_0063_07.png 20240809-08:28:36 wrote non_validated_0063_09.png 20240809-08:28:37 wrote state.pth 20240809-08:28:37 --- epoch 64 ---------------------------------------- 20240809-08:28:37 current_test_accuracies 0.8773 0.9062 0.8963 0.8801 0.8818 0.8770 0.9115 0.8620 0.8890 0.8806 20240809-08:28:37 training model 7 20240809-08:28:37 training model 5 20240809-08:32:19 train_perplexity 64 model 5 1.1624640654731189 20240809-08:32:22 train_perplexity 64 model 7 1.1627790862033502 20240809-08:32:26 test_perplexity 64 model 5 1.161309085238665 20240809-08:32:28 test_perplexity 64 model 7 1.161149338032252 20240809-08:34:54 test_accuracy 64 model 5 val 1423 / 1607 20240809-08:34:55 test_accuracy 64 model 7 val 1382 / 1611 20240809-08:34:57 wrote gpt_007.pth 20240809-08:34:58 wrote gpt_005.pth 20240809-08:35:15 wrote non_validated_0064_07.png 20240809-08:35:32 wrote non_validated_0064_05.png 20240809-08:35:32 wrote state.pth 20240809-08:35:32 --- epoch 65 ---------------------------------------- 20240809-08:35:32 current_test_accuracies 0.8773 0.9062 0.8963 0.8801 0.8818 0.8855 0.9115 0.8579 0.8890 0.8806 20240809-08:35:32 training model 7 20240809-08:35:32 training model 0 20240809-08:39:15 train_perplexity 65 model 0 1.162547437176955 20240809-08:39:17 train_perplexity 65 model 7 1.1616336175792386 20240809-08:39:22 test_perplexity 65 model 0 1.1619277938259558 20240809-08:39:23 test_perplexity 65 model 7 1.1611010151759569 20240809-08:41:50 test_accuracy 65 model 7 val 1471 / 1626 20240809-08:41:51 test_accuracy 65 model 0 val 1431 / 1615 20240809-08:41:53 wrote gpt_007.pth 20240809-08:41:53 wrote gpt_000.pth 20240809-08:42:10 wrote non_validated_0065_07.png 20240809-08:42:27 wrote non_validated_0065_00.png 20240809-08:42:27 wrote state.pth 20240809-08:42:27 --- epoch 66 ---------------------------------------- 20240809-08:42:27 current_test_accuracies 0.8861 0.9062 0.8963 0.8801 0.8818 0.8855 0.9115 0.9047 0.8890 0.8806 20240809-08:42:27 training model 3 20240809-08:42:27 training model 9 20240809-08:46:10 train_perplexity 66 model 9 1.162250378628014 20240809-08:46:11 train_perplexity 66 model 3 1.1621618197106687 20240809-08:46:17 test_perplexity 66 model 9 1.1615679568469837 20240809-08:46:18 test_perplexity 66 model 3 1.1596128216343125 20240809-08:48:44 test_accuracy 66 model 9 val 1441 / 1609 20240809-08:48:47 test_accuracy 66 model 3 val 1389 / 1582 20240809-08:48:49 wrote gpt_003.pth 20240809-08:48:49 wrote gpt_009.pth 20240809-08:49:06 wrote non_validated_0066_03.png 20240809-08:49:23 wrote non_validated_0066_09.png 20240809-08:49:23 wrote state.pth 20240809-08:49:23 --- epoch 67 ---------------------------------------- 20240809-08:49:23 current_test_accuracies 0.8861 0.9062 0.8963 0.8780 0.8818 0.8855 0.9115 0.9047 0.8890 0.8956 20240809-08:49:23 training model 3 20240809-08:49:23 training model 4 20240809-08:53:06 train_perplexity 67 model 4 1.162557318709215 20240809-08:53:06 train_perplexity 67 model 3 1.162114415522144 20240809-08:53:14 test_perplexity 67 model 4 1.158602633189422 20240809-08:53:14 test_perplexity 67 model 3 1.1608811276861728 20240809-08:55:38 test_accuracy 67 model 3 val 1489 / 1633 20240809-08:55:42 test_accuracy 67 model 4 val 1402 / 1593 20240809-08:55:43 wrote gpt_003.pth 20240809-08:55:44 wrote gpt_004.pth 20240809-08:56:01 wrote non_validated_0067_03.png 20240809-08:56:18 wrote non_validated_0067_04.png 20240809-08:56:18 wrote state.pth 20240809-08:56:18 --- epoch 68 ---------------------------------------- 20240809-08:56:18 current_test_accuracies 0.8861 0.9062 0.8963 0.9118 0.8801 0.8855 0.9115 0.9047 0.8890 0.8956 20240809-08:56:18 training model 4 20240809-08:56:18 training model 5 20240809-09:00:01 train_perplexity 68 model 5 1.1613584431322754 20240809-09:00:02 train_perplexity 68 model 4 1.1613152252982555 20240809-09:00:08 test_perplexity 68 model 5 1.1588065568224803 20240809-09:00:09 test_perplexity 68 model 4 1.160640689273932 20240809-09:02:37 test_accuracy 68 model 4 val 1417 / 1593 20240809-09:02:38 test_accuracy 68 model 5 val 1420 / 1595 20240809-09:02:40 wrote gpt_004.pth 20240809-09:02:41 wrote gpt_005.pth 20240809-09:02:58 wrote non_validated_0068_04.png 20240809-09:03:15 wrote non_validated_0068_05.png 20240809-09:03:15 wrote state.pth 20240809-09:03:15 --- epoch 69 ---------------------------------------- 20240809-09:03:15 current_test_accuracies 0.8861 0.9062 0.8963 0.9118 0.8895 0.8903 0.9115 0.9047 0.8890 0.8956 20240809-09:03:15 training model 0 20240809-09:03:15 training model 8 20240809-09:06:58 train_perplexity 69 model 8 1.163884693888416 20240809-09:06:58 train_perplexity 69 model 0 1.162228574892704 20240809-09:07:05 test_perplexity 69 model 8 1.1598711291094816 20240809-09:07:05 test_perplexity 69 model 0 1.1605214551036598 20240809-09:09:33 test_accuracy 69 model 8 val 1389 / 1597 20240809-09:09:34 test_accuracy 69 model 0 val 1404 / 1585 20240809-09:09:36 wrote gpt_000.pth 20240809-09:09:36 wrote gpt_008.pth 20240809-09:09:54 wrote non_validated_0069_00.png 20240809-09:10:11 wrote non_validated_0069_08.png 20240809-09:10:11 wrote state.pth 20240809-09:10:11 --- epoch 70 ---------------------------------------- 20240809-09:10:11 current_test_accuracies 0.8858 0.9062 0.8963 0.9118 0.8895 0.8903 0.9115 0.9047 0.8698 0.8956 20240809-09:10:11 training model 8 20240809-09:10:11 training model 0 20240809-09:13:54 train_perplexity 70 model 8 1.16254533620821 20240809-09:13:54 train_perplexity 70 model 0 1.1612041052912143 20240809-09:14:01 test_perplexity 70 model 8 1.1586459751863465 20240809-09:14:01 test_perplexity 70 model 0 1.157940611777624 20240809-09:16:32 test_accuracy 70 model 8 val 1430 / 1584 20240809-09:16:32 test_accuracy 70 model 0 val 1412 / 1583 20240809-09:16:34 wrote gpt_008.pth 20240809-09:16:35 wrote gpt_000.pth 20240809-09:16:52 wrote non_validated_0070_08.png 20240809-09:17:09 wrote non_validated_0070_00.png 20240809-09:17:09 wrote state.pth 20240809-09:17:09 --- epoch 71 ---------------------------------------- 20240809-09:17:09 current_test_accuracies 0.8920 0.9062 0.8963 0.9118 0.8895 0.8903 0.9115 0.9047 0.9028 0.8956 20240809-09:17:09 training model 4 20240809-09:17:09 training model 5 20240809-09:20:53 train_perplexity 71 model 5 1.1610132478770139 20240809-09:20:55 train_perplexity 71 model 4 1.1608937474368728 20240809-09:20:59 test_perplexity 71 model 5 1.159489105162873 20240809-09:21:01 test_perplexity 71 model 4 1.1588748412548906 20240809-09:23:27 test_accuracy 71 model 5 val 1435 / 1614 20240809-09:23:29 test_accuracy 71 model 4 val 1431 / 1584 20240809-09:23:31 wrote gpt_004.pth 20240809-09:23:32 wrote gpt_005.pth 20240809-09:23:49 wrote non_validated_0071_04.png 20240809-09:24:06 wrote non_validated_0071_05.png 20240809-09:24:06 wrote state.pth 20240809-09:24:06 --- epoch 72 ---------------------------------------- 20240809-09:24:06 current_test_accuracies 0.8920 0.9062 0.8963 0.9118 0.9034 0.8891 0.9115 0.9047 0.9028 0.8956 20240809-09:24:06 training model 5 20240809-09:24:06 training model 0 20240809-09:27:49 train_perplexity 72 model 0 1.1600859608454073 20240809-09:27:51 train_perplexity 72 model 5 1.1603838069520185 20240809-09:27:56 test_perplexity 72 model 0 1.1599079578678244 20240809-09:27:57 test_perplexity 72 model 5 1.1598208734502293 20240809-09:30:25 test_accuracy 72 model 5 val 1475 / 1614 20240809-09:30:26 test_accuracy 72 model 0 val 1452 / 1595 20240809-09:30:28 wrote gpt_005.pth 20240809-09:30:29 wrote gpt_000.pth 20240809-09:30:46 wrote non_validated_0072_05.png 20240809-09:31:03 wrote non_validated_0072_00.png 20240809-09:31:03 wrote state.pth 20240809-09:31:03 --- epoch 73 ---------------------------------------- 20240809-09:31:03 current_test_accuracies 0.9103 0.9062 0.8963 0.9118 0.9034 0.9139 0.9115 0.9047 0.9028 0.8956 20240809-09:31:03 training model 9 20240809-09:31:03 training model 2 20240809-09:34:46 train_perplexity 73 model 2 1.1618315681496338 20240809-09:34:46 train_perplexity 73 model 9 1.1615327398542914 20240809-09:34:53 test_perplexity 73 model 2 1.1594817336410694 20240809-09:34:53 test_perplexity 73 model 9 1.1602816148427126 20240809-09:37:21 test_accuracy 73 model 9 val 1430 / 1607 20240809-09:37:23 test_accuracy 73 model 2 val 1430 / 1585 20240809-09:37:25 wrote gpt_009.pth 20240809-09:37:26 wrote gpt_002.pth 20240809-09:37:43 wrote non_validated_0073_09.png 20240809-09:38:00 wrote non_validated_0073_02.png 20240809-09:38:00 wrote state.pth 20240809-09:38:00 --- epoch 74 ---------------------------------------- 20240809-09:38:00 current_test_accuracies 0.9103 0.9062 0.9022 0.9118 0.9034 0.9139 0.9115 0.9047 0.9028 0.8899 20240809-09:38:00 training model 9 20240809-09:38:00 training model 2 20240809-09:41:43 train_perplexity 74 model 2 1.1613043627103827 20240809-09:41:43 train_perplexity 74 model 9 1.1612173899947953 20240809-09:41:50 test_perplexity 74 model 2 1.1568509363777764 20240809-09:41:50 test_perplexity 74 model 9 1.157494639816132 20240809-09:44:17 test_accuracy 74 model 2 val 1471 / 1622 20240809-09:44:19 test_accuracy 74 model 9 val 1459 / 1591 20240809-09:44:21 wrote gpt_009.pth 20240809-09:44:21 wrote gpt_002.pth 20240809-09:44:39 wrote non_validated_0074_09.png 20240809-09:44:56 wrote non_validated_0074_02.png 20240809-09:44:56 wrote state.pth 20240809-09:44:56 --- epoch 75 ---------------------------------------- 20240809-09:44:56 current_test_accuracies 0.9103 0.9062 0.9069 0.9118 0.9034 0.9139 0.9115 0.9047 0.9028 0.9170 20240809-09:44:56 training model 8 20240809-09:44:56 training model 4 20240809-09:48:38 train_perplexity 75 model 4 1.1604486366280942 20240809-09:48:38 train_perplexity 75 model 8 1.1615000241211235 20240809-09:48:46 test_perplexity 75 model 4 1.1595996170882978 20240809-09:48:46 test_perplexity 75 model 8 1.1587149201924354 20240809-09:51:13 test_accuracy 75 model 8 val 1458 / 1603 20240809-09:51:14 test_accuracy 75 model 4 val 1481 / 1611 20240809-09:51:16 wrote gpt_008.pth 20240809-09:51:16 wrote gpt_004.pth 20240809-09:51:33 wrote non_validated_0075_08.png 20240809-09:51:50 wrote non_validated_0075_04.png 20240809-09:51:50 wrote state.pth 20240809-09:51:50 --- epoch 76 ---------------------------------------- 20240809-09:51:50 current_test_accuracies 0.9103 0.9062 0.9069 0.9118 0.9193 0.9139 0.9115 0.9047 0.9095 0.9170 20240809-09:51:50 training model 7 20240809-09:51:50 training model 1 20240809-09:55:31 train_perplexity 76 model 1 1.1615409744425582 20240809-09:55:37 test_perplexity 76 model 1 1.156414621338309 20240809-09:55:38 train_perplexity 76 model 7 1.161555352224951 20240809-09:55:42 test_perplexity 76 model 7 1.157286005835842 20240809-09:58:06 test_accuracy 76 model 1 val 1470 / 1618 20240809-09:58:10 test_accuracy 76 model 7 val 1455 / 1574 20240809-09:58:12 wrote gpt_007.pth 20240809-09:58:12 wrote gpt_001.pth 20240809-09:58:29 wrote non_validated_0076_07.png 20240809-09:58:47 wrote non_validated_0076_01.png 20240809-09:58:47 wrote state.pth 20240809-09:58:47 --- epoch 77 ---------------------------------------- 20240809-09:58:47 current_test_accuracies 0.9103 0.9085 0.9069 0.9118 0.9193 0.9139 0.9115 0.9244 0.9095 0.9170 20240809-09:58:47 training model 2 20240809-09:58:47 training model 1 20240809-10:02:28 train_perplexity 77 model 1 1.1611227661816372 20240809-10:02:32 train_perplexity 77 model 2 1.1603058268153612 20240809-10:02:36 test_perplexity 77 model 1 1.1564396763915983 20240809-10:02:38 test_perplexity 77 model 2 1.156945507210983 20240809-10:05:01 test_accuracy 77 model 1 val 1483 / 1613 20240809-10:05:05 test_accuracy 77 model 2 val 1479 / 1596 20240809-10:05:07 wrote gpt_002.pth 20240809-10:05:07 wrote gpt_001.pth 20240809-10:05:24 wrote non_validated_0077_02.png 20240809-10:05:41 wrote non_validated_0077_01.png 20240809-10:05:41 wrote state.pth 20240809-10:05:41 --- epoch 78 ---------------------------------------- 20240809-10:05:41 current_test_accuracies 0.9103 0.9194 0.9267 0.9118 0.9193 0.9139 0.9115 0.9244 0.9095 0.9170 20240809-10:05:41 training model 8 20240809-10:05:41 training model 0 20240809-10:09:23 train_perplexity 78 model 0 1.1603587161788067 20240809-10:09:27 train_perplexity 78 model 8 1.1613292626031804 20240809-10:09:30 test_perplexity 78 model 0 1.1575402680732256 20240809-10:09:32 test_perplexity 78 model 8 1.160392231697957 20240809-10:12:02 test_accuracy 78 model 0 val 1451 / 1587 20240809-10:12:04 test_accuracy 78 model 8 val 1443 / 1573 20240809-10:12:06 wrote gpt_008.pth 20240809-10:12:06 wrote gpt_000.pth 20240809-10:12:23 wrote non_validated_0078_08.png 20240809-10:12:40 wrote non_validated_0078_00.png 20240809-10:12:40 wrote state.pth 20240809-10:12:40 --- epoch 79 ---------------------------------------- 20240809-10:12:40 current_test_accuracies 0.9143 0.9194 0.9267 0.9118 0.9193 0.9139 0.9115 0.9244 0.9174 0.9170 20240809-10:12:40 training model 6 20240809-10:12:40 training model 3 20240809-10:16:21 train_perplexity 79 model 3 1.1615588495255755 20240809-10:16:27 test_perplexity 79 model 3 1.1588345474489408 20240809-10:16:28 train_perplexity 79 model 6 1.1627946533822069 20240809-10:16:32 test_perplexity 79 model 6 1.1587012027111334 20240809-10:18:57 test_accuracy 79 model 6 val 1433 / 1609 20240809-10:18:59 test_accuracy 79 model 3 val 1423 / 1599 20240809-10:19:01 wrote gpt_006.pth 20240809-10:19:01 wrote gpt_003.pth 20240809-10:19:18 wrote non_validated_0079_06.png 20240809-10:19:35 wrote non_validated_0079_03.png 20240809-10:19:35 wrote state.pth 20240809-10:19:35 --- epoch 80 ---------------------------------------- 20240809-10:19:35 current_test_accuracies 0.9143 0.9194 0.9267 0.8899 0.9193 0.9139 0.8906 0.9244 0.9174 0.9170 20240809-10:19:35 training model 3 20240809-10:19:35 training model 6 20240809-10:23:16 train_perplexity 80 model 6 1.1615887139528316 20240809-10:23:22 test_perplexity 80 model 6 1.1602019255354974 20240809-10:23:23 train_perplexity 80 model 3 1.1603996842147009 20240809-10:23:27 test_perplexity 80 model 3 1.1590792849618308 20240809-10:25:49 test_accuracy 80 model 6 val 1484 / 1632 20240809-10:25:51 test_accuracy 80 model 3 val 1500 / 1635 20240809-10:25:53 wrote gpt_003.pth 20240809-10:25:54 wrote gpt_006.pth 20240809-10:26:11 wrote non_validated_0080_03.png 20240809-10:26:27 wrote non_validated_0080_06.png 20240809-10:26:27 wrote state.pth 20240809-10:26:27 --- epoch 81 ---------------------------------------- 20240809-10:26:27 current_test_accuracies 0.9143 0.9194 0.9267 0.9174 0.9193 0.9139 0.9093 0.9244 0.9174 0.9170 20240809-10:26:27 training model 6 20240809-10:26:27 training model 5 20240809-10:30:09 train_perplexity 81 model 5 1.16001953038773 20240809-10:30:15 test_perplexity 81 model 5 1.1581180207388684 20240809-10:30:15 train_perplexity 81 model 6 1.1611471660872326 20240809-10:30:19 test_perplexity 81 model 6 1.1587529866327804 20240809-10:32:44 test_accuracy 81 model 5 val 1467 / 1603 20240809-10:32:46 test_accuracy 81 model 6 val 1479 / 1607 20240809-10:32:48 wrote gpt_006.pth 20240809-10:32:49 wrote gpt_005.pth 20240809-10:33:06 wrote non_validated_0081_06.png 20240809-10:33:23 wrote non_validated_0081_05.png 20240809-10:33:23 wrote state.pth 20240809-10:33:23 --- epoch 82 ---------------------------------------- 20240809-10:33:23 current_test_accuracies 0.9143 0.9194 0.9267 0.9174 0.9193 0.9152 0.9203 0.9244 0.9174 0.9170 20240809-10:33:23 training model 0 20240809-10:33:23 training model 5 20240809-10:37:04 train_perplexity 82 model 5 1.1592707363631054 20240809-10:37:10 train_perplexity 82 model 0 1.159235344737974 20240809-10:37:10 test_perplexity 82 model 5 1.1576034979069347 20240809-10:37:14 test_perplexity 82 model 0 1.1572399081041833 20240809-10:39:42 test_accuracy 82 model 5 val 1472 / 1584 20240809-10:39:43 test_accuracy 82 model 0 val 1481 / 1597 20240809-10:39:45 wrote gpt_000.pth 20240809-10:39:46 wrote gpt_005.pth 20240809-10:40:03 wrote non_validated_0082_00.png 20240809-10:40:20 wrote non_validated_0082_05.png 20240809-10:40:20 wrote state.pth 20240809-10:40:20 --- epoch 83 ---------------------------------------- 20240809-10:40:20 current_test_accuracies 0.9274 0.9194 0.9267 0.9174 0.9193 0.9293 0.9203 0.9244 0.9174 0.9170 20240809-10:40:20 training model 9 20240809-10:40:20 training model 8 20240809-10:44:02 train_perplexity 83 model 8 1.16027843411139 20240809-10:44:05 train_perplexity 83 model 9 1.1599699095410028 20240809-10:44:09 test_perplexity 83 model 8 1.1582543847599092 20240809-10:44:11 test_perplexity 83 model 9 1.1571919803004873 20240809-10:46:38 test_accuracy 83 model 8 val 1506 / 1617 20240809-10:46:41 test_accuracy 83 model 9 val 1471 / 1568 20240809-10:46:43 wrote gpt_009.pth 20240809-10:46:44 wrote gpt_008.pth 20240809-10:47:01 wrote non_validated_0083_09.png 20240809-10:47:18 wrote non_validated_0083_08.png 20240809-10:47:18 wrote state.pth 20240809-10:47:18 --- epoch 84 ---------------------------------------- 20240809-10:47:18 current_test_accuracies 0.9274 0.9194 0.9267 0.9174 0.9193 0.9293 0.9203 0.9244 0.9314 0.9381 20240809-10:47:18 training model 3 20240809-10:47:18 training model 4 20240809-10:51:01 train_perplexity 84 model 4 1.1593011482575117 20240809-10:51:01 train_perplexity 84 model 3 1.1604155049189409 20240809-10:51:08 test_perplexity 84 model 4 1.1581423537921591 20240809-10:51:08 test_perplexity 84 model 3 1.1567784472046516 20240809-10:53:34 test_accuracy 84 model 4 val 1489 / 1618 20240809-10:53:37 test_accuracy 84 model 3 val 1489 / 1594 20240809-10:53:39 wrote gpt_003.pth 20240809-10:53:39 wrote gpt_004.pth 20240809-10:53:56 wrote non_validated_0084_03.png 20240809-10:54:13 wrote non_validated_0084_04.png 20240809-10:54:13 wrote state.pth 20240809-10:54:13 --- epoch 85 ---------------------------------------- 20240809-10:54:13 current_test_accuracies 0.9274 0.9194 0.9267 0.9341 0.9203 0.9293 0.9203 0.9244 0.9314 0.9381 20240809-10:54:13 training model 1 20240809-10:54:13 training model 4 20240809-10:57:56 train_perplexity 85 model 4 1.1599313093478494 20240809-10:57:57 train_perplexity 85 model 1 1.1605870782810133 20240809-10:58:03 test_perplexity 85 model 4 1.1564773857892148 20240809-10:58:04 test_perplexity 85 model 1 1.1560228578537264 20240809-11:00:29 test_accuracy 85 model 4 val 1515 / 1625 20240809-11:00:31 test_accuracy 85 model 1 val 1490 / 1587 20240809-11:00:33 wrote gpt_001.pth 20240809-11:00:34 wrote gpt_004.pth 20240809-11:00:51 wrote non_validated_0085_01.png 20240809-11:01:08 wrote non_validated_0085_04.png 20240809-11:01:08 wrote state.pth 20240809-11:01:08 --- epoch 86 ---------------------------------------- 20240809-11:01:08 current_test_accuracies 0.9274 0.9389 0.9267 0.9341 0.9323 0.9293 0.9203 0.9244 0.9314 0.9381 20240809-11:01:08 training model 6 20240809-11:01:08 training model 7 20240809-11:04:49 train_perplexity 86 model 7 1.1606212146246195 20240809-11:04:55 test_perplexity 86 model 7 1.1587267948649935 20240809-11:04:55 train_perplexity 86 model 6 1.1609764184454996 20240809-11:04:59 test_perplexity 86 model 6 1.1578587115495227 20240809-11:07:25 test_accuracy 86 model 6 val 1481 / 1615 20240809-11:07:27 test_accuracy 86 model 7 val 1442 / 1556 20240809-11:07:28 wrote gpt_006.pth 20240809-11:07:29 wrote gpt_007.pth 20240809-11:07:46 wrote non_validated_0086_06.png 20240809-11:08:03 wrote non_validated_0086_07.png 20240809-11:08:03 wrote state.pth 20240809-11:08:03 --- epoch 87 ---------------------------------------- 20240809-11:08:03 current_test_accuracies 0.9274 0.9389 0.9267 0.9341 0.9323 0.9293 0.9170 0.9267 0.9314 0.9381 20240809-11:08:03 training model 6 20240809-11:08:03 training model 2 20240809-11:11:45 train_perplexity 87 model 2 1.1605635299558281 20240809-11:11:47 train_perplexity 87 model 6 1.1592022045341792 20240809-11:11:52 test_perplexity 87 model 2 1.1590087517253123 20240809-11:11:54 test_perplexity 87 model 6 1.1586961627786219 20240809-11:14:20 test_accuracy 87 model 6 val 1489 / 1617 20240809-11:14:22 test_accuracy 87 model 2 val 1480 / 1587 20240809-11:14:24 wrote gpt_006.pth 20240809-11:14:25 wrote gpt_002.pth 20240809-11:14:42 wrote non_validated_0087_06.png 20240809-11:14:59 wrote non_validated_0087_02.png 20240809-11:14:59 wrote state.pth 20240809-11:14:59 --- epoch 88 ---------------------------------------- 20240809-11:14:59 current_test_accuracies 0.9274 0.9389 0.9326 0.9341 0.9323 0.9293 0.9208 0.9267 0.9314 0.9381 20240809-11:14:59 training model 6 20240809-11:14:59 training model 7 20240809-11:18:40 train_perplexity 88 model 7 1.1601046768448626 20240809-11:18:46 test_perplexity 88 model 7 1.1570861190797976 20240809-11:18:47 train_perplexity 88 model 6 1.159381959503113 20240809-11:18:51 test_perplexity 88 model 6 1.1588612224686552 20240809-11:21:15 test_accuracy 88 model 7 val 1515 / 1616 20240809-11:21:16 test_accuracy 88 model 6 val 1505 / 1603 20240809-11:21:18 wrote gpt_006.pth 20240809-11:21:19 wrote gpt_007.pth 20240809-11:21:36 wrote non_validated_0088_06.png 20240809-11:21:53 wrote non_validated_0088_07.png 20240809-11:21:53 wrote state.pth 20240809-11:21:53 --- epoch 89 ---------------------------------------- 20240809-11:21:53 current_test_accuracies 0.9274 0.9389 0.9326 0.9341 0.9323 0.9293 0.9389 0.9375 0.9314 0.9381 20240809-11:21:53 training model 0 20240809-11:21:53 training model 5 20240809-11:25:34 train_perplexity 89 model 5 1.1593943312918973 20240809-11:25:39 train_perplexity 89 model 0 1.159325499971814 20240809-11:25:41 test_perplexity 89 model 5 1.1548060109821712 20240809-11:25:44 test_perplexity 89 model 0 1.157552285947633 20240809-11:28:08 test_accuracy 89 model 5 val 1505 / 1609 20240809-11:28:09 test_accuracy 89 model 0 val 1528 / 1607 20240809-11:28:11 wrote gpt_000.pth 20240809-11:28:11 wrote gpt_005.pth 20240809-11:28:28 wrote non_validated_0089_00.png 20240809-11:28:45 wrote non_validated_0089_05.png 20240809-11:28:45 wrote state.pth 20240809-11:28:45 --- epoch 90 ---------------------------------------- 20240809-11:28:45 current_test_accuracies 0.9508 0.9389 0.9326 0.9341 0.9323 0.9354 0.9389 0.9375 0.9314 0.9381 20240809-11:28:45 training model 8 20240809-11:28:45 training model 4 20240809-11:32:28 train_perplexity 90 model 8 1.15985037515119 20240809-11:32:28 train_perplexity 90 model 4 1.1590013245701825 20240809-11:32:35 test_perplexity 90 model 8 1.1569636328808062 20240809-11:32:36 test_perplexity 90 model 4 1.158826104516536 20240809-11:35:03 test_accuracy 90 model 4 val 1510 / 1606 20240809-11:35:05 test_accuracy 90 model 8 val 1480 / 1589 20240809-11:35:07 wrote gpt_008.pth 20240809-11:35:08 wrote gpt_004.pth 20240809-11:35:25 wrote non_validated_0090_08.png 20240809-11:35:42 wrote non_validated_0090_04.png 20240809-11:35:42 wrote state.pth 20240809-11:35:42 --- epoch 91 ---------------------------------------- 20240809-11:35:42 current_test_accuracies 0.9508 0.9389 0.9326 0.9341 0.9402 0.9354 0.9389 0.9375 0.9314 0.9381 20240809-11:35:42 training model 8 20240809-11:35:42 training model 2 20240809-11:39:25 train_perplexity 91 model 2 1.1596744414243 20240809-11:39:26 train_perplexity 91 model 8 1.1597555097311516 20240809-11:39:32 test_perplexity 91 model 2 1.1581458363943737 20240809-11:39:33 test_perplexity 91 model 8 1.1561455206390303 20240809-11:42:00 test_accuracy 91 model 2 val 1466 / 1594 20240809-11:42:01 test_accuracy 91 model 8 val 1485 / 1590 20240809-11:42:03 wrote gpt_008.pth 20240809-11:42:04 wrote gpt_002.pth 20240809-11:42:21 wrote non_validated_0091_08.png 20240809-11:42:37 wrote non_validated_0091_02.png 20240809-11:42:37 wrote state.pth 20240809-11:42:37 --- epoch 92 ---------------------------------------- 20240809-11:42:37 current_test_accuracies 0.9508 0.9389 0.9197 0.9341 0.9402 0.9354 0.9389 0.9375 0.9340 0.9381 20240809-11:42:37 training model 2 20240809-11:42:37 training model 8 20240809-11:46:20 train_perplexity 92 model 8 1.1591963012474664 20240809-11:46:20 train_perplexity 92 model 2 1.1590933832292187 20240809-11:46:27 test_perplexity 92 model 8 1.1583361200889464 20240809-11:46:27 test_perplexity 92 model 2 1.1578655998978196 20240809-11:48:53 test_accuracy 92 model 2 val 1494 / 1603 20240809-11:48:56 test_accuracy 92 model 8 val 1489 / 1598 20240809-11:48:58 wrote gpt_002.pth 20240809-11:48:58 wrote gpt_008.pth 20240809-11:49:15 wrote non_validated_0092_02.png 20240809-11:49:32 wrote non_validated_0092_08.png 20240809-11:49:32 wrote state.pth 20240809-11:49:32 --- epoch 93 ---------------------------------------- 20240809-11:49:32 current_test_accuracies 0.9508 0.9389 0.9320 0.9341 0.9402 0.9354 0.9389 0.9375 0.9318 0.9381 20240809-11:49:32 training model 8 20240809-11:49:32 training model 2 20240809-11:53:14 train_perplexity 93 model 2 1.1587686144123626 20240809-11:53:17 train_perplexity 93 model 8 1.1584820300051832 20240809-11:53:21 test_perplexity 93 model 2 1.1564470867325802 20240809-11:53:23 test_perplexity 93 model 8 1.156403236695154 20240809-11:55:49 test_accuracy 93 model 2 val 1496 / 1604 20240809-11:55:51 test_accuracy 93 model 8 val 1487 / 1587 20240809-11:55:53 wrote gpt_008.pth 20240809-11:55:54 wrote gpt_002.pth 20240809-11:56:11 wrote non_validated_0093_08.png 20240809-11:56:28 wrote non_validated_0093_02.png 20240809-11:56:28 wrote state.pth 20240809-11:56:28 --- epoch 94 ---------------------------------------- 20240809-11:56:28 current_test_accuracies 0.9508 0.9389 0.9327 0.9341 0.9402 0.9354 0.9389 0.9375 0.9370 0.9381 20240809-11:56:28 training model 2 20240809-11:56:28 training model 3 20240809-12:00:10 train_perplexity 94 model 3 1.1594274720698725 20240809-12:00:11 train_perplexity 94 model 2 1.1583897616663887 20240809-12:00:17 test_perplexity 94 model 3 1.1585053517855397 20240809-12:00:18 test_perplexity 94 model 2 1.1561056404362284 20240809-12:02:44 test_accuracy 94 model 3 val 1489 / 1610 20240809-12:02:46 test_accuracy 94 model 2 val 1506 / 1580 20240809-12:02:48 wrote gpt_002.pth 20240809-12:02:49 wrote gpt_003.pth 20240809-12:03:06 wrote non_validated_0094_02.png 20240809-12:03:23 wrote non_validated_0094_03.png 20240809-12:03:23 wrote state.pth 20240809-12:03:23 --- epoch 95 ---------------------------------------- 20240809-12:03:23 current_test_accuracies 0.9508 0.9389 0.9532 0.9248 0.9402 0.9354 0.9389 0.9375 0.9370 0.9381 20240809-12:03:23 training model 3 20240809-12:03:23 training model 5 20240809-12:07:05 train_perplexity 95 model 5 1.158975488371203 20240809-12:07:08 train_perplexity 95 model 3 1.1589192338468264 20240809-12:07:11 test_perplexity 95 model 5 1.1581331337051328 20240809-12:07:14 test_perplexity 95 model 3 1.1571563596487833 20240809-12:09:43 test_accuracy 95 model 3 val 1485 / 1584 20240809-12:09:43 test_accuracy 95 model 5 val 1473 / 1591 20240809-12:09:45 wrote gpt_003.pth 20240809-12:09:46 wrote gpt_005.pth 20240809-12:10:03 wrote non_validated_0095_03.png 20240809-12:10:20 wrote non_validated_0095_05.png 20240809-12:10:20 wrote state.pth 20240809-12:10:20 --- epoch 96 ---------------------------------------- 20240809-12:10:20 current_test_accuracies 0.9508 0.9389 0.9532 0.9375 0.9402 0.9258 0.9389 0.9375 0.9370 0.9381 20240809-12:10:20 training model 5 20240809-12:10:20 training model 8 20240809-12:14:03 train_perplexity 96 model 8 1.157853650997434 20240809-12:14:06 train_perplexity 96 model 5 1.158127262277379 20240809-12:14:10 test_perplexity 96 model 8 1.1577734660911259 20240809-12:14:12 test_perplexity 96 model 5 1.156728070445574 20240809-12:16:37 test_accuracy 96 model 8 val 1530 / 1606 20240809-12:16:39 test_accuracy 96 model 5 val 1503 / 1606 20240809-12:16:41 wrote gpt_005.pth 20240809-12:16:42 wrote gpt_008.pth 20240809-12:16:59 wrote non_validated_0096_05.png 20240809-12:17:16 wrote non_validated_0096_08.png 20240809-12:17:16 wrote state.pth 20240809-12:17:16 --- epoch 97 ---------------------------------------- 20240809-12:17:16 current_test_accuracies 0.9508 0.9389 0.9532 0.9375 0.9402 0.9359 0.9389 0.9375 0.9527 0.9381 20240809-12:17:16 training model 5 20240809-12:17:16 training model 3 20240809-12:20:59 train_perplexity 97 model 3 1.158840767273451 20240809-12:20:59 train_perplexity 97 model 5 1.1583397576052237 20240809-12:21:06 test_perplexity 97 model 5 1.155900587339127 20240809-12:21:06 test_perplexity 97 model 3 1.1579070797731532 20240809-12:23:33 test_accuracy 97 model 5 val 1522 / 1615 20240809-12:23:33 test_accuracy 97 model 3 val 1480 / 1602 20240809-12:23:35 wrote gpt_005.pth 20240809-12:23:36 wrote gpt_003.pth 20240809-12:23:53 wrote non_validated_0097_05.png 20240809-12:24:10 wrote non_validated_0097_03.png 20240809-12:24:10 wrote state.pth 20240809-12:24:10 --- epoch 98 ---------------------------------------- 20240809-12:24:10 current_test_accuracies 0.9508 0.9389 0.9532 0.9238 0.9402 0.9424 0.9389 0.9375 0.9527 0.9381 20240809-12:24:10 training model 3 20240809-12:24:10 training model 7 20240809-12:27:53 train_perplexity 98 model 7 1.1593723144538464 20240809-12:27:53 train_perplexity 98 model 3 1.1585741481368528 20240809-12:28:00 test_perplexity 98 model 7 1.1559088690087753 20240809-12:28:00 test_perplexity 98 model 3 1.1575740581113634 20240809-12:30:26 test_accuracy 98 model 3 val 1524 / 1608 20240809-12:30:28 test_accuracy 98 model 7 val 1491 / 1594 20240809-12:30:30 wrote gpt_003.pth 20240809-12:30:31 wrote gpt_007.pth 20240809-12:30:48 wrote non_validated_0098_03.png 20240809-12:31:05 wrote non_validated_0098_07.png 20240809-12:31:05 wrote state.pth 20240809-12:31:05 --- epoch 99 ---------------------------------------- 20240809-12:31:05 current_test_accuracies 0.9508 0.9389 0.9532 0.9478 0.9402 0.9424 0.9389 0.9354 0.9527 0.9381 20240809-12:31:05 training model 7 20240809-12:31:05 training model 9 20240809-12:34:47 train_perplexity 99 model 9 1.1595948602249113 20240809-12:34:48 train_perplexity 99 model 7 1.1591455875508279 20240809-12:34:54 test_perplexity 99 model 9 1.1585469970050999 20240809-12:34:55 test_perplexity 99 model 7 1.1582624880461738 20240809-12:37:18 test_accuracy 99 model 9 val 1492 / 1625 20240809-12:37:20 test_accuracy 99 model 7 val 1486 / 1600 20240809-12:37:22 wrote gpt_007.pth 20240809-12:37:23 wrote gpt_009.pth 20240809-12:37:40 wrote non_validated_0099_07.png 20240809-12:37:57 wrote non_validated_0099_09.png 20240809-12:37:57 wrote state.pth 20240809-12:37:57 --- epoch 100 ---------------------------------------- 20240809-12:37:57 current_test_accuracies 0.9508 0.9389 0.9532 0.9478 0.9402 0.9424 0.9389 0.9287 0.9527 0.9182 20240809-12:37:57 training model 9 20240809-12:37:57 training model 7 20240809-12:41:39 train_perplexity 100 model 7 1.1587497553456683 20240809-12:41:43 train_perplexity 100 model 9 1.1603699617105125 20240809-12:41:46 test_perplexity 100 model 7 1.1563442491771416 20240809-12:41:49 test_perplexity 100 model 9 1.1575300027740447 20240809-12:44:20 test_accuracy 100 model 9 val 1457 / 1553 20240809-12:44:21 test_accuracy 100 model 7 val 1473 / 1564 20240809-12:44:23 wrote gpt_009.pth 20240809-12:44:23 wrote gpt_007.pth 20240809-12:44:40 wrote non_validated_0100_09.png 20240809-12:44:57 wrote non_validated_0100_07.png 20240809-12:44:57 wrote state.pth 20240809-12:44:57 --- epoch 101 ---------------------------------------- 20240809-12:44:57 current_test_accuracies 0.9508 0.9389 0.9532 0.9478 0.9402 0.9424 0.9389 0.9418 0.9527 0.9382 20240809-12:44:57 training model 9 20240809-12:44:57 training model 6 20240809-12:48:40 train_perplexity 101 model 6 1.1588142828524968 20240809-12:48:41 train_perplexity 101 model 9 1.1589247711238768 20240809-12:48:48 test_perplexity 101 model 6 1.1573160235695819 20240809-12:48:48 test_perplexity 101 model 9 1.1581370794300287 20240809-12:51:17 test_accuracy 101 model 9 val 1477 / 1583 20240809-12:51:18 test_accuracy 101 model 6 val 1518 / 1598 20240809-12:51:20 wrote gpt_009.pth 20240809-12:51:21 wrote gpt_006.pth 20240809-12:51:38 wrote non_validated_0101_09.png 20240809-12:51:55 wrote non_validated_0101_06.png 20240809-12:51:55 wrote state.pth 20240809-12:51:55 --- epoch 102 ---------------------------------------- 20240809-12:51:55 current_test_accuracies 0.9508 0.9389 0.9532 0.9478 0.9402 0.9424 0.9499 0.9418 0.9527 0.9330 20240809-12:51:55 training model 9 20240809-12:51:55 training model 1 20240809-12:55:38 train_perplexity 102 model 1 1.1600274147019205 20240809-12:55:38 train_perplexity 102 model 9 1.1586257349614053 20240809-12:55:45 test_perplexity 102 model 1 1.158792553344949 20240809-12:55:45 test_perplexity 102 model 9 1.1574877773906216 20240809-12:58:12 test_accuracy 102 model 9 val 1531 / 1615 20240809-12:58:14 test_accuracy 102 model 1 val 1485 / 1596 20240809-12:58:16 wrote gpt_009.pth 20240809-12:58:17 wrote gpt_001.pth 20240809-12:58:35 wrote non_validated_0102_09.png 20240809-12:58:52 wrote non_validated_0102_01.png 20240809-12:58:52 wrote state.pth 20240809-12:58:52 --- epoch 103 ---------------------------------------- 20240809-12:58:52 current_test_accuracies 0.9508 0.9305 0.9532 0.9478 0.9402 0.9424 0.9499 0.9418 0.9527 0.9480 20240809-12:58:52 training model 1 20240809-12:58:52 training model 4 20240809-13:02:34 train_perplexity 103 model 4 1.15869400980694 20240809-13:02:37 train_perplexity 103 model 1 1.1594590819143256 20240809-13:02:40 test_perplexity 103 model 4 1.1595909733262535 20240809-13:02:42 test_perplexity 103 model 1 1.1578758538310356 20240809-13:05:05 test_accuracy 103 model 4 val 1536 / 1629 20240809-13:05:09 test_accuracy 103 model 1 val 1498 / 1597 20240809-13:05:11 wrote gpt_001.pth 20240809-13:05:12 wrote gpt_004.pth 20240809-13:05:28 wrote non_validated_0103_01.png 20240809-13:05:45 wrote non_validated_0103_04.png 20240809-13:05:45 wrote state.pth 20240809-13:05:45 --- epoch 104 ---------------------------------------- 20240809-13:05:45 current_test_accuracies 0.9508 0.9380 0.9532 0.9478 0.9429 0.9424 0.9499 0.9418 0.9527 0.9480 20240809-13:05:45 training model 1 20240809-13:05:45 training model 7 20240809-13:09:28 train_perplexity 104 model 7 1.1584532700216537 20240809-13:09:29 train_perplexity 104 model 1 1.1593792594673218 20240809-13:09:35 test_perplexity 104 model 7 1.1589378009728617 20240809-13:09:36 test_perplexity 104 model 1 1.1562575768313705 20240809-13:12:05 test_accuracy 104 model 7 val 1498 / 1579 20240809-13:12:07 test_accuracy 104 model 1 val 1461 / 1570 20240809-13:12:09 wrote gpt_001.pth 20240809-13:12:10 wrote gpt_007.pth 20240809-13:12:27 wrote non_validated_0104_01.png 20240809-13:12:44 wrote non_validated_0104_07.png 20240809-13:12:44 wrote state.pth 20240809-13:12:44 --- epoch 105 ---------------------------------------- 20240809-13:12:44 current_test_accuracies 0.9508 0.9306 0.9532 0.9478 0.9429 0.9424 0.9499 0.9487 0.9527 0.9480 20240809-13:12:44 training model 1 20240809-13:12:44 training model 5 20240809-13:16:25 train_perplexity 105 model 5 1.157772188752017 20240809-13:16:28 train_perplexity 105 model 1 1.158338414223713 20240809-13:16:33 test_perplexity 105 model 5 1.1580511145966337 20240809-13:16:34 test_perplexity 105 model 1 1.1570704029547507 20240809-13:19:01 test_accuracy 105 model 1 val 1514 / 1618 20240809-13:19:02 test_accuracy 105 model 5 val 1504 / 1594 20240809-13:19:04 wrote gpt_001.pth 20240809-13:19:05 wrote gpt_005.pth 20240809-13:19:22 wrote non_validated_0105_01.png 20240809-13:19:39 wrote non_validated_0105_05.png 20240809-13:19:39 wrote state.pth 20240809-13:19:39 --- epoch 106 ---------------------------------------- 20240809-13:19:39 current_test_accuracies 0.9508 0.9357 0.9532 0.9478 0.9429 0.9435 0.9499 0.9487 0.9527 0.9480 20240809-13:19:39 training model 1 20240809-13:19:39 training model 4 20240809-13:23:20 train_perplexity 106 model 4 1.1583539601192496 20240809-13:23:26 test_perplexity 106 model 4 1.157695030036386 20240809-13:23:26 train_perplexity 106 model 1 1.158678783494228 20240809-13:23:30 test_perplexity 106 model 1 1.1562897722584113 20240809-13:25:56 test_accuracy 106 model 4 val 1512 / 1605 20240809-13:25:58 test_accuracy 106 model 1 val 1523 / 1596 20240809-13:26:00 wrote gpt_001.pth 20240809-13:26:00 wrote gpt_004.pth 20240809-13:26:17 wrote non_validated_0106_01.png 20240809-13:26:34 wrote non_validated_0106_04.png 20240809-13:26:34 wrote state.pth 20240809-13:26:34 --- epoch 107 ---------------------------------------- 20240809-13:26:34 current_test_accuracies 0.9508 0.9543 0.9532 0.9478 0.9421 0.9435 0.9499 0.9487 0.9527 0.9480 20240809-13:26:34 training model 4 20240809-13:26:34 training model 5 20240809-13:30:17 train_perplexity 107 model 5 1.1576078960981868 20240809-13:30:17 train_perplexity 107 model 4 1.1581407698415218 20240809-13:30:24 test_perplexity 107 model 5 1.1566569180454582 20240809-13:30:24 test_perplexity 107 model 4 1.157494490944219 20240809-13:32:50 test_accuracy 107 model 5 val 1513 / 1601 20240809-13:32:51 test_accuracy 107 model 4 val 1510 / 1611 20240809-13:32:53 wrote gpt_004.pth 20240809-13:32:54 wrote gpt_005.pth 20240809-13:33:10 wrote non_validated_0107_04.png 20240809-13:33:27 wrote non_validated_0107_05.png 20240809-13:33:27 wrote state.pth 20240809-13:33:27 --- epoch 108 ---------------------------------------- 20240809-13:33:27 current_test_accuracies 0.9508 0.9543 0.9532 0.9478 0.9373 0.9450 0.9499 0.9487 0.9527 0.9480 20240809-13:33:27 training model 4 20240809-13:33:27 training model 5 20240809-13:37:09 train_perplexity 108 model 5 1.1578932798326032 20240809-13:37:13 train_perplexity 108 model 4 1.158055148642746 20240809-13:37:15 test_perplexity 108 model 5 1.154708765850209 20240809-13:37:18 test_perplexity 108 model 4 1.1560881722846021 20240809-13:39:45 test_accuracy 108 model 4 val 1532 / 1613 20240809-13:39:46 test_accuracy 108 model 5 val 1512 / 1594 20240809-13:39:48 wrote gpt_004.pth 20240809-13:39:48 wrote gpt_005.pth 20240809-13:40:05 wrote non_validated_0108_04.png 20240809-13:40:22 wrote non_validated_0108_05.png 20240809-13:40:22 wrote state.pth 20240809-13:40:22 --- epoch 109 ---------------------------------------- 20240809-13:40:22 current_test_accuracies 0.9508 0.9543 0.9532 0.9478 0.9498 0.9486 0.9499 0.9487 0.9527 0.9480 20240809-13:40:22 training model 3 20240809-13:40:22 training model 9 20240809-13:44:04 train_perplexity 109 model 9 1.1584981831809609 20240809-13:44:09 train_perplexity 109 model 3 1.1579517819589882 20240809-13:44:10 test_perplexity 109 model 9 1.156807082447244 20240809-13:44:14 test_perplexity 109 model 3 1.1546359163588646 20240809-13:46:38 test_accuracy 109 model 9 val 1530 / 1614 20240809-13:46:42 test_accuracy 109 model 3 val 1521 / 1591 20240809-13:46:43 wrote gpt_003.pth 20240809-13:46:44 wrote gpt_009.pth 20240809-13:47:01 wrote non_validated_0109_03.png 20240809-13:47:18 wrote non_validated_0109_09.png 20240809-13:47:18 wrote state.pth 20240809-13:47:18 --- epoch 110 ---------------------------------------- 20240809-13:47:18 current_test_accuracies 0.9508 0.9543 0.9532 0.9560 0.9498 0.9486 0.9499 0.9487 0.9527 0.9480 20240809-13:47:18 training model 9 20240809-13:47:18 training model 5 20240809-13:51:01 train_perplexity 110 model 5 1.157260046330938 20240809-13:51:02 train_perplexity 110 model 9 1.1582866040290298 20240809-13:51:08 test_perplexity 110 model 5 1.1564651607581191 20240809-13:51:09 test_perplexity 110 model 9 1.1567895119074676 20240809-13:53:39 test_accuracy 110 model 5 val 1513 / 1598 20240809-13:53:40 test_accuracy 110 model 9 val 1522 / 1598 20240809-13:53:42 wrote gpt_009.pth 20240809-13:53:43 wrote gpt_005.pth 20240809-13:54:00 wrote non_validated_0110_09.png 20240809-13:54:17 wrote non_validated_0110_05.png 20240809-13:54:17 wrote state.pth 20240809-13:54:17 --- epoch 111 ---------------------------------------- 20240809-13:54:17 current_test_accuracies 0.9508 0.9543 0.9532 0.9560 0.9498 0.9468 0.9499 0.9487 0.9527 0.9524 20240809-13:54:17 training model 5 20240809-13:54:17 training model 7 20240809-13:57:58 train_perplexity 111 model 7 1.1581700243523332 20240809-13:58:04 test_perplexity 111 model 7 1.1559481909017388 20240809-13:58:05 train_perplexity 111 model 5 1.1571724463340278 20240809-13:58:09 test_perplexity 111 model 5 1.1559772241691324 20240809-14:00:36 test_accuracy 111 model 7 val 1506 / 1581 20240809-14:00:37 test_accuracy 111 model 5 val 1546 / 1617 20240809-14:00:39 wrote gpt_005.pth 20240809-14:00:39 wrote gpt_007.pth 20240809-14:00:56 wrote non_validated_0111_05.png 20240809-14:01:13 wrote non_validated_0111_07.png 20240809-14:01:13 wrote state.pth 20240809-14:01:13 --- epoch 112 ---------------------------------------- 20240809-14:01:13 current_test_accuracies 0.9508 0.9543 0.9532 0.9560 0.9498 0.9561 0.9499 0.9526 0.9527 0.9524 20240809-14:01:13 training model 4 20240809-14:01:13 training model 6 20240809-14:04:56 train_perplexity 112 model 6 1.1583544745672887 20240809-14:04:57 train_perplexity 112 model 4 1.1572036264709682 20240809-14:05:04 test_perplexity 112 model 6 1.1560404112578548 20240809-14:05:04 test_perplexity 112 model 4 1.1554039972731676 20240809-14:07:32 test_accuracy 112 model 6 val 1493 / 1584 20240809-14:07:32 test_accuracy 112 model 4 val 1506 / 1588 20240809-14:07:35 wrote gpt_004.pth 20240809-14:07:36 wrote gpt_006.pth 20240809-14:07:53 wrote non_validated_0112_04.png 20240809-14:08:10 wrote non_validated_0112_06.png 20240809-14:08:10 wrote state.pth 20240809-14:08:10 --- epoch 113 ---------------------------------------- 20240809-14:08:10 current_test_accuracies 0.9508 0.9543 0.9532 0.9560 0.9484 0.9561 0.9426 0.9526 0.9527 0.9524 20240809-14:08:10 training model 6 20240809-14:08:10 training model 4 20240809-14:11:52 train_perplexity 113 model 4 1.1572883565667202 20240809-14:11:55 train_perplexity 113 model 6 1.1578610170264694 20240809-14:11:59 test_perplexity 113 model 4 1.155984818874243 20240809-14:12:01 test_perplexity 113 model 6 1.1568647280597315 20240809-14:14:27 test_accuracy 113 model 4 val 1548 / 1602 20240809-14:14:28 test_accuracy 113 model 6 val 1535 / 1606 20240809-14:14:30 wrote gpt_006.pth 20240809-14:14:31 wrote gpt_004.pth 20240809-14:14:48 wrote non_validated_0113_06.png 20240809-14:15:05 wrote non_validated_0113_04.png 20240809-14:15:05 wrote state.pth 20240809-14:15:05 --- epoch 114 ---------------------------------------- 20240809-14:15:05 current_test_accuracies 0.9508 0.9543 0.9532 0.9560 0.9663 0.9561 0.9558 0.9526 0.9527 0.9524 20240809-14:32:26 argv ./main.py --result_dir=results_noise_10 --nb_train_samples=40000 --nb_test_samples=2000 --grids_world_tasks=replace_color,translate,grow,half_fill,frame,detect,corners,contact --accuracy_to_make_c_quizzes=0.95 --prompt_noise=0.05 --nb_gpts=10 --max_fail_to_validate=5 --proba_understands=0.75 --seed 12323 --resume --inference_batch_size=10 20240809-14:32:26 args.log_filename train.log 20240809-14:32:26 args.result_dir results_noise_10 20240809-14:32:26 args.seed 12323 20240809-14:32:26 args.resume True 20240809-14:32:26 args.max_percents_of_test_in_train -1 20240809-14:32:26 args.log_command None 20240809-14:32:26 args.nb_epochs 10000 20240809-14:32:26 args.batch_size 25 20240809-14:32:26 args.physical_batch_size None 20240809-14:32:26 args.inference_batch_size 10 20240809-14:32:26 args.nb_train_samples 40000 20240809-14:32:26 args.nb_test_samples 2000 20240809-14:32:26 args.nb_new_c_quizzes_for_train None 20240809-14:32:26 args.nb_new_c_quizzes_for_test None 20240809-14:32:26 args.learning_rate 0.0005 20240809-14:32:26 args.schedule_free False 20240809-14:32:26 args.model 37M 20240809-14:32:26 args.dim_model 512 20240809-14:32:26 args.dim_keys 64 20240809-14:32:26 args.dim_hidden 2048 20240809-14:32:26 args.nb_heads 8 20240809-14:32:26 args.nb_blocks 12 20240809-14:32:26 args.dropout 0.1 20240809-14:32:26 args.deterministic_synthesis False 20240809-14:32:26 args.problem grids 20240809-14:32:26 args.nb_threads 1 20240809-14:32:26 args.gpus all 20240809-14:32:26 args.nb_gpts 10 20240809-14:32:26 args.max_fail_to_validate 5 20240809-14:32:26 args.accuracy_to_make_c_quizzes 0.95 20240809-14:32:26 args.proba_understands 0.75 20240809-14:32:26 args.proba_not_understands 0.5 20240809-14:32:26 args.temperature_hot 1.5 20240809-14:32:26 args.temperature_cold 1 20240809-14:32:26 args.prompt_noise 0.05 20240809-14:32:26 args.nb_averaging_rounds 3 20240809-14:32:26 args.dirty_debug False 20240809-14:32:26 args.test None 20240809-14:32:26 args.grids_world_tasks replace_color,translate,grow,half_fill,frame,detect,corners,contact 20240809-14:32:26 args.grids_science_tasks None 20240809-14:32:26 args.sky_height 6 20240809-14:32:26 args.sky_width 8 20240809-14:32:26 args.sky_nb_birds 3 20240809-14:32:26 args.sky_nb_iterations 2 20240809-14:32:26 args.sky_speed 3 20240809-14:32:26 main_device cuda:0 gpus ['cuda:0', 'cuda:1'] 20240809-14:32:26 vocabulary_size 15 20240809-14:32:26 creating model 0 and its w_quizzes 20240809-14:32:40 creating model 1 and its w_quizzes 20240809-14:32:53 creating model 2 and its w_quizzes 20240809-14:33:06 creating model 3 and its w_quizzes 20240809-14:33:19 creating model 4 and its w_quizzes 20240809-14:33:32 creating model 5 and its w_quizzes 20240809-14:33:45 creating model 6 and its w_quizzes 20240809-14:33:59 creating model 7 and its w_quizzes 20240809-14:34:13 creating model 8 and its w_quizzes 20240809-14:34:27 creating model 9 and its w_quizzes 20240809-14:34:40 successfully loaded gpt_000.pth 20240809-14:34:41 successfully loaded gpt_001.pth 20240809-14:34:41 successfully loaded gpt_002.pth 20240809-14:34:41 successfully loaded gpt_003.pth 20240809-14:34:41 successfully loaded gpt_004.pth 20240809-14:34:42 successfully loaded gpt_005.pth 20240809-14:34:42 successfully loaded gpt_006.pth 20240809-14:34:42 successfully loaded gpt_007.pth 20240809-14:34:43 successfully loaded gpt_008.pth 20240809-14:34:43 successfully loaded gpt_009.pth 20240809-14:34:43 cannot find c_quizzes.pth 20240809-14:34:43 successfully loaded state.pth 20240809-14:34:43 nb_parameters 37819407 (37M) 20240809-14:34:43 nb_new_c_quizzes_for_train 400 nb_new_c_quizzes_for_test 20 20240809-14:34:43 wrote state.pth 20240809-14:34:43 --- epoch 114 ---------------------------------------- 20240809-14:34:43 current_test_accuracies 0.9508 0.9543 0.9532 0.9560 0.9663 0.9561 0.9558 0.9526 0.9527 0.9524 20240809-14:38:16 keep c_quizzes model 5 validated 23 / 420 (5.48%) nb_accumulated 23 / 420 (finishes Fri 15:39 -- 388/h) 20240809-14:41:22 keep c_quizzes model 2 validated 21 / 420 (5.00%) nb_accumulated 44 / 420 (finishes Fri 15:38 -- 396/h) 20240809-14:44:33 keep c_quizzes model 9 validated 31 / 420 (7.38%) nb_accumulated 75 / 420 (finishes Fri 15:29 -- 457/h) 20240809-14:47:39 keep c_quizzes model 6 validated 26 / 420 (6.19%) nb_accumulated 101 / 420 (finishes Fri 15:28 -- 468/h) 20240809-14:50:44 keep c_quizzes model 1 validated 25 / 420 (5.95%) nb_accumulated 126 / 420 (finishes Fri 15:28 -- 471/h) 20240809-14:53:52 keep c_quizzes model 6 validated 20 / 420 (4.76%) nb_accumulated 146 / 420 (finishes Fri 15:29 -- 457/h) 20240809-14:56:58 keep c_quizzes model 8 validated 19 / 420 (4.52%) nb_accumulated 165 / 420 (finishes Fri 15:31 -- 444/h) 20240809-15:00:04 keep c_quizzes model 1 validated 24 / 420 (5.71%) nb_accumulated 189 / 420 (finishes Fri 15:31 -- 447/h) 20240809-15:03:09 keep c_quizzes model 5 validated 28 / 420 (6.67%) nb_accumulated 217 / 420 (finishes Fri 15:29 -- 457/h) 20240809-15:06:16 keep c_quizzes model 7 validated 19 / 420 (4.52%) nb_accumulated 236 / 420 (finishes Fri 15:30 -- 448/h) 20240809-15:09:21 keep c_quizzes model 4 validated 13 / 420 (3.10%) nb_accumulated 249 / 420 (finishes Fri 15:33 -- 431/h) 20240809-15:12:27 keep c_quizzes model 1 validated 31 / 420 (7.38%) nb_accumulated 280 / 420 (finishes Fri 15:31 -- 445/h) 20240809-15:15:32 keep c_quizzes model 1 validated 33 / 420 (7.86%) nb_accumulated 313 / 420 (finishes Fri 15:29 -- 459/h) 20240809-15:18:38 keep c_quizzes model 0 validated 26 / 420 (6.19%) nb_accumulated 339 / 420 (finishes Fri 15:29 -- 463/h) 20240809-15:21:45 keep c_quizzes model 8 validated 19 / 420 (4.52%) nb_accumulated 358 / 420 (finishes Fri 15:29 -- 456/h) 20240809-15:24:52 keep c_quizzes model 4 validated 26 / 420 (6.19%) nb_accumulated 384 / 420 (finishes Fri 15:29 -- 459/h) 20240809-15:28:02 keep c_quizzes model 3 validated 26 / 420 (6.19%) nb_accumulated 410 / 420 (finishes Fri 15:29 -- 461/h) 20240809-15:31:08 keep c_quizzes model 4 validated 19 / 420 (4.52%) nb_accumulated 429 / 420 (finishes now! -- 456/h) 20240809-15:31:22 wrote c_quizzes.pth 20240809-15:31:22 training model 0 20240809-15:31:22 training model 1 20240809-15:35:04 train_perplexity 114 model 1 1.1584713906736608 20240809-15:35:05 train_perplexity 114 model 0 1.1597986016284267 20240809-15:35:11 test_perplexity 114 model 1 1.1572449704788996 20240809-15:35:12 test_perplexity 114 model 0 1.1573787177291666 20240809-15:41:06 test_accuracy 114 model 0 val 1512 / 1603 20240809-15:41:09 test_accuracy 114 model 1 val 1522 / 1583 20240809-15:41:11 wrote gpt_000.pth 20240809-15:41:12 wrote gpt_001.pth 20240809-15:41:45 wrote non_validated_0114_00.png 20240809-15:42:17 wrote non_validated_0114_01.png 20240809-15:42:18 wrote state.pth 20240809-15:42:18 --- epoch 115 ---------------------------------------- 20240809-15:42:18 current_test_accuracies 0.9432 0.9615 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 20240809-15:42:18 training model 2 20240809-15:42:18 training model 3 20240809-15:46:00 train_perplexity 115 model 3 1.1583718087150299 20240809-15:46:03 train_perplexity 115 model 2 1.1588597726489336 20240809-15:46:06 test_perplexity 115 model 3 1.1564876077183026 20240809-15:46:08 test_perplexity 115 model 2 1.1576960271446302 20240809-15:52:03 test_accuracy 115 model 2 val 1510 / 1600 20240809-15:52:07 test_accuracy 115 model 3 val 1499 / 1572 20240809-15:52:09 wrote gpt_002.pth 20240809-15:52:09 wrote gpt_003.pth 20240809-15:52:43 wrote non_validated_0115_02.png 20240809-15:53:15 wrote non_validated_0115_03.png 20240809-15:53:15 wrote state.pth 20240809-15:53:15 --- epoch 116 ---------------------------------------- 20240809-15:53:15 current_test_accuracies 0.9432 0.9615 0.9438 0.9536 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 20240809-15:53:15 training model 4 20240809-15:53:15 training model 5 20240809-15:56:57 train_perplexity 116 model 5 1.157658924029477 20240809-15:57:03 train_perplexity 116 model 4 1.1575231584168333 20240809-15:57:03 test_perplexity 116 model 5 1.155632660674989 20240809-15:57:07 test_perplexity 116 model 4 1.1553276939107227 20240809-16:02:57 test_accuracy 116 model 5 val 1515 / 1610 20240809-16:03:00 test_accuracy 116 model 4 val 1538 / 1602 20240809-16:03:02 wrote gpt_004.pth 20240809-16:03:03 wrote gpt_005.pth 20240809-16:03:36 wrote non_validated_0116_04.png 20240809-16:04:09 wrote non_validated_0116_05.png 20240809-16:04:09 wrote state.pth 20240809-16:04:09 --- epoch 117 ---------------------------------------- 20240809-16:04:09 current_test_accuracies 0.9432 0.9615 0.9438 0.9536 0.9600 0.9410 0.0000 0.0000 0.0000 0.0000 20240809-16:04:09 training model 6 20240809-16:04:09 training model 7 20240809-16:07:51 train_perplexity 117 model 7 1.1586958711734852 20240809-16:07:56 train_perplexity 117 model 6 1.1581595799453157 20240809-16:07:57 test_perplexity 117 model 7 1.156849306486098 20240809-16:08:00 test_perplexity 117 model 6 1.1560764180008982 20240809-16:13:59 test_accuracy 117 model 6 val 1517 / 1591 20240809-16:14:00 test_accuracy 117 model 7 val 1515 / 1589 20240809-16:14:02 wrote gpt_006.pth 20240809-16:14:03 wrote gpt_007.pth 20240809-16:14:36 wrote non_validated_0117_06.png 20240809-16:15:08 wrote non_validated_0117_07.png 20240809-16:15:08 wrote state.pth 20240809-16:15:08 --- epoch 118 ---------------------------------------- 20240809-16:15:08 current_test_accuracies 0.9432 0.9615 0.9438 0.9536 0.9600 0.9410 0.9535 0.9534 0.0000 0.0000 20240809-16:15:08 training model 8 20240809-16:15:08 training model 9 20240809-16:18:51 train_perplexity 118 model 9 1.1582988421388192 20240809-16:18:52 train_perplexity 118 model 8 1.1589441184931355 20240809-16:18:58 test_perplexity 118 model 9 1.1556271560140015 20240809-16:18:59 test_perplexity 118 model 8 1.1583916982461226 20240809-16:24:47 test_accuracy 118 model 8 val 1576 / 1643 20240809-16:24:50 test_accuracy 118 model 9 val 1547 / 1623 20240809-16:24:51 wrote gpt_008.pth 20240809-16:24:52 wrote gpt_009.pth 20240809-16:25:25 wrote non_validated_0118_08.png 20240809-16:25:58 wrote non_validated_0118_09.png 20240809-16:25:58 wrote state.pth 20240809-16:25:58 --- epoch 119 ---------------------------------------- 20240809-16:25:58 current_test_accuracies 0.9432 0.9615 0.9438 0.9536 0.9600 0.9410 0.9535 0.9534 0.9592 0.9532 20240809-16:25:58 training model 5 20240809-16:25:58 training model 0 20240809-16:29:40 train_perplexity 119 model 0 1.1583612744683676 20240809-16:29:43 train_perplexity 119 model 5 1.1570774201940472 20240809-16:29:47 test_perplexity 119 model 0 1.1575575457878555 20240809-16:29:49 test_perplexity 119 model 5 1.1567396274224069 20240809-16:35:41 test_accuracy 119 model 0 val 1509 / 1619 20240809-16:35:44 test_accuracy 119 model 5 val 1538 / 1609 20240809-16:35:45 wrote gpt_005.pth 20240809-16:35:46 wrote gpt_000.pth 20240809-16:36:19 wrote non_validated_0119_05.png 20240809-16:36:51 wrote non_validated_0119_00.png 20240809-16:36:52 wrote state.pth 20240809-16:36:52 --- epoch 120 ---------------------------------------- 20240809-16:36:52 current_test_accuracies 0.9321 0.9615 0.9438 0.9536 0.9600 0.9559 0.9535 0.9534 0.9592 0.9532 20240809-16:36:52 training model 0 20240809-16:36:52 training model 2 20240809-16:40:32 train_perplexity 120 model 2 1.1581535944925667 20240809-16:40:38 test_perplexity 120 model 2 1.158395796213204 20240809-16:40:40 train_perplexity 120 model 0 1.1585590695541155 20240809-16:40:44 test_perplexity 120 model 0 1.1559994437529226 20240809-16:46:33 test_accuracy 120 model 2 val 1527 / 1613 20240809-16:46:39 test_accuracy 120 model 0 val 1500 / 1588 20240809-16:46:41 wrote gpt_000.pth 20240809-16:46:42 wrote gpt_002.pth 20240809-16:47:15 wrote non_validated_0120_00.png 20240809-16:47:47 wrote non_validated_0120_02.png 20240809-16:47:47 wrote state.pth 20240809-16:47:47 --- epoch 121 ---------------------------------------- 20240809-16:47:47 current_test_accuracies 0.9446 0.9615 0.9467 0.9536 0.9600 0.9559 0.9535 0.9534 0.9592 0.9532 20240809-16:47:47 training model 0 20240809-16:47:47 training model 2 20240809-16:51:28 train_perplexity 121 model 2 1.1582010990068885 20240809-16:51:33 train_perplexity 121 model 0 1.1586083886243304 20240809-16:51:37 test_perplexity 121 model 2 1.1566402367568938 20240809-16:51:38 test_perplexity 121 model 0 1.156462040248221 20240809-16:57:29 test_accuracy 121 model 0 val 1527 / 1623 20240809-16:57:38 test_accuracy 121 model 2 val 1498 / 1566 20240809-16:57:40 wrote gpt_000.pth 20240809-16:57:40 wrote gpt_002.pth 20240809-16:58:13 wrote non_validated_0121_00.png 20240809-16:58:46 wrote non_validated_0121_02.png 20240809-16:58:46 wrote state.pth 20240809-16:58:46 --- epoch 122 ---------------------------------------- 20240809-16:58:46 current_test_accuracies 0.9409 0.9615 0.9566 0.9536 0.9600 0.9559 0.9535 0.9534 0.9592 0.9532 20240809-16:58:46 training model 0 20240809-16:58:46 training model 9 20240809-17:02:28 train_perplexity 122 model 9 1.157979646088491 20240809-17:02:30 train_perplexity 122 model 0 1.1577684129310577 20240809-17:02:35 test_perplexity 122 model 9 1.1540353823715574 20240809-17:02:36 test_perplexity 122 model 0 1.1567656824845822 20240809-17:08:34 test_accuracy 122 model 0 val 1505 / 1580 20240809-17:08:38 test_accuracy 122 model 9 val 1500 / 1568 20240809-17:08:40 wrote gpt_000.pth 20240809-17:08:41 wrote gpt_009.pth 20240809-17:09:13 wrote non_validated_0122_00.png 20240809-17:09:46 wrote non_validated_0122_09.png 20240809-17:09:46 wrote state.pth 20240809-17:09:46 --- epoch 123 ---------------------------------------- 20240809-17:09:46 current_test_accuracies 0.9525 0.9615 0.9566 0.9536 0.9600 0.9559 0.9535 0.9534 0.9592 0.9566 20240809-17:13:09 keep c_quizzes model 7 validated 17 / 420 (4.05%) nb_accumulated 17 / 420 (finishes Fri 18:33 -- 301/h) 20240809-17:16:09 keep c_quizzes model 8 validated 22 / 420 (5.24%) nb_accumulated 39 / 420 (finishes Fri 18:18 -- 366/h) 20240809-17:19:09 keep c_quizzes model 0 validated 27 / 420 (6.43%) nb_accumulated 66 / 420 (finishes Fri 18:09 -- 421/h) 20240809-17:22:11 keep c_quizzes model 8 validated 19 / 420 (4.52%) nb_accumulated 85 / 420 (finishes Fri 18:11 -- 410/h) 20240809-17:25:12 keep c_quizzes model 7 validated 27 / 420 (6.43%) nb_accumulated 112 / 420 (finishes Fri 18:07 -- 435/h) 20240809-17:28:13 keep c_quizzes model 9 validated 19 / 420 (4.52%) nb_accumulated 131 / 420 (finishes Fri 18:08 -- 426/h) 20240809-17:31:14 keep c_quizzes model 4 validated 23 / 420 (5.48%) nb_accumulated 154 / 420 (finishes Fri 18:08 -- 430/h) 20240809-17:34:15 keep c_quizzes model 3 validated 21 / 420 (5.00%) nb_accumulated 175 / 420 (finishes Fri 18:08 -- 428/h) 20240809-17:37:16 keep c_quizzes model 2 validated 17 / 420 (4.05%) nb_accumulated 192 / 420 (finishes Fri 18:09 -- 418/h) 20240809-17:40:18 keep c_quizzes model 6 validated 27 / 420 (6.43%) nb_accumulated 219 / 420 (finishes Fri 18:08 -- 430/h) 20240809-17:43:23 keep c_quizzes model 3 validated 21 / 420 (5.00%) nb_accumulated 240 / 420 (finishes Fri 18:08 -- 428/h) 20240809-17:46:30 keep c_quizzes model 7 validated 27 / 420 (6.43%) nb_accumulated 267 / 420 (finishes Fri 18:07 -- 436/h) 20240809-17:49:31 keep c_quizzes model 8 validated 34 / 420 (8.10%) nb_accumulated 301 / 420 (finishes Fri 18:05 -- 454/h) 20240809-17:52:31 keep c_quizzes model 0 validated 24 / 420 (5.71%) nb_accumulated 325 / 420 (finishes Fri 18:05 -- 456/h) 20240809-17:55:31 keep c_quizzes model 1 validated 21 / 420 (5.00%) nb_accumulated 346 / 420 (finishes Fri 18:05 -- 453/h) 20240809-17:58:30 keep c_quizzes model 1 validated 23 / 420 (5.48%) nb_accumulated 369 / 420 (finishes Fri 18:05 -- 454/h) 20240809-18:01:30 keep c_quizzes model 1 validated 17 / 420 (4.05%) nb_accumulated 386 / 420 (finishes Fri 18:06 -- 447/h) 20240809-18:04:30 keep c_quizzes model 1 validated 18 / 420 (4.29%) nb_accumulated 404 / 420 (finishes Fri 18:06 -- 442/h) 20240809-18:07:31 keep c_quizzes model 0 validated 23 / 420 (5.48%) nb_accumulated 427 / 420 (finishes now! -- 443/h) 20240809-18:07:44 wrote c_quizzes.pth 20240809-18:07:44 training model 0 20240809-18:07:44 training model 1 20240809-18:11:26 train_perplexity 123 model 0 1.1586396325142276 20240809-18:11:26 train_perplexity 123 model 1 1.158901782826021 20240809-18:11:34 test_perplexity 123 model 0 1.1558368826894476 20240809-18:11:34 test_perplexity 123 model 1 1.1576792073114428 20240809-18:17:31 test_accuracy 123 model 1 val 1520 / 1591 20240809-18:17:33 test_accuracy 123 model 0 val 1524 / 1584 20240809-18:17:34 wrote gpt_000.pth 20240809-18:17:35 wrote gpt_001.pth 20240809-18:18:08 wrote non_validated_0123_00.png 20240809-18:18:41 wrote non_validated_0123_01.png 20240809-18:18:41 wrote state.pth 20240809-18:18:41 --- epoch 124 ---------------------------------------- 20240809-18:18:41 current_test_accuracies 0.9621 0.9554 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 20240809-18:18:41 training model 2 20240809-18:18:41 training model 3 20240809-18:22:23 train_perplexity 124 model 3 1.1585471455106553 20240809-18:22:25 train_perplexity 124 model 2 1.158526579088597 20240809-18:22:30 test_perplexity 124 model 3 1.1570392258686475 20240809-18:22:31 test_perplexity 124 model 2 1.1561426093411251 20240809-18:28:23 test_accuracy 124 model 3 val 1532 / 1610 20240809-18:28:24 test_accuracy 124 model 2 val 1543 / 1617 20240809-18:28:26 wrote gpt_002.pth 20240809-18:28:27 wrote gpt_003.pth 20240809-18:28:59 wrote non_validated_0124_02.png 20240809-18:29:32 wrote non_validated_0124_03.png 20240809-18:29:32 wrote state.pth 20240809-18:29:32 --- epoch 125 ---------------------------------------- 20240809-18:29:32 current_test_accuracies 0.9621 0.9554 0.9542 0.9516 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 20240809-18:29:32 training model 4 20240809-18:29:32 training model 5 20240809-18:33:14 train_perplexity 125 model 5 1.1571756035976057 20240809-18:33:19 train_perplexity 125 model 4 1.1576335755451899 20240809-18:33:20 test_perplexity 125 model 5 1.1575726320348274 20240809-18:33:23 test_perplexity 125 model 4 1.1564499113425246 20240809-18:39:17 test_accuracy 125 model 4 val 1537 / 1609 20240809-18:39:19 test_accuracy 125 model 5 val 1509 / 1599 20240809-18:39:21 wrote gpt_004.pth 20240809-18:39:21 wrote gpt_005.pth 20240809-18:39:54 wrote non_validated_0125_04.png 20240809-18:40:27 wrote non_validated_0125_05.png 20240809-18:40:27 wrote state.pth 20240809-18:40:27 --- epoch 126 ---------------------------------------- 20240809-18:40:27 current_test_accuracies 0.9621 0.9554 0.9542 0.9516 0.9553 0.9437 0.0000 0.0000 0.0000 0.0000 20240809-18:40:27 training model 6 20240809-18:40:27 training model 7 20240809-18:44:08 train_perplexity 126 model 7 1.158323811384131 20240809-18:44:14 test_perplexity 126 model 7 1.155699479831714 20240809-18:44:14 train_perplexity 126 model 6 1.158653316057933 20240809-18:44:18 test_perplexity 126 model 6 1.1580516551505555 20240809-18:50:13 test_accuracy 126 model 6 val 1515 / 1596 20240809-18:50:14 test_accuracy 126 model 7 val 1516 / 1586 20240809-18:50:16 wrote gpt_006.pth 20240809-18:50:17 wrote gpt_007.pth 20240809-18:50:49 wrote non_validated_0126_06.png 20240809-18:51:22 wrote non_validated_0126_07.png 20240809-18:51:22 wrote state.pth 20240809-18:51:22 --- epoch 127 ---------------------------------------- 20240809-18:51:22 current_test_accuracies 0.9621 0.9554 0.9542 0.9516 0.9553 0.9437 0.9492 0.9559 0.0000 0.0000 20240809-18:51:22 training model 8 20240809-18:51:22 training model 9 20240809-18:55:03 train_perplexity 127 model 9 1.1582549480076663 20240809-18:55:08 train_perplexity 127 model 8 1.1584101490189331 20240809-18:55:11 test_perplexity 127 model 9 1.1592818085965744 20240809-18:55:13 test_perplexity 127 model 8 1.1563580898502561 20240809-19:01:06 test_accuracy 127 model 9 val 1525 / 1604 20240809-19:01:07 test_accuracy 127 model 8 val 1535 / 1600 20240809-19:01:09 wrote gpt_008.pth 20240809-19:01:09 wrote gpt_009.pth 20240809-19:01:42 wrote non_validated_0127_08.png 20240809-19:02:15 wrote non_validated_0127_09.png 20240809-19:02:15 wrote state.pth 20240809-19:02:15 --- epoch 128 ---------------------------------------- 20240809-19:02:15 current_test_accuracies 0.9621 0.9554 0.9542 0.9516 0.9553 0.9437 0.9492 0.9559 0.9594 0.9507 20240809-19:02:15 training model 5 20240809-19:02:15 training model 6 20240809-19:05:58 train_perplexity 128 model 6 1.1576818242176359 20240809-19:06:02 train_perplexity 128 model 5 1.1572677318072373 20240809-19:06:04 test_perplexity 128 model 6 1.1588233774938104 20240809-19:06:07 test_perplexity 128 model 5 1.1542668691747504 20240809-19:11:57 test_accuracy 128 model 5 val 1539 / 1615 20240809-19:11:59 test_accuracy 128 model 6 val 1505 / 1591 20240809-19:12:01 wrote gpt_005.pth 20240809-19:12:02 wrote gpt_006.pth 20240809-19:12:35 wrote non_validated_0128_05.png 20240809-19:13:07 wrote non_validated_0128_06.png 20240809-19:13:07 wrote state.pth 20240809-19:13:07 --- epoch 129 ---------------------------------------- 20240809-19:13:07 current_test_accuracies 0.9621 0.9554 0.9542 0.9516 0.9553 0.9529 0.9459 0.9559 0.9594 0.9507 20240809-19:13:07 training model 6 20240809-19:13:07 training model 9 20240809-19:16:50 train_perplexity 129 model 9 1.1581735695445465 20240809-19:16:50 train_perplexity 129 model 6 1.1579282116911538 20240809-19:16:58 test_perplexity 129 model 9 1.1579783558824592 20240809-19:16:58 test_perplexity 129 model 6 1.1545585469485409 20240809-19:22:46 test_accuracy 129 model 9 val 1554 / 1632 20240809-19:22:52 test_accuracy 129 model 6 val 1537 / 1607 20240809-19:22:54 wrote gpt_006.pth 20240809-19:22:54 wrote gpt_009.pth 20240809-19:23:27 wrote non_validated_0129_06.png 20240809-19:24:00 wrote non_validated_0129_09.png 20240809-19:24:00 wrote state.pth 20240809-19:24:00 --- epoch 130 ---------------------------------------- 20240809-19:24:00 current_test_accuracies 0.9621 0.9554 0.9542 0.9516 0.9553 0.9529 0.9564 0.9559 0.9594 0.9522 20240809-19:27:23 keep c_quizzes model 5 validated 27 / 420 (6.43%) nb_accumulated 27 / 420 (finishes Fri 20:16 -- 479/h) 20240809-19:30:24 keep c_quizzes model 4 validated 30 / 420 (7.14%) nb_accumulated 57 / 420 (finishes Fri 20:11 -- 534/h) 20240809-19:33:25 keep c_quizzes model 6 validated 29 / 420 (6.90%) nb_accumulated 86 / 420 (finishes Fri 20:09 -- 548/h) 20240809-19:36:25 keep c_quizzes model 9 validated 21 / 420 (5.00%) nb_accumulated 107 / 420 (finishes Fri 20:12 -- 517/h) 20240809-19:39:25 keep c_quizzes model 6 validated 22 / 420 (5.24%) nb_accumulated 129 / 420 (finishes Fri 20:14 -- 501/h) 20240809-19:42:26 keep c_quizzes model 8 validated 19 / 420 (4.52%) nb_accumulated 148 / 420 (finishes Fri 20:16 -- 481/h) 20240809-19:45:26 keep c_quizzes model 7 validated 24 / 420 (5.71%) nb_accumulated 172 / 420 (finishes Fri 20:16 -- 481/h) 20240809-19:48:27 keep c_quizzes model 2 validated 16 / 420 (3.81%) nb_accumulated 188 / 420 (finishes Fri 20:18 -- 461/h) 20240809-19:51:27 keep c_quizzes model 7 validated 27 / 420 (6.43%) nb_accumulated 215 / 420 (finishes Fri 20:17 -- 469/h) 20240809-19:54:28 keep c_quizzes model 8 validated 26 / 420 (6.19%) nb_accumulated 241 / 420 (finishes Fri 20:17 -- 474/h) 20240809-19:57:28 keep c_quizzes model 1 validated 15 / 420 (3.57%) nb_accumulated 256 / 420 (finishes Fri 20:18 -- 459/h) 20240809-20:00:28 keep c_quizzes model 1 validated 26 / 420 (6.19%) nb_accumulated 282 / 420 (finishes Fri 20:18 -- 463/h) 20240809-20:03:28 keep c_quizzes model 1 validated 34 / 420 (8.10%) nb_accumulated 316 / 420 (finishes Fri 20:16 -- 480/h) 20240809-20:06:28 keep c_quizzes model 9 validated 27 / 420 (6.43%) nb_accumulated 343 / 420 (finishes Fri 20:16 -- 484/h) 20240809-20:09:28 keep c_quizzes model 7 validated 27 / 420 (6.43%) nb_accumulated 370 / 420 (finishes Fri 20:15 -- 488/h) 20240809-20:12:28 keep c_quizzes model 1 validated 25 / 420 (5.95%) nb_accumulated 395 / 420 (finishes Fri 20:15 -- 489/h) 20240809-20:15:28 keep c_quizzes model 5 validated 22 / 420 (5.24%) nb_accumulated 417 / 420 (finishes Fri 20:15 -- 486/h) 20240809-20:18:29 keep c_quizzes model 9 validated 27 / 420 (6.43%) nb_accumulated 444 / 420 (finishes now! -- 488/h) 20240809-20:18:43 wrote c_quizzes.pth 20240809-20:18:43 training model 0 20240809-20:18:43 training model 1 20240809-20:22:24 train_perplexity 130 model 1 1.1590533477801042 20240809-20:22:26 train_perplexity 130 model 0 1.1583565584094417 20240809-20:22:31 test_perplexity 130 model 1 1.157913426301964 20240809-20:22:32 test_perplexity 130 model 0 1.1569981301918766 20240809-20:28:26 test_accuracy 130 model 1 val 1507 / 1607 20240809-20:28:30 test_accuracy 130 model 0 val 1478 / 1571 20240809-20:28:32 wrote gpt_000.pth 20240809-20:28:32 wrote gpt_001.pth 20240809-20:29:05 wrote non_validated_0130_00.png 20240809-20:29:37 wrote non_validated_0130_01.png 20240809-20:29:37 wrote state.pth 20240809-20:29:37 --- epoch 131 ---------------------------------------- 20240809-20:29:37 current_test_accuracies 0.9408 0.9378 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 20240809-20:29:37 training model 2 20240809-20:29:37 training model 3 20240809-20:33:20 train_perplexity 131 model 2 1.1589894742201756 20240809-20:33:20 train_perplexity 131 model 3 1.1586229612340597 20240809-20:33:28 test_perplexity 131 model 2 1.1561399243811108 20240809-20:33:28 test_perplexity 131 model 3 1.1565838391166208 20240809-20:39:22 test_accuracy 131 model 3 val 1549 / 1618 20240809-20:39:29 test_accuracy 131 model 2 val 1463 / 1562 20240809-20:39:31 wrote gpt_002.pth 20240809-20:39:32 wrote gpt_003.pth 20240809-20:40:04 wrote non_validated_0131_02.png 20240809-20:40:37 wrote non_validated_0131_03.png 20240809-20:40:37 wrote state.pth 20240809-20:40:37 --- epoch 132 ---------------------------------------- 20240809-20:40:37 current_test_accuracies 0.9408 0.9378 0.9366 0.9574 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 20240809-20:40:37 training model 4 20240809-20:40:37 training model 5 20240809-20:44:19 train_perplexity 132 model 5 1.1579603439863615 20240809-20:44:23 train_perplexity 132 model 4 1.158130305383025 20240809-20:44:25 test_perplexity 132 model 5 1.1577609915461988 20240809-20:44:28 test_perplexity 132 model 4 1.1553930296466066 20240809-20:50:14 test_accuracy 132 model 5 val 1556 / 1612 20240809-20:50:15 test_accuracy 132 model 4 val 1548 / 1614 20240809-20:50:17 wrote gpt_004.pth 20240809-20:50:18 wrote gpt_005.pth 20240809-20:50:51 wrote non_validated_0132_04.png 20240809-20:51:24 wrote non_validated_0132_05.png 20240809-20:51:24 wrote state.pth 20240809-20:51:24 --- epoch 133 ---------------------------------------- 20240809-20:51:24 current_test_accuracies 0.9408 0.9378 0.9366 0.9574 0.9591 0.9653 0.0000 0.0000 0.0000 0.0000 20240809-20:51:24 training model 6 20240809-20:51:24 training model 7 20240809-20:55:06 train_perplexity 133 model 7 1.1591063107372124 20240809-20:55:07 train_perplexity 133 model 6 1.1582626720158091 20240809-20:55:14 test_perplexity 133 model 7 1.158787472221148 20240809-20:55:14 test_perplexity 133 model 6 1.1566672338800381 20240809-21:01:10 test_accuracy 133 model 7 val 1498 / 1596 20240809-21:01:12 test_accuracy 133 model 6 val 1517 / 1584 20240809-21:01:14 wrote gpt_006.pth 20240809-21:01:15 wrote gpt_007.pth 20240809-21:01:48 wrote non_validated_0133_06.png 20240809-21:02:20 wrote non_validated_0133_07.png 20240809-21:02:20 wrote state.pth 20240809-21:02:20 --- epoch 134 ---------------------------------------- 20240809-21:02:20 current_test_accuracies 0.9408 0.9378 0.9366 0.9574 0.9591 0.9653 0.9577 0.9386 0.0000 0.0000 20240809-21:02:20 training model 8 20240809-21:02:20 training model 9 20240809-21:06:02 train_perplexity 134 model 9 1.1582201577654299 20240809-21:06:07 train_perplexity 134 model 8 1.1589350217547267 20240809-21:06:08 test_perplexity 134 model 9 1.1578736675743049 20240809-21:06:12 test_perplexity 134 model 8 1.1564215884402262 20240809-21:12:05 test_accuracy 134 model 9 val 1508 / 1600 20240809-21:12:10 test_accuracy 134 model 8 val 1488 / 1574 20240809-21:12:12 wrote gpt_008.pth 20240809-21:12:13 wrote gpt_009.pth 20240809-21:12:45 wrote non_validated_0134_08.png 20240809-21:13:18 wrote non_validated_0134_09.png 20240809-21:13:18 wrote state.pth 20240809-21:13:18 --- epoch 135 ---------------------------------------- 20240809-21:13:18 current_test_accuracies 0.9408 0.9378 0.9366 0.9574 0.9591 0.9653 0.9577 0.9386 0.9454 0.9425 20240809-21:13:18 training model 2 20240809-21:13:18 training model 1 20240809-21:16:59 train_perplexity 135 model 1 1.1585775310530033 20240809-21:17:04 train_perplexity 135 model 2 1.1579962505973755 20240809-21:17:06 test_perplexity 135 model 1 1.1581956799131132 20240809-21:17:09 test_perplexity 135 model 2 1.157179021448531 20240809-21:22:56 test_accuracy 135 model 2 val 1541 / 1633 20240809-21:23:01 test_accuracy 135 model 1 val 1510 / 1590 20240809-21:23:02 wrote gpt_002.pth 20240809-21:23:03 wrote gpt_001.pth 20240809-21:23:36 wrote non_validated_0135_02.png 20240809-21:24:09 wrote non_validated_0135_01.png 20240809-21:24:09 wrote state.pth 20240809-21:24:09 --- epoch 136 ---------------------------------------- 20240809-21:24:09 current_test_accuracies 0.9408 0.9497 0.9437 0.9574 0.9591 0.9653 0.9577 0.9386 0.9454 0.9425 20240809-21:24:09 training model 7 20240809-21:24:09 training model 0 20240809-21:27:51 train_perplexity 136 model 0 1.1581731415483687 20240809-21:27:56 train_perplexity 136 model 7 1.1578978955334165 20240809-21:27:57 test_perplexity 136 model 0 1.156462883893338 20240809-21:28:00 test_perplexity 136 model 7 1.1586508699804137 20240809-21:33:56 test_accuracy 136 model 0 val 1518 / 1594 20240809-21:33:59 test_accuracy 136 model 7 val 1496 / 1589 20240809-21:34:01 wrote gpt_007.pth 20240809-21:34:01 wrote gpt_000.pth 20240809-21:34:34 wrote non_validated_0136_07.png 20240809-21:35:07 wrote non_validated_0136_00.png 20240809-21:35:07 wrote state.pth 20240809-21:35:07 --- epoch 137 ---------------------------------------- 20240809-21:35:07 current_test_accuracies 0.9523 0.9497 0.9437 0.9574 0.9591 0.9653 0.9577 0.9415 0.9454 0.9425 20240809-21:35:07 training model 7 20240809-21:35:07 training model 9 20240809-21:38:49 train_perplexity 137 model 9 1.158066674620911 20240809-21:38:52 train_perplexity 137 model 7 1.1577904901553084 20240809-21:38:55 test_perplexity 137 model 9 1.1553790376134427 20240809-21:38:57 test_perplexity 137 model 7 1.1579313527485593 20240809-21:44:59 test_accuracy 137 model 7 val 1511 / 1578 20240809-21:45:00 test_accuracy 137 model 9 val 1512 / 1579 20240809-21:45:02 wrote gpt_007.pth 20240809-21:45:03 wrote gpt_009.pth 20240809-21:45:36 wrote non_validated_0137_07.png 20240809-21:46:08 wrote non_validated_0137_09.png 20240809-21:46:09 wrote state.pth 20240809-21:46:09 --- epoch 138 ---------------------------------------- 20240809-21:46:09 current_test_accuracies 0.9523 0.9497 0.9437 0.9574 0.9591 0.9653 0.9577 0.9575 0.9454 0.9576 20240809-21:46:09 training model 2 20240809-21:46:09 training model 8 20240809-21:49:51 train_perplexity 138 model 8 1.1585857364360865 20240809-21:49:52 train_perplexity 138 model 2 1.1579157430330485 20240809-21:49:58 test_perplexity 138 model 8 1.156284533255075 20240809-21:49:59 test_perplexity 138 model 2 1.156361990865268 20240809-21:55:50 test_accuracy 138 model 2 val 1568 / 1625 20240809-21:55:57 test_accuracy 138 model 8 val 1508 / 1579 20240809-21:55:59 wrote gpt_002.pth 20240809-21:55:59 wrote gpt_008.pth 20240809-21:56:32 wrote non_validated_0138_02.png 20240809-21:57:05 wrote non_validated_0138_08.png 20240809-21:57:05 wrote state.pth 20240809-21:57:05 --- epoch 139 ---------------------------------------- 20240809-21:57:05 current_test_accuracies 0.9523 0.9497 0.9649 0.9574 0.9591 0.9653 0.9577 0.9575 0.9550 0.9576 20240809-21:57:05 training model 1 20240809-21:57:05 training model 0 20240809-22:00:48 train_perplexity 139 model 0 1.1582016861372182 20240809-22:00:50 train_perplexity 139 model 1 1.1584443829549438 20240809-22:00:54 test_perplexity 139 model 0 1.1559495963525157 20240809-22:00:56 test_perplexity 139 model 1 1.1570480753937977 20240809-22:06:57 test_accuracy 139 model 1 val 1516 / 1586 20240809-22:07:01 test_accuracy 139 model 0 val 1486 / 1559 20240809-22:07:03 wrote gpt_001.pth 20240809-22:07:04 wrote gpt_000.pth 20240809-22:07:36 wrote non_validated_0139_01.png 20240809-22:08:09 wrote non_validated_0139_00.png 20240809-22:08:09 wrote state.pth 20240809-22:08:09 --- epoch 140 ---------------------------------------- 20240809-22:08:09 current_test_accuracies 0.9532 0.9559 0.9649 0.9574 0.9591 0.9653 0.9577 0.9575 0.9550 0.9576 20240809-22:11:32 keep c_quizzes model 3 validated 25 / 420 (5.95%) nb_accumulated 25 / 420 (finishes Fri 23:04 -- 444/h) 20240809-22:14:32 keep c_quizzes model 6 validated 32 / 420 (7.62%) nb_accumulated 57 / 420 (finishes Fri 22:55 -- 535/h) 20240809-22:17:34 keep c_quizzes model 3 validated 18 / 420 (4.29%) nb_accumulated 75 / 420 (finishes Fri 23:00 -- 478/h) 20240809-22:20:34 keep c_quizzes model 5 validated 13 / 420 (3.10%) nb_accumulated 88 / 420 (finishes Fri 23:07 -- 425/h) 20240809-22:23:35 keep c_quizzes model 6 validated 24 / 420 (5.71%) nb_accumulated 112 / 420 (finishes Fri 23:06 -- 435/h) 20240809-22:26:36 keep c_quizzes model 8 validated 24 / 420 (5.71%) nb_accumulated 136 / 420 (finishes Fri 23:05 -- 442/h) 20240809-22:29:36 keep c_quizzes model 5 validated 17 / 420 (4.05%) nb_accumulated 153 / 420 (finishes Fri 23:07 -- 427/h) 20240809-22:32:37 keep c_quizzes model 4 validated 11 / 420 (2.62%) nb_accumulated 164 / 420 (finishes Fri 23:10 -- 402/h) 20240809-22:35:38 keep c_quizzes model 5 validated 28 / 420 (6.67%) nb_accumulated 192 / 420 (finishes Fri 23:08 -- 419/h) 20240809-22:38:40 keep c_quizzes model 5 validated 20 / 420 (4.76%) nb_accumulated 212 / 420 (finishes Fri 23:08 -- 416/h) 20240809-22:41:41 keep c_quizzes model 1 validated 26 / 420 (6.19%) nb_accumulated 238 / 420 (finishes Fri 23:07 -- 425/h) 20240809-22:44:42 keep c_quizzes model 1 validated 15 / 420 (3.57%) nb_accumulated 253 / 420 (finishes Fri 23:08 -- 415/h) 20240809-22:47:43 keep c_quizzes model 9 validated 20 / 420 (4.76%) nb_accumulated 273 / 420 (finishes Fri 23:09 -- 414/h) 20240809-22:50:44 keep c_quizzes model 7 validated 17 / 420 (4.05%) nb_accumulated 290 / 420 (finishes Fri 23:09 -- 408/h) 20240809-22:53:44 keep c_quizzes model 8 validated 19 / 420 (4.52%) nb_accumulated 309 / 420 (finishes Fri 23:10 -- 406/h) 20240809-22:56:45 keep c_quizzes model 3 validated 19 / 420 (4.52%) nb_accumulated 328 / 420 (finishes Fri 23:10 -- 405/h) 20240809-22:59:45 keep c_quizzes model 2 validated 22 / 420 (5.24%) nb_accumulated 350 / 420 (finishes Fri 23:10 -- 407/h) 20240809-23:02:45 keep c_quizzes model 1 validated 24 / 420 (5.71%) nb_accumulated 374 / 420 (finishes Fri 23:09 -- 411/h) 20240809-23:05:46 keep c_quizzes model 5 validated 20 / 420 (4.76%) nb_accumulated 394 / 420 (finishes Fri 23:09 -- 410/h) 20240809-23:08:46 keep c_quizzes model 5 validated 17 / 420 (4.05%) nb_accumulated 411 / 420 (finishes Fri 23:10 -- 406/h) 20240809-23:11:47 keep c_quizzes model 9 validated 15 / 420 (3.57%) nb_accumulated 426 / 420 (finishes now! -- 401/h) 20240809-23:12:00 wrote c_quizzes.pth 20240809-23:12:00 training model 0 20240809-23:12:00 training model 1 20240809-23:15:42 train_perplexity 140 model 1 1.1586400940313806 20240809-23:15:43 train_perplexity 140 model 0 1.158276364796678 20240809-23:15:49 test_perplexity 140 model 1 1.1578823364936393 20240809-23:15:50 test_perplexity 140 model 0 1.1563046194966788 20240809-23:21:43 test_accuracy 140 model 1 val 1557 / 1618 20240809-23:21:47 test_accuracy 140 model 0 val 1520 / 1589 20240809-23:21:49 wrote gpt_000.pth 20240809-23:21:50 wrote gpt_001.pth 20240809-23:22:23 wrote non_validated_0140_00.png 20240809-23:22:55 wrote non_validated_0140_01.png 20240809-23:22:55 wrote state.pth 20240809-23:22:55 --- epoch 141 ---------------------------------------- 20240809-23:22:55 current_test_accuracies 0.9566 0.9623 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 20240809-23:22:55 training model 2 20240809-23:22:55 training model 3 20240809-23:26:38 train_perplexity 141 model 3 1.1591072937428304 20240809-23:26:39 train_perplexity 141 model 2 1.1580499216687916 20240809-23:26:45 test_perplexity 141 model 3 1.155160354020241 20240809-23:26:46 test_perplexity 141 model 2 1.158121415255404 20240809-23:32:31 test_accuracy 141 model 2 val 1552 / 1629 20240809-23:32:32 test_accuracy 141 model 3 val 1546 / 1618 20240809-23:32:34 wrote gpt_002.pth 20240809-23:32:35 wrote gpt_003.pth 20240809-23:33:07 wrote non_validated_0141_02.png 20240809-23:33:40 wrote non_validated_0141_03.png 20240809-23:33:40 wrote state.pth 20240809-23:33:40 --- epoch 142 ---------------------------------------- 20240809-23:33:40 current_test_accuracies 0.9566 0.9623 0.9527 0.9555 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 20240809-23:33:40 training model 4 20240809-23:33:40 training model 5 20240809-23:37:23 train_perplexity 142 model 5 1.1580322567512424 20240809-23:37:25 train_perplexity 142 model 4 1.1582210141648366 20240809-23:37:30 test_perplexity 142 model 5 1.157769060326839 20240809-23:37:31 test_perplexity 142 model 4 1.155510522688636 20240809-23:43:23 test_accuracy 142 model 4 val 1547 / 1614 20240809-23:43:28 test_accuracy 142 model 5 val 1524 / 1589 20240809-23:43:30 wrote gpt_004.pth 20240809-23:43:31 wrote gpt_005.pth 20240809-23:44:04 wrote non_validated_0142_04.png 20240809-23:44:36 wrote non_validated_0142_05.png 20240809-23:44:36 wrote state.pth 20240809-23:44:36 --- epoch 143 ---------------------------------------- 20240809-23:44:36 current_test_accuracies 0.9566 0.9623 0.9527 0.9555 0.9585 0.9591 0.0000 0.0000 0.0000 0.0000 20240809-23:44:36 training model 6 20240809-23:44:36 training model 7 20240809-23:48:19 train_perplexity 143 model 7 1.1585631652054014 20240809-23:48:22 train_perplexity 143 model 6 1.1588572603918261 20240809-23:48:25 test_perplexity 143 model 7 1.1558154344830964 20240809-23:48:27 test_perplexity 143 model 6 1.1592873179258538 20240809-23:54:14 test_accuracy 143 model 7 val 1532 / 1611 20240809-23:54:16 test_accuracy 143 model 6 val 1558 / 1619 20240809-23:54:18 wrote gpt_006.pth 20240809-23:54:19 wrote gpt_007.pth 20240809-23:54:53 wrote non_validated_0143_06.png 20240809-23:55:26 wrote non_validated_0143_07.png 20240809-23:55:26 wrote state.pth 20240809-23:55:26 --- epoch 144 ---------------------------------------- 20240809-23:55:26 current_test_accuracies 0.9566 0.9623 0.9527 0.9555 0.9585 0.9591 0.9623 0.9510 0.0000 0.0000 20240809-23:55:26 training model 8 20240809-23:55:26 training model 9 20240809-23:59:08 train_perplexity 144 model 9 1.1586217139210822 20240809-23:59:10 train_perplexity 144 model 8 1.158440277943505 20240809-23:59:15 test_perplexity 144 model 9 1.1574698095178073 20240809-23:59:16 test_perplexity 144 model 8 1.156082188912788 20240810-00:05:10 test_accuracy 144 model 9 val 1533 / 1606 20240810-00:05:12 test_accuracy 144 model 8 val 1506 / 1594 20240810-00:05:14 wrote gpt_008.pth 20240810-00:05:15 wrote gpt_009.pth 20240810-00:05:47 wrote non_validated_0144_08.png 20240810-00:06:20 wrote non_validated_0144_09.png 20240810-00:06:20 wrote state.pth 20240810-00:06:20 --- epoch 145 ---------------------------------------- 20240810-00:06:20 current_test_accuracies 0.9566 0.9623 0.9527 0.9555 0.9585 0.9591 0.9623 0.9510 0.9448 0.9545 20240810-00:06:20 training model 8 20240810-00:06:20 training model 7 20240810-00:10:02 train_perplexity 145 model 7 1.1577660082342942 20240810-00:10:07 train_perplexity 145 model 8 1.1588033447579766 20240810-00:10:08 test_perplexity 145 model 7 1.155758886755317 20240810-00:10:11 test_perplexity 145 model 8 1.1553753646964993 20240810-00:15:59 test_accuracy 145 model 8 val 1553 / 1622 20240810-00:16:04 test_accuracy 145 model 7 val 1519 / 1586 20240810-00:16:06 wrote gpt_008.pth 20240810-00:16:07 wrote gpt_007.pth 20240810-00:16:40 wrote non_validated_0145_08.png 20240810-00:17:12 wrote non_validated_0145_07.png 20240810-00:17:12 wrote state.pth 20240810-00:17:12 --- epoch 146 ---------------------------------------- 20240810-00:17:12 current_test_accuracies 0.9566 0.9623 0.9527 0.9555 0.9585 0.9591 0.9623 0.9578 0.9575 0.9545 20240810-00:20:36 keep c_quizzes model 5 validated 20 / 420 (4.76%) nb_accumulated 20 / 420 (finishes Sat 01:28 -- 354/h) 20240810-00:23:36 keep c_quizzes model 0 validated 15 / 420 (3.57%) nb_accumulated 35 / 420 (finishes Sat 01:34 -- 328/h) 20240810-00:26:38 keep c_quizzes model 8 validated 13 / 420 (3.10%) nb_accumulated 48 / 420 (finishes Sat 01:39 -- 305/h) 20240810-00:29:38 keep c_quizzes model 8 validated 13 / 420 (3.10%) nb_accumulated 61 / 420 (finishes Sat 01:42 -- 294/h) 20240810-00:32:39 keep c_quizzes model 0 validated 19 / 420 (4.52%) nb_accumulated 80 / 420 (finishes Sat 01:38 -- 310/h) 20240810-00:35:40 keep c_quizzes model 7 validated 17 / 420 (4.05%) nb_accumulated 97 / 420 (finishes Sat 01:37 -- 315/h) 20240810-00:38:41 keep c_quizzes model 3 validated 17 / 420 (4.05%) nb_accumulated 114 / 420 (finishes Sat 01:36 -- 318/h) 20240810-00:41:42 keep c_quizzes model 1 validated 16 / 420 (3.81%) nb_accumulated 130 / 420 (finishes Sat 01:36 -- 318/h) 20240810-00:44:43 keep c_quizzes model 3 validated 17 / 420 (4.05%) nb_accumulated 147 / 420 (finishes Sat 01:35 -- 320/h) 20240810-00:47:44 keep c_quizzes model 4 validated 24 / 420 (5.71%) nb_accumulated 171 / 420 (finishes Sat 01:32 -- 336/h) 20240810-00:50:45 keep c_quizzes model 9 validated 24 / 420 (5.71%) nb_accumulated 195 / 420 (finishes Sat 01:29 -- 348/h) 20240810-00:53:45 keep c_quizzes model 1 validated 25 / 420 (5.95%) nb_accumulated 220 / 420 (finishes Sat 01:26 -- 361/h) 20240810-00:56:46 keep c_quizzes model 2 validated 25 / 420 (5.95%) nb_accumulated 245 / 420 (finishes Sat 01:25 -- 371/h) 20240810-00:59:47 keep c_quizzes model 4 validated 26 / 420 (6.19%) nb_accumulated 271 / 420 (finishes Sat 01:23 -- 381/h) 20240810-01:02:48 keep c_quizzes model 7 validated 16 / 420 (3.81%) nb_accumulated 287 / 420 (finishes Sat 01:23 -- 377/h) 20240810-01:05:48 keep c_quizzes model 6 validated 24 / 420 (5.71%) nb_accumulated 311 / 420 (finishes Sat 01:22 -- 383/h) 20240810-01:08:49 keep c_quizzes model 2 validated 20 / 420 (4.76%) nb_accumulated 331 / 420 (finishes Sat 01:22 -- 384/h) 20240810-01:11:50 keep c_quizzes model 9 validated 19 / 420 (4.52%) nb_accumulated 350 / 420 (finishes Sat 01:22 -- 384/h) 20240810-01:14:51 keep c_quizzes model 9 validated 31 / 420 (7.38%) nb_accumulated 381 / 420 (finishes Sat 01:20 -- 396/h) 20240810-01:17:52 keep c_quizzes model 9 validated 21 / 420 (5.00%) nb_accumulated 402 / 420 (finishes Sat 01:20 -- 397/h) 20240810-01:20:53 keep c_quizzes model 3 validated 22 / 420 (5.24%) nb_accumulated 424 / 420 (finishes now! -- 399/h) 20240810-01:21:06 wrote c_quizzes.pth 20240810-01:21:06 training model 0 20240810-01:21:06 training model 1 20240810-01:24:48 train_perplexity 146 model 1 1.1591370248507744 20240810-01:24:50 train_perplexity 146 model 0 1.1585125842381192 20240810-01:24:55 test_perplexity 146 model 1 1.1586464024993939 20240810-01:24:56 test_perplexity 146 model 0 1.1555243266936834 20240810-01:30:47 test_accuracy 146 model 0 val 1541 / 1613 20240810-01:30:48 test_accuracy 146 model 1 val 1554 / 1617 20240810-01:30:50 wrote gpt_000.pth 20240810-01:30:51 wrote gpt_001.pth 20240810-01:31:24 wrote non_validated_0146_00.png 20240810-01:31:56 wrote non_validated_0146_01.png 20240810-01:31:57 wrote state.pth 20240810-01:31:57 --- epoch 147 ---------------------------------------- 20240810-01:31:57 current_test_accuracies 0.9554 0.9610 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 20240810-01:31:57 training model 2 20240810-01:31:57 training model 3 20240810-01:35:39 train_perplexity 147 model 3 1.1594782818360538 20240810-01:35:41 train_perplexity 147 model 2 1.158866582358896 20240810-01:35:46 test_perplexity 147 model 3 1.15986855496198 20240810-01:35:47 test_perplexity 147 model 2 1.1569101680356313 20240810-01:41:43 test_accuracy 147 model 2 val 1536 / 1583 20240810-01:41:46 test_accuracy 147 model 3 val 1488 / 1564 20240810-01:41:48 wrote gpt_002.pth 20240810-01:41:49 wrote gpt_003.pth 20240810-01:42:21 wrote non_validated_0147_02.png 20240810-01:42:54 wrote non_validated_0147_03.png 20240810-01:42:54 wrote state.pth 20240810-01:42:54 --- epoch 148 ---------------------------------------- 20240810-01:42:54 current_test_accuracies 0.9554 0.9610 0.9703 0.9514 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 20240810-01:42:54 training model 4 20240810-01:42:54 training model 5 20240810-01:46:37 train_perplexity 148 model 5 1.1581564756536848 20240810-01:46:40 train_perplexity 148 model 4 1.1588691419410972 20240810-01:46:43 test_perplexity 148 model 5 1.1562406340321165 20240810-01:46:46 test_perplexity 148 model 4 1.1564904959740938 20240810-01:52:44 test_accuracy 148 model 5 val 1516 / 1581 20240810-01:52:44 test_accuracy 148 model 4 val 1522 / 1589 20240810-01:52:46 wrote gpt_004.pth 20240810-01:52:47 wrote gpt_005.pth 20240810-01:53:20 wrote non_validated_0148_04.png 20240810-01:53:53 wrote non_validated_0148_05.png 20240810-01:53:53 wrote state.pth 20240810-01:53:53 --- epoch 149 ---------------------------------------- 20240810-01:53:53 current_test_accuracies 0.9554 0.9610 0.9703 0.9514 0.9578 0.9589 0.0000 0.0000 0.0000 0.0000 20240810-01:53:53 training model 6 20240810-01:53:53 training model 7 20240810-01:57:35 train_perplexity 149 model 7 1.1585272909015976 20240810-01:57:40 train_perplexity 149 model 6 1.1590674052736367 20240810-01:57:41 test_perplexity 149 model 7 1.1571197809830773 20240810-01:57:44 test_perplexity 149 model 6 1.15755887503595 20240810-02:03:38 test_accuracy 149 model 6 val 1506 / 1590 20240810-02:03:41 test_accuracy 149 model 7 val 1490 / 1577 20240810-02:03:43 wrote gpt_006.pth 20240810-02:03:44 wrote gpt_007.pth 20240810-02:04:16 wrote non_validated_0149_06.png 20240810-02:04:49 wrote non_validated_0149_07.png 20240810-02:04:49 wrote state.pth 20240810-02:04:49 --- epoch 150 ---------------------------------------- 20240810-02:04:49 current_test_accuracies 0.9554 0.9610 0.9703 0.9514 0.9578 0.9589 0.9472 0.9448 0.0000 0.0000 20240810-02:04:49 training model 8 20240810-02:04:49 training model 9 20240810-02:08:32 train_perplexity 150 model 9 1.15869557186351 20240810-02:08:33 train_perplexity 150 model 8 1.1591349981798098 20240810-02:08:39 test_perplexity 150 model 9 1.1572733598925218 20240810-02:08:40 test_perplexity 150 model 8 1.158499840454064 20240810-02:14:26 test_accuracy 150 model 8 val 1524 / 1633 20240810-02:14:27 test_accuracy 150 model 9 val 1551 / 1626 20240810-02:14:29 wrote gpt_008.pth 20240810-02:14:30 wrote gpt_009.pth 20240810-02:15:03 wrote non_validated_0150_08.png 20240810-02:15:35 wrote non_validated_0150_09.png 20240810-02:15:35 wrote state.pth 20240810-02:15:35 --- epoch 151 ---------------------------------------- 20240810-02:15:35 current_test_accuracies 0.9554 0.9610 0.9703 0.9514 0.9578 0.9589 0.9472 0.9448 0.9333 0.9539 20240810-02:15:35 training model 8 20240810-02:15:35 training model 7 20240810-02:19:18 train_perplexity 151 model 7 1.1582656802840519 20240810-02:19:20 train_perplexity 151 model 8 1.1590679626018805 20240810-02:19:25 test_perplexity 151 model 7 1.1584186267456653 20240810-02:19:26 test_perplexity 151 model 8 1.1551623182666924 20240810-02:25:14 test_accuracy 151 model 8 val 1540 / 1612 20240810-02:25:17 test_accuracy 151 model 7 val 1533 / 1608 20240810-02:25:19 wrote gpt_008.pth 20240810-02:25:20 wrote gpt_007.pth 20240810-02:25:52 wrote non_validated_0151_08.png 20240810-02:26:25 wrote non_validated_0151_07.png 20240810-02:26:25 wrote state.pth 20240810-02:26:25 --- epoch 152 ---------------------------------------- 20240810-02:26:25 current_test_accuracies 0.9554 0.9610 0.9703 0.9514 0.9578 0.9589 0.9472 0.9534 0.9553 0.9539 20240810-02:26:25 training model 6 20240810-02:26:25 training model 3 20240810-02:30:08 train_perplexity 152 model 3 1.159099778770743 20240810-02:30:10 train_perplexity 152 model 6 1.1588316341554428 20240810-02:30:14 test_perplexity 152 model 3 1.1572176951774262 20240810-02:30:16 test_perplexity 152 model 6 1.157136939697367 20240810-02:36:07 test_accuracy 152 model 3 val 1530 / 1606 20240810-02:36:10 test_accuracy 152 model 6 val 1537 / 1609 20240810-02:36:12 wrote gpt_006.pth 20240810-02:36:12 wrote gpt_003.pth 20240810-02:36:45 wrote non_validated_0152_06.png 20240810-02:37:18 wrote non_validated_0152_03.png 20240810-02:37:18 wrote state.pth 20240810-02:37:18 --- epoch 153 ---------------------------------------- 20240810-02:37:18 current_test_accuracies 0.9554 0.9610 0.9703 0.9527 0.9578 0.9589 0.9553 0.9534 0.9553 0.9539 20240810-02:40:39 keep c_quizzes model 1 validated 25 / 420 (5.95%) nb_accumulated 25 / 420 (finishes Sat 03:33 -- 448/h) 20240810-02:43:39 keep c_quizzes model 8 validated 22 / 420 (5.24%) nb_accumulated 47 / 420 (finishes Sat 03:34 -- 444/h) 20240810-02:46:39 keep c_quizzes model 8 validated 12 / 420 (2.86%) nb_accumulated 59 / 420 (finishes Sat 03:43 -- 378/h) 20240810-02:49:38 keep c_quizzes model 2 validated 20 / 420 (4.76%) nb_accumulated 79 / 420 (finishes Sat 03:42 -- 384/h) 20240810-02:52:39 keep c_quizzes model 4 validated 29 / 420 (6.90%) nb_accumulated 108 / 420 (finishes Sat 03:36 -- 422/h) 20240810-02:55:39 keep c_quizzes model 1 validated 21 / 420 (5.00%) nb_accumulated 129 / 420 (finishes Sat 03:37 -- 422/h) 20240810-02:58:38 keep c_quizzes model 8 validated 23 / 420 (5.48%) nb_accumulated 152 / 420 (finishes Sat 03:36 -- 427/h) 20240810-03:01:38 keep c_quizzes model 1 validated 26 / 420 (6.19%) nb_accumulated 178 / 420 (finishes Sat 03:34 -- 438/h) 20240810-03:04:39 keep c_quizzes model 2 validated 22 / 420 (5.24%) nb_accumulated 200 / 420 (finishes Sat 03:34 -- 438/h) 20240810-03:07:39 keep c_quizzes model 5 validated 19 / 420 (4.52%) nb_accumulated 219 / 420 (finishes Sat 03:35 -- 433/h) 20240810-03:10:39 keep c_quizzes model 8 validated 15 / 420 (3.57%) nb_accumulated 234 / 420 (finishes Sat 03:37 -- 421/h) 20240810-03:13:38 keep c_quizzes model 5 validated 19 / 420 (4.52%) nb_accumulated 253 / 420 (finishes Sat 03:37 -- 417/h) 20240810-03:16:38 keep c_quizzes model 1 validated 19 / 420 (4.52%) nb_accumulated 272 / 420 (finishes Sat 03:38 -- 414/h) 20240810-03:19:38 keep c_quizzes model 6 validated 22 / 420 (5.24%) nb_accumulated 294 / 420 (finishes Sat 03:37 -- 416/h) 20240810-03:22:38 keep c_quizzes model 5 validated 25 / 420 (5.95%) nb_accumulated 319 / 420 (finishes Sat 03:37 -- 422/h) 20240810-03:25:38 keep c_quizzes model 1 validated 19 / 420 (4.52%) nb_accumulated 338 / 420 (finishes Sat 03:37 -- 419/h) 20240810-03:28:39 keep c_quizzes model 3 validated 21 / 420 (5.00%) nb_accumulated 359 / 420 (finishes Sat 03:37 -- 419/h) 20240810-03:31:40 keep c_quizzes model 3 validated 24 / 420 (5.71%) nb_accumulated 383 / 420 (finishes Sat 03:36 -- 422/h) 20240810-03:34:40 keep c_quizzes model 2 validated 18 / 420 (4.29%) nb_accumulated 401 / 420 (finishes Sat 03:37 -- 419/h) 20240810-03:37:39 keep c_quizzes model 2 validated 12 / 420 (2.86%) nb_accumulated 413 / 420 (finishes Sat 03:38 -- 410/h) 20240810-03:40:38 keep c_quizzes model 2 validated 19 / 420 (4.52%) nb_accumulated 432 / 420 (finishes now! -- 409/h) 20240810-03:40:52 wrote c_quizzes.pth 20240810-03:40:52 training model 0 20240810-03:40:52 training model 1 20240810-03:44:34 train_perplexity 153 model 1 1.1590928320526355 20240810-03:44:34 train_perplexity 153 model 0 1.158809815410516 20240810-03:44:41 test_perplexity 153 model 1 1.1576618186714402 20240810-03:44:41 test_perplexity 153 model 0 1.1574447297744825 20240810-03:50:33 test_accuracy 153 model 1 val 1527 / 1611 20240810-03:50:35 test_accuracy 153 model 0 val 1533 / 1606 20240810-03:50:37 wrote gpt_000.pth 20240810-03:50:37 wrote gpt_001.pth 20240810-03:51:10 wrote non_validated_0153_00.png 20240810-03:51:42 wrote non_validated_0153_01.png 20240810-03:51:42 wrote state.pth 20240810-03:51:42 --- epoch 154 ---------------------------------------- 20240810-03:51:42 current_test_accuracies 0.9545 0.9479 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 20240810-03:51:42 training model 2 20240810-03:51:42 training model 3 20240810-03:55:25 train_perplexity 154 model 3 1.159176868911222 20240810-03:55:25 train_perplexity 154 model 2 1.1586020081598485 20240810-03:55:33 test_perplexity 154 model 3 1.1586247121370739 20240810-03:55:33 test_perplexity 154 model 2 1.1575948379750325 20240810-04:01:22 test_accuracy 154 model 3 val 1551 / 1615 20240810-04:01:23 test_accuracy 154 model 2 val 1538 / 1600 20240810-04:01:25 wrote gpt_002.pth 20240810-04:01:26 wrote gpt_003.pth 20240810-04:01:58 wrote non_validated_0154_02.png 20240810-04:02:31 wrote non_validated_0154_03.png 20240810-04:02:31 wrote state.pth 20240810-04:02:31 --- epoch 155 ---------------------------------------- 20240810-04:02:31 current_test_accuracies 0.9545 0.9479 0.9613 0.9604 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 20240810-04:02:31 training model 4 20240810-04:02:31 training model 5 20240810-04:06:13 train_perplexity 155 model 5 1.1586998272327727 20240810-04:06:17 train_perplexity 155 model 4 1.1590602161033456 20240810-04:06:19 test_perplexity 155 model 5 1.156264652072579 20240810-04:06:22 test_perplexity 155 model 4 1.1569863779704057 20240810-04:12:21 test_accuracy 155 model 4 val 1500 / 1572 20240810-04:12:24 test_accuracy 155 model 5 val 1480 / 1558 20240810-04:12:26 wrote gpt_004.pth 20240810-04:12:26 wrote gpt_005.pth 20240810-04:12:59 wrote non_validated_0155_04.png 20240810-04:13:32 wrote non_validated_0155_05.png 20240810-04:13:32 wrote state.pth 20240810-04:13:32 --- epoch 156 ---------------------------------------- 20240810-04:13:32 current_test_accuracies 0.9545 0.9479 0.9613 0.9604 0.9542 0.9499 0.0000 0.0000 0.0000 0.0000 20240810-04:13:32 training model 6 20240810-04:13:32 training model 7 20240810-04:17:14 train_perplexity 156 model 7 1.1585441214812187 20240810-04:17:16 train_perplexity 156 model 6 1.1591740230827956 20240810-04:17:21 test_perplexity 156 model 7 1.156267918933338 20240810-04:17:23 test_perplexity 156 model 6 1.1568054834298205 20240810-04:23:16 test_accuracy 156 model 6 val 1525 / 1590 20240810-04:23:19 test_accuracy 156 model 7 val 1513 / 1587 20240810-04:23:20 wrote gpt_006.pth 20240810-04:23:21 wrote gpt_007.pth 20240810-04:23:54 wrote non_validated_0156_06.png 20240810-04:24:26 wrote non_validated_0156_07.png 20240810-04:24:27 wrote state.pth 20240810-04:24:27 --- epoch 157 ---------------------------------------- 20240810-04:24:27 current_test_accuracies 0.9545 0.9479 0.9613 0.9604 0.9542 0.9499 0.9591 0.9534 0.0000 0.0000 20240810-04:24:27 training model 8 20240810-04:24:27 training model 9 20240810-04:28:09 train_perplexity 157 model 9 1.1592878035725205 20240810-04:28:10 train_perplexity 157 model 8 1.1585845363701293 20240810-04:28:17 test_perplexity 157 model 9 1.1582355770913308 20240810-04:28:17 test_perplexity 157 model 8 1.1587959469247073 20240810-04:34:12 test_accuracy 157 model 9 val 1530 / 1599 20240810-04:34:14 test_accuracy 157 model 8 val 1501 / 1581 20240810-04:34:16 wrote gpt_008.pth 20240810-04:34:17 wrote gpt_009.pth 20240810-04:34:50 wrote non_validated_0157_08.png 20240810-04:35:22 wrote non_validated_0157_09.png 20240810-04:35:22 wrote state.pth 20240810-04:35:22 --- epoch 158 ---------------------------------------- 20240810-04:35:22 current_test_accuracies 0.9545 0.9479 0.9613 0.9604 0.9542 0.9499 0.9591 0.9534 0.9494 0.9568 20240810-04:35:22 training model 1 20240810-04:35:22 training model 8 20240810-04:39:04 train_perplexity 158 model 8 1.1585440232186694 20240810-04:39:09 train_perplexity 158 model 1 1.158679499992334 20240810-04:39:10 test_perplexity 158 model 8 1.157428653920889 20240810-04:39:13 test_perplexity 158 model 1 1.1567696318460206 20240810-04:45:07 test_accuracy 158 model 8 val 1551 / 1615 20240810-04:45:09 test_accuracy 158 model 1 val 1519 / 1604 20240810-04:45:11 wrote gpt_001.pth 20240810-04:45:12 wrote gpt_008.pth 20240810-04:45:45 wrote non_validated_0158_01.png 20240810-04:46:18 wrote non_validated_0158_08.png 20240810-04:46:18 wrote state.pth 20240810-04:46:18 --- epoch 159 ---------------------------------------- 20240810-04:46:18 current_test_accuracies 0.9545 0.9470 0.9613 0.9604 0.9542 0.9499 0.9591 0.9534 0.9604 0.9568 20240810-04:46:18 training model 1 20240810-04:46:18 training model 5 20240810-04:50:00 train_perplexity 159 model 5 1.158959987584519 20240810-04:50:03 train_perplexity 159 model 1 1.158665153582723 20240810-04:50:07 test_perplexity 159 model 5 1.1575179624925094 20240810-04:50:09 test_perplexity 159 model 1 1.1577608416696048 20240810-04:55:58 test_accuracy 159 model 5 val 1541 / 1619 20240810-04:55:58 test_accuracy 159 model 1 val 1530 / 1602 20240810-04:56:01 wrote gpt_001.pth 20240810-04:56:01 wrote gpt_005.pth 20240810-04:56:34 wrote non_validated_0159_01.png 20240810-04:57:07 wrote non_validated_0159_05.png 20240810-04:57:07 wrote state.pth 20240810-04:57:07 --- epoch 160 ---------------------------------------- 20240810-04:57:07 current_test_accuracies 0.9545 0.9551 0.9613 0.9604 0.9542 0.9518 0.9591 0.9534 0.9604 0.9568 20240810-05:00:33 keep c_quizzes model 4 validated 18 / 420 (4.29%) nb_accumulated 18 / 420 (finishes Sat 06:17 -- 314/h) 20240810-05:03:34 keep c_quizzes model 4 validated 14 / 420 (3.33%) nb_accumulated 32 / 420 (finishes Sat 06:21 -- 297/h) 20240810-05:06:35 keep c_quizzes model 3 validated 12 / 420 (2.86%) nb_accumulated 44 / 420 (finishes Sat 06:27 -- 278/h) 20240810-05:09:36 keep c_quizzes model 3 validated 15 / 420 (3.57%) nb_accumulated 59 / 420 (finishes Sat 06:26 -- 283/h) 20240810-05:12:38 keep c_quizzes model 3 validated 17 / 420 (4.05%) nb_accumulated 76 / 420 (finishes Sat 06:22 -- 293/h) 20240810-05:15:39 keep c_quizzes model 7 validated 15 / 420 (3.57%) nb_accumulated 91 / 420 (finishes Sat 06:22 -- 294/h) 20240810-05:18:39 keep c_quizzes model 6 validated 14 / 420 (3.33%) nb_accumulated 105 / 420 (finishes Sat 06:23 -- 292/h) 20240810-05:21:40 keep c_quizzes model 3 validated 17 / 420 (4.05%) nb_accumulated 122 / 420 (finishes Sat 06:21 -- 298/h) 20240810-05:24:41 keep c_quizzes model 5 validated 13 / 420 (3.10%) nb_accumulated 135 / 420 (finishes Sat 06:22 -- 293/h) 20240810-05:27:41 keep c_quizzes model 1 validated 18 / 420 (4.29%) nb_accumulated 153 / 420 (finishes Sat 06:21 -- 300/h) 20240810-05:30:42 keep c_quizzes model 0 validated 12 / 420 (2.86%) nb_accumulated 165 / 420 (finishes Sat 06:22 -- 294/h) 20240810-05:33:43 keep c_quizzes model 7 validated 20 / 420 (4.76%) nb_accumulated 185 / 420 (finishes Sat 06:20 -- 303/h) 20240810-05:36:44 keep c_quizzes model 1 validated 26 / 420 (6.19%) nb_accumulated 211 / 420 (finishes Sat 06:15 -- 319/h) 20240810-05:39:44 keep c_quizzes model 7 validated 18 / 420 (4.29%) nb_accumulated 229 / 420 (finishes Sat 06:15 -- 322/h) 20240810-05:42:45 keep c_quizzes model 8 validated 20 / 420 (4.76%) nb_accumulated 249 / 420 (finishes Sat 06:14 -- 327/h) 20240810-05:45:45 keep c_quizzes model 4 validated 25 / 420 (5.95%) nb_accumulated 274 / 420 (finishes Sat 06:11 -- 337/h) 20240810-05:48:46 keep c_quizzes model 0 validated 19 / 420 (4.52%) nb_accumulated 293 / 420 (finishes Sat 06:11 -- 340/h) 20240810-05:51:47 keep c_quizzes model 1 validated 23 / 420 (5.48%) nb_accumulated 316 / 420 (finishes Sat 06:09 -- 346/h) 20240810-05:54:47 keep c_quizzes model 1 validated 17 / 420 (4.05%) nb_accumulated 333 / 420 (finishes Sat 06:09 -- 346/h) 20240810-05:57:48 keep c_quizzes model 2 validated 15 / 420 (3.57%) nb_accumulated 348 / 420 (finishes Sat 06:10 -- 344/h) 20240810-06:00:47 keep c_quizzes model 9 validated 18 / 420 (4.29%) nb_accumulated 366 / 420 (finishes Sat 06:10 -- 344/h) 20240810-06:03:48 keep c_quizzes model 2 validated 13 / 420 (3.10%) nb_accumulated 379 / 420 (finishes Sat 06:11 -- 341/h) 20240810-06:06:48 keep c_quizzes model 6 validated 31 / 420 (7.38%) nb_accumulated 410 / 420 (finishes Sat 06:08 -- 352/h) 20240810-06:09:49 keep c_quizzes model 7 validated 17 / 420 (4.05%) nb_accumulated 427 / 420 (finishes now! -- 352/h) 20240810-06:10:03 wrote c_quizzes.pth 20240810-06:10:03 training model 0 20240810-06:10:03 training model 1 20240810-06:13:44 train_perplexity 160 model 1 1.1591553633525111 20240810-06:13:44 train_perplexity 160 model 0 1.159053169287242 20240810-06:13:52 test_perplexity 160 model 1 1.1586351630317122 20240810-06:13:52 test_perplexity 160 model 0 1.1583521371300651 20240810-06:19:31 test_accuracy 160 model 1 val 1577 / 1652 20240810-06:19:38 test_accuracy 160 model 0 val 1521 / 1617 20240810-06:19:40 wrote gpt_000.pth 20240810-06:19:40 wrote gpt_001.pth 20240810-06:20:13 wrote non_validated_0160_00.png 20240810-06:20:45 wrote non_validated_0160_01.png 20240810-06:20:46 wrote state.pth 20240810-06:20:46 --- epoch 161 ---------------------------------------- 20240810-06:20:46 current_test_accuracies 0.9406 0.9546 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 20240810-06:20:46 training model 2 20240810-06:20:46 training model 3 20240810-06:24:28 train_perplexity 161 model 3 1.159324168263187 20240810-06:24:30 train_perplexity 161 model 2 1.1592054664209628 20240810-06:24:35 test_perplexity 161 model 3 1.1583177092661447 20240810-06:24:36 test_perplexity 161 model 2 1.1574844886496825 20240810-06:30:28 test_accuracy 161 model 2 val 1531 / 1590 20240810-06:30:30 test_accuracy 161 model 3 val 1523 / 1585 20240810-06:30:31 wrote gpt_002.pth 20240810-06:30:32 wrote gpt_003.pth 20240810-06:31:05 wrote non_validated_0161_02.png 20240810-06:31:37 wrote non_validated_0161_03.png 20240810-06:31:37 wrote state.pth 20240810-06:31:37 --- epoch 162 ---------------------------------------- 20240810-06:31:37 current_test_accuracies 0.9406 0.9546 0.9629 0.9609 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 20240810-06:31:37 training model 4 20240810-06:31:37 training model 5 20240810-06:35:21 train_perplexity 162 model 5 1.1585936893639515 20240810-06:35:21 train_perplexity 162 model 4 1.1586917330269044 20240810-06:35:28 test_perplexity 162 model 5 1.1559254057202468 20240810-06:35:28 test_perplexity 162 model 4 1.159303949936569 20240810-06:41:23 test_accuracy 162 model 4 val 1513 / 1598 20240810-06:41:24 test_accuracy 162 model 5 val 1505 / 1588 20240810-06:41:26 wrote gpt_004.pth 20240810-06:41:27 wrote gpt_005.pth 20240810-06:41:59 wrote non_validated_0162_04.png 20240810-06:42:32 wrote non_validated_0162_05.png 20240810-06:42:32 wrote state.pth 20240810-06:42:32 --- epoch 163 ---------------------------------------- 20240810-06:42:32 current_test_accuracies 0.9406 0.9546 0.9629 0.9609 0.9468 0.9477 0.0000 0.0000 0.0000 0.0000 20240810-06:42:32 training model 6 20240810-06:42:32 training model 7 20240810-06:46:14 train_perplexity 163 model 7 1.1582915595969216 20240810-06:46:19 train_perplexity 163 model 6 1.1592723935915394 20240810-06:46:21 test_perplexity 163 model 7 1.1590121622373721 20240810-06:46:23 test_perplexity 163 model 6 1.1599436932535885 20240810-06:52:17 test_accuracy 163 model 7 val 1521 / 1601 20240810-06:52:20 test_accuracy 163 model 6 val 1512 / 1587 20240810-06:52:22 wrote gpt_006.pth 20240810-06:52:23 wrote gpt_007.pth 20240810-06:52:55 wrote non_validated_0163_06.png 20240810-06:53:28 wrote non_validated_0163_07.png 20240810-06:53:28 wrote state.pth 20240810-06:53:28 --- epoch 164 ---------------------------------------- 20240810-06:53:28 current_test_accuracies 0.9406 0.9546 0.9629 0.9609 0.9468 0.9477 0.9527 0.9500 0.0000 0.0000 20240810-06:53:28 training model 8 20240810-06:53:28 training model 9 20240810-06:57:11 train_perplexity 164 model 9 1.1588599355704903 20240810-06:57:13 train_perplexity 164 model 8 1.1589207935842551 20240810-06:57:18 test_perplexity 164 model 9 1.1581804446758326 20240810-06:57:19 test_perplexity 164 model 8 1.1561707791856133 20240810-07:03:14 test_accuracy 164 model 9 val 1513 / 1599 20240810-07:03:16 test_accuracy 164 model 8 val 1513 / 1583 20240810-07:03:18 wrote gpt_008.pth 20240810-07:03:19 wrote gpt_009.pth 20240810-07:03:52 wrote non_validated_0164_08.png 20240810-07:04:25 wrote non_validated_0164_09.png 20240810-07:04:25 wrote state.pth 20240810-07:04:25 --- epoch 165 ---------------------------------------- 20240810-07:04:25 current_test_accuracies 0.9406 0.9546 0.9629 0.9609 0.9468 0.9477 0.9527 0.9500 0.9558 0.9462 20240810-07:04:25 training model 0 20240810-07:04:25 training model 9 20240810-07:08:07 train_perplexity 165 model 9 1.1592098801614028 20240810-07:08:13 train_perplexity 165 model 0 1.1591383173409509 20240810-07:08:13 test_perplexity 165 model 9 1.1589672166257863 20240810-07:08:17 test_perplexity 165 model 0 1.1567119459421307 20240810-07:14:05 test_accuracy 165 model 9 val 1545 / 1612 20240810-07:14:07 test_accuracy 165 model 0 val 1521 / 1604 20240810-07:14:09 wrote gpt_000.pth 20240810-07:14:10 wrote gpt_009.pth 20240810-07:14:42 wrote non_validated_0165_00.png 20240810-07:15:15 wrote non_validated_0165_09.png 20240810-07:15:15 wrote state.pth 20240810-07:15:15 --- epoch 166 ---------------------------------------- 20240810-07:15:15 current_test_accuracies 0.9483 0.9546 0.9629 0.9609 0.9468 0.9477 0.9527 0.9500 0.9558 0.9584 20240810-07:15:15 training model 4 20240810-07:15:15 training model 5 20240810-07:18:58 train_perplexity 166 model 5 1.1585684196690231 20240810-07:19:00 train_perplexity 166 model 4 1.1586658839117514 20240810-07:19:05 test_perplexity 166 model 5 1.1563296650590065 20240810-07:19:06 test_perplexity 166 model 4 1.1559099159288626 20240810-07:24:59 test_accuracy 166 model 5 val 1529 / 1590 20240810-07:25:02 test_accuracy 166 model 4 val 1509 / 1586 20240810-07:25:04 wrote gpt_004.pth 20240810-07:25:05 wrote gpt_005.pth 20240810-07:25:37 wrote non_validated_0166_04.png 20240810-07:26:10 wrote non_validated_0166_05.png 20240810-07:26:10 wrote state.pth 20240810-07:26:10 --- epoch 167 ---------------------------------------- 20240810-07:26:10 current_test_accuracies 0.9483 0.9546 0.9629 0.9609 0.9515 0.9616 0.9527 0.9500 0.9558 0.9584 20240810-07:26:10 training model 0 20240810-07:26:10 training model 7 20240810-07:29:53 train_perplexity 167 model 7 1.1592888253465206 20240810-07:29:54 train_perplexity 167 model 0 1.1589356671314734 20240810-07:30:00 test_perplexity 167 model 7 1.1588368827297915 20240810-07:30:01 test_perplexity 167 model 0 1.1563677867879185 20240810-07:35:53 test_accuracy 167 model 7 val 1527 / 1592 20240810-07:35:54 test_accuracy 167 model 0 val 1523 / 1592 20240810-07:35:56 wrote gpt_000.pth 20240810-07:35:57 wrote gpt_007.pth 20240810-07:36:29 wrote non_validated_0167_00.png 20240810-07:37:02 wrote non_validated_0167_07.png 20240810-07:37:02 wrote state.pth 20240810-07:37:02 --- epoch 168 ---------------------------------------- 20240810-07:37:02 current_test_accuracies 0.9567 0.9546 0.9629 0.9609 0.9515 0.9616 0.9527 0.9592 0.9558 0.9584 20240810-07:40:26 keep c_quizzes model 0 validated 21 / 420 (5.00%) nb_accumulated 21 / 420 (finishes Sat 08:45 -- 370/h) 20240810-07:43:27 keep c_quizzes model 9 validated 17 / 420 (4.05%) nb_accumulated 38 / 420 (finishes Sat 08:47 -- 355/h) 20240810-07:46:27 keep c_quizzes model 2 validated 28 / 420 (6.67%) nb_accumulated 66 / 420 (finishes Sat 08:36 -- 420/h) 20240810-07:49:27 keep c_quizzes model 5 validated 27 / 420 (6.43%) nb_accumulated 93 / 420 (finishes Sat 08:33 -- 449/h) 20240810-07:52:27 keep c_quizzes model 1 validated 23 / 420 (5.48%) nb_accumulated 116 / 420 (finishes Sat 08:32 -- 451/h) 20240810-07:55:27 keep c_quizzes model 3 validated 21 / 420 (5.00%) nb_accumulated 137 / 420 (finishes Sat 08:33 -- 446/h) 20240810-07:58:28 keep c_quizzes model 7 validated 19 / 420 (4.52%) nb_accumulated 156 / 420 (finishes Sat 08:34 -- 436/h) 20240810-08:01:28 keep c_quizzes model 3 validated 33 / 420 (7.86%) nb_accumulated 189 / 420 (finishes Sat 08:31 -- 463/h) 20240810-08:04:28 keep c_quizzes model 7 validated 21 / 420 (5.00%) nb_accumulated 210 / 420 (finishes Sat 08:31 -- 459/h) 20240810-08:07:29 keep c_quizzes model 3 validated 24 / 420 (5.71%) nb_accumulated 234 / 420 (finishes Sat 08:31 -- 461/h) 20240810-08:10:29 keep c_quizzes model 8 validated 23 / 420 (5.48%) nb_accumulated 257 / 420 (finishes Sat 08:31 -- 460/h) 20240810-08:13:30 keep c_quizzes model 8 validated 23 / 420 (5.48%) nb_accumulated 280 / 420 (finishes Sat 08:31 -- 460/h) 20240810-08:16:30 keep c_quizzes model 6 validated 21 / 420 (5.00%) nb_accumulated 301 / 420 (finishes Sat 08:32 -- 457/h) 20240810-08:19:31 keep c_quizzes model 2 validated 16 / 420 (3.81%) nb_accumulated 317 / 420 (finishes Sat 08:33 -- 447/h) 20240810-08:22:31 keep c_quizzes model 5 validated 27 / 420 (6.43%) nb_accumulated 344 / 420 (finishes Sat 08:32 -- 453/h) 20240810-08:25:32 keep c_quizzes model 9 validated 26 / 420 (6.19%) nb_accumulated 370 / 420 (finishes Sat 08:32 -- 457/h) 20240810-08:28:34 keep c_quizzes model 2 validated 17 / 420 (4.05%) nb_accumulated 387 / 420 (finishes Sat 08:32 -- 450/h) 20240810-08:31:34 keep c_quizzes model 8 validated 29 / 420 (6.90%) nb_accumulated 416 / 420 (finishes Sat 08:32 -- 457/h) 20240810-08:34:34 keep c_quizzes model 1 validated 23 / 420 (5.48%) nb_accumulated 439 / 420 (finishes now! -- 457/h) 20240810-08:34:47 wrote c_quizzes.pth 20240810-08:34:47 training model 0 20240810-08:34:47 training model 1 20240810-08:38:29 train_perplexity 168 model 1 1.1590364317756816 20240810-08:38:30 train_perplexity 168 model 0 1.1595205581223318 20240810-08:38:36 test_perplexity 168 model 1 1.157758812299302 20240810-08:38:37 test_perplexity 168 model 0 1.1591907374378658 20240810-08:44:27 test_accuracy 168 model 0 val 1538 / 1619 20240810-08:44:30 test_accuracy 168 model 1 val 1525 / 1580 20240810-08:44:32 wrote gpt_000.pth 20240810-08:44:33 wrote gpt_001.pth 20240810-08:45:06 wrote non_validated_0168_00.png 20240810-08:45:38 wrote non_validated_0168_01.png 20240810-08:45:38 wrote state.pth 20240810-08:45:38 --- epoch 169 ---------------------------------------- 20240810-08:45:38 current_test_accuracies 0.9500 0.9652 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 20240810-08:45:38 training model 2 20240810-08:45:38 training model 3 20240810-08:49:22 train_perplexity 169 model 3 1.1598556603297308 20240810-08:49:22 train_perplexity 169 model 2 1.1595434115081857 20240810-08:49:29 test_perplexity 169 model 3 1.1599671758996992 20240810-08:49:29 test_perplexity 169 model 2 1.1592213083220526 20240810-08:55:15 test_accuracy 169 model 3 val 1556 / 1629 20240810-08:55:20 test_accuracy 169 model 2 val 1529 / 1598 20240810-08:55:22 wrote gpt_002.pth 20240810-08:55:23 wrote gpt_003.pth 20240810-08:55:55 wrote non_validated_0169_02.png 20240810-08:56:28 wrote non_validated_0169_03.png 20240810-08:56:28 wrote state.pth 20240810-08:56:28 --- epoch 170 ---------------------------------------- 20240810-08:56:28 current_test_accuracies 0.9500 0.9652 0.9568 0.9552 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 20240810-08:56:28 training model 4 20240810-08:56:28 training model 5 20240810-09:00:11 train_perplexity 170 model 5 1.1589802902838784 20240810-09:00:13 train_perplexity 170 model 4 1.159897350686579 20240810-09:00:17 test_perplexity 170 model 5 1.1581878884994532 20240810-09:00:19 test_perplexity 170 model 4 1.156933839351863 20240810-09:06:14 test_accuracy 170 model 4 val 1528 / 1594 20240810-09:06:15 test_accuracy 170 model 5 val 1522 / 1597 20240810-09:06:16 wrote gpt_004.pth 20240810-09:06:17 wrote gpt_005.pth 20240810-09:06:50 wrote non_validated_0170_04.png 20240810-09:07:23 wrote non_validated_0170_05.png 20240810-09:07:24 wrote state.pth 20240810-09:07:24 --- epoch 171 ---------------------------------------- 20240810-09:07:24 current_test_accuracies 0.9500 0.9652 0.9568 0.9552 0.9586 0.9530 0.0000 0.0000 0.0000 0.0000 20240810-09:07:24 training model 6 20240810-09:07:24 training model 7 20240810-09:11:06 train_perplexity 171 model 7 1.1594369152810267 20240810-09:11:09 train_perplexity 171 model 6 1.1597521020703976 20240810-09:11:12 test_perplexity 171 model 7 1.1559434314369244 20240810-09:11:15 test_perplexity 171 model 6 1.159984105353186 20240810-09:17:04 test_accuracy 171 model 6 val 1529 / 1613 20240810-09:17:08 test_accuracy 171 model 7 val 1523 / 1585 20240810-09:17:10 wrote gpt_006.pth 20240810-09:17:11 wrote gpt_007.pth 20240810-09:17:44 wrote non_validated_0171_06.png 20240810-09:18:16 wrote non_validated_0171_07.png 20240810-09:18:16 wrote state.pth 20240810-09:18:16 --- epoch 172 ---------------------------------------- 20240810-09:18:16 current_test_accuracies 0.9500 0.9652 0.9568 0.9552 0.9586 0.9530 0.9479 0.9609 0.0000 0.0000 20240810-09:18:16 training model 8 20240810-09:18:16 training model 9 20240810-09:21:59 train_perplexity 172 model 9 1.1598308679362384 20240810-09:22:00 train_perplexity 172 model 8 1.1595023545069558 20240810-09:22:07 test_perplexity 172 model 9 1.158812761937964 20240810-09:22:07 test_perplexity 172 model 8 1.1589071535944877 20240810-09:27:59 test_accuracy 172 model 8 val 1549 / 1617 20240810-09:28:02 test_accuracy 172 model 9 val 1530 / 1582 20240810-09:28:04 wrote gpt_008.pth 20240810-09:28:05 wrote gpt_009.pth 20240810-09:28:38 wrote non_validated_0172_08.png 20240810-09:29:11 wrote non_validated_0172_09.png 20240810-09:29:11 wrote state.pth 20240810-09:29:11 --- epoch 173 ---------------------------------------- 20240810-09:29:11 current_test_accuracies 0.9500 0.9652 0.9568 0.9552 0.9586 0.9530 0.9479 0.9609 0.9579 0.9671 20240810-09:29:11 training model 6 20240810-09:29:11 training model 0 20240810-09:32:54 train_perplexity 173 model 0 1.1588857918047966 20240810-09:32:55 train_perplexity 173 model 6 1.1591907685459784 20240810-09:33:01 test_perplexity 173 model 0 1.1573939116998109 20240810-09:33:02 test_perplexity 173 model 6 1.158269874038192 20240810-09:38:51 test_accuracy 173 model 6 val 1566 / 1634 20240810-09:38:53 test_accuracy 173 model 0 val 1539 / 1622 20240810-09:38:55 wrote gpt_006.pth 20240810-09:38:56 wrote gpt_000.pth 20240810-09:39:28 wrote non_validated_0173_06.png 20240810-09:40:01 wrote non_validated_0173_00.png 20240810-09:40:01 wrote state.pth 20240810-09:40:01 --- epoch 174 ---------------------------------------- 20240810-09:40:01 current_test_accuracies 0.9488 0.9652 0.9568 0.9552 0.9586 0.9530 0.9584 0.9609 0.9579 0.9671 20240810-09:40:01 training model 0 20240810-09:40:01 training model 5 20240810-09:43:43 train_perplexity 174 model 5 1.1589850728306235 20240810-09:43:48 train_perplexity 174 model 0 1.158797089935762 20240810-09:43:49 test_perplexity 174 model 5 1.158565183587923 20240810-09:43:52 test_perplexity 174 model 0 1.1597049327847455 20240810-09:49:44 test_accuracy 174 model 5 val 1517 / 1601 20240810-09:49:48 test_accuracy 174 model 0 val 1521 / 1585 20240810-09:49:50 wrote gpt_000.pth 20240810-09:49:51 wrote gpt_005.pth 20240810-09:50:23 wrote non_validated_0174_00.png 20240810-09:50:57 wrote non_validated_0174_05.png 20240810-09:50:57 wrote state.pth 20240810-09:50:57 --- epoch 175 ---------------------------------------- 20240810-09:50:57 current_test_accuracies 0.9596 0.9652 0.9568 0.9552 0.9586 0.9475 0.9584 0.9609 0.9579 0.9671 20240810-09:50:57 training model 5 20240810-09:50:57 training model 3 20240810-09:54:39 train_perplexity 175 model 3 1.1598295006710189 20240810-09:54:42 train_perplexity 175 model 5 1.1583296676536325 20240810-09:54:46 test_perplexity 175 model 3 1.1588500721148487 20240810-09:54:48 test_perplexity 175 model 5 1.1580340186844411 20240810-10:00:39 test_accuracy 175 model 5 val 1520 / 1600 20240810-10:00:42 test_accuracy 175 model 3 val 1515 / 1583 20240810-10:00:44 wrote gpt_005.pth 20240810-10:00:45 wrote gpt_003.pth 20240810-10:01:18 wrote non_validated_0175_05.png 20240810-10:01:51 wrote non_validated_0175_03.png 20240810-10:01:51 wrote state.pth 20240810-10:01:51 --- epoch 176 ---------------------------------------- 20240810-10:01:51 current_test_accuracies 0.9596 0.9652 0.9568 0.9570 0.9586 0.9500 0.9584 0.9609 0.9579 0.9671 20240810-10:05:14 keep c_quizzes model 9 validated 23 / 420 (5.48%) nb_accumulated 23 / 420 (finishes Sat 11:03 -- 407/h) 20240810-10:08:15 keep c_quizzes model 0 validated 14 / 420 (3.33%) nb_accumulated 37 / 420 (finishes Sat 11:14 -- 346/h) 20240810-10:11:15 keep c_quizzes model 7 validated 18 / 420 (4.29%) nb_accumulated 55 / 420 (finishes Sat 11:13 -- 350/h) 20240810-10:14:16 keep c_quizzes model 2 validated 12 / 420 (2.86%) nb_accumulated 67 / 420 (finishes Sat 11:19 -- 323/h) 20240810-10:17:16 keep c_quizzes model 6 validated 20 / 420 (4.76%) nb_accumulated 87 / 420 (finishes Sat 11:16 -- 338/h) 20240810-10:20:17 keep c_quizzes model 2 validated 24 / 420 (5.71%) nb_accumulated 111 / 420 (finishes Sat 11:11 -- 361/h) 20240810-10:23:18 keep c_quizzes model 6 validated 23 / 420 (5.48%) nb_accumulated 134 / 420 (finishes Sat 11:09 -- 374/h) 20240810-10:26:19 keep c_quizzes model 7 validated 18 / 420 (4.29%) nb_accumulated 152 / 420 (finishes Sat 11:09 -- 372/h) 20240810-10:29:20 keep c_quizzes model 5 validated 26 / 420 (6.19%) nb_accumulated 178 / 420 (finishes Sat 11:06 -- 388/h) 20240810-10:32:21 keep c_quizzes model 3 validated 23 / 420 (5.48%) nb_accumulated 201 / 420 (finishes Sat 11:05 -- 395/h) 20240810-10:35:23 keep c_quizzes model 4 validated 15 / 420 (3.57%) nb_accumulated 216 / 420 (finishes Sat 11:07 -- 386/h) 20240810-10:38:24 keep c_quizzes model 1 validated 14 / 420 (3.33%) nb_accumulated 230 / 420 (finishes Sat 11:08 -- 377/h) 20240810-10:41:25 keep c_quizzes model 7 validated 15 / 420 (3.57%) nb_accumulated 245 / 420 (finishes Sat 11:09 -- 371/h) 20240810-10:44:26 keep c_quizzes model 8 validated 23 / 420 (5.48%) nb_accumulated 268 / 420 (finishes Sat 11:08 -- 377/h) 20240810-10:47:27 keep c_quizzes model 0 validated 26 / 420 (6.19%) nb_accumulated 294 / 420 (finishes Sat 11:07 -- 386/h) 20240810-10:50:28 keep c_quizzes model 1 validated 26 / 420 (6.19%) nb_accumulated 320 / 420 (finishes Sat 11:05 -- 394/h) 20240810-10:53:29 keep c_quizzes model 3 validated 22 / 420 (5.24%) nb_accumulated 342 / 420 (finishes Sat 11:05 -- 397/h) 20240810-10:56:30 keep c_quizzes model 6 validated 22 / 420 (5.24%) nb_accumulated 364 / 420 (finishes Sat 11:04 -- 399/h) 20240810-10:59:32 keep c_quizzes model 6 validated 25 / 420 (5.95%) nb_accumulated 389 / 420 (finishes Sat 11:04 -- 404/h) 20240810-11:02:33 keep c_quizzes model 3 validated 22 / 420 (5.24%) nb_accumulated 411 / 420 (finishes Sat 11:03 -- 406/h) 20240810-11:05:35 keep c_quizzes model 9 validated 22 / 420 (5.24%) nb_accumulated 433 / 420 (finishes now! -- 407/h) 20240810-11:05:48 wrote c_quizzes.pth 20240810-11:05:48 training model 0 20240810-11:05:48 training model 1 20240810-11:09:30 train_perplexity 176 model 1 1.1596035887283807 20240810-11:09:31 train_perplexity 176 model 0 1.1595577439864326 20240810-11:09:38 test_perplexity 176 model 0 1.1580832702285686 20240810-11:09:38 test_perplexity 176 model 1 1.1558973934272965 20240810-11:15:38 test_accuracy 176 model 1 val 1513 / 1588 20240810-11:15:40 test_accuracy 176 model 0 val 1506 / 1578 20240810-11:15:42 wrote gpt_000.pth 20240810-11:15:43 wrote gpt_001.pth 20240810-11:16:16 wrote non_validated_0176_00.png 20240810-11:16:48 wrote non_validated_0176_01.png 20240810-11:16:48 wrote state.pth 20240810-11:16:48 --- epoch 177 ---------------------------------------- 20240810-11:16:48 current_test_accuracies 0.9544 0.9528 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 20240810-11:16:48 training model 2 20240810-11:16:48 training model 3 20240810-11:20:31 train_perplexity 177 model 3 1.159926143453179 20240810-11:20:33 train_perplexity 177 model 2 1.1598968876207965 20240810-11:20:38 test_perplexity 177 model 3 1.1596696747821391 20240810-11:20:40 test_perplexity 177 model 2 1.1573680418956964 20240810-11:26:31 test_accuracy 177 model 3 val 1529 / 1597 20240810-11:26:32 test_accuracy 177 model 2 val 1519 / 1595 20240810-11:26:34 wrote gpt_002.pth 20240810-11:26:35 wrote gpt_003.pth 20240810-11:27:07 wrote non_validated_0177_02.png 20240810-11:27:40 wrote non_validated_0177_03.png 20240810-11:27:40 wrote state.pth 20240810-11:27:40 --- epoch 178 ---------------------------------------- 20240810-11:27:40 current_test_accuracies 0.9544 0.9528 0.9524 0.9574 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 20240810-11:27:40 training model 4 20240810-11:27:40 training model 5 20240810-11:31:22 train_perplexity 178 model 5 1.1591389306662427 20240810-11:31:26 train_perplexity 178 model 4 1.1591068323580327 20240810-11:31:28 test_perplexity 178 model 5 1.1596469002699674 20240810-11:31:31 test_perplexity 178 model 4 1.1587542262792558 20240810-11:37:21 test_accuracy 178 model 4 val 1553 / 1619 20240810-11:37:21 test_accuracy 178 model 5 val 1535 / 1601 20240810-11:37:23 wrote gpt_004.pth 20240810-11:37:24 wrote gpt_005.pth 20240810-11:37:57 wrote non_validated_0178_04.png 20240810-11:38:30 wrote non_validated_0178_05.png 20240810-11:38:30 wrote state.pth 20240810-11:38:30 --- epoch 179 ---------------------------------------- 20240810-11:38:30 current_test_accuracies 0.9544 0.9528 0.9524 0.9574 0.9592 0.9588 0.0000 0.0000 0.0000 0.0000 20240810-11:38:30 training model 6 20240810-11:38:30 training model 7 20240810-11:42:12 train_perplexity 179 model 7 1.1594384692255142 20240810-11:42:16 train_perplexity 179 model 6 1.1595088810641678 20240810-11:42:19 test_perplexity 179 model 7 1.1587697623877036 20240810-11:42:21 test_perplexity 179 model 6 1.1584922503725397 20240810-11:48:12 test_accuracy 179 model 7 val 1528 / 1606 20240810-11:48:14 test_accuracy 179 model 6 val 1524 / 1592 20240810-11:48:16 wrote gpt_006.pth 20240810-11:48:16 wrote gpt_007.pth 20240810-11:48:49 wrote non_validated_0179_06.png 20240810-11:49:22 wrote non_validated_0179_07.png 20240810-11:49:22 wrote state.pth 20240810-11:49:22 --- epoch 180 ---------------------------------------- 20240810-11:49:22 current_test_accuracies 0.9544 0.9528 0.9524 0.9574 0.9592 0.9588 0.9573 0.9514 0.0000 0.0000 20240810-11:49:22 training model 8 20240810-11:49:22 training model 9 20240810-11:53:04 train_perplexity 180 model 9 1.1597799871574481 20240810-11:53:09 train_perplexity 180 model 8 1.1598926137744312 20240810-11:53:10 test_perplexity 180 model 9 1.1591892482654778 20240810-11:53:13 test_perplexity 180 model 8 1.157553294575933 20240810-11:59:02 test_accuracy 180 model 8 val 1540 / 1617 20240810-11:59:07 test_accuracy 180 model 9 val 1476 / 1589 20240810-11:59:09 wrote gpt_008.pth 20240810-11:59:09 wrote gpt_009.pth 20240810-11:59:42 wrote non_validated_0180_08.png 20240810-12:00:15 wrote non_validated_0180_09.png 20240810-12:00:15 wrote state.pth 20240810-12:00:15 --- epoch 181 ---------------------------------------- 20240810-12:00:15 current_test_accuracies 0.9544 0.9528 0.9524 0.9574 0.9592 0.9588 0.9573 0.9514 0.9524 0.9289 20240810-12:00:15 training model 9 20240810-12:00:15 training model 7 20240810-12:03:58 train_perplexity 181 model 7 1.1591428449920305 20240810-12:04:02 train_perplexity 181 model 9 1.1595236809565044 20240810-12:04:04 test_perplexity 181 model 7 1.1589035631461333 20240810-12:04:07 test_perplexity 181 model 9 1.1580692118578935 20240810-12:09:49 test_accuracy 181 model 9 val 1560 / 1634 20240810-12:09:54 test_accuracy 181 model 7 val 1545 / 1608 20240810-12:09:56 wrote gpt_009.pth 20240810-12:09:56 wrote gpt_007.pth 20240810-12:10:29 wrote non_validated_0181_09.png 20240810-12:11:02 wrote non_validated_0181_07.png 20240810-12:11:02 wrote state.pth 20240810-12:11:02 --- epoch 182 ---------------------------------------- 20240810-12:11:02 current_test_accuracies 0.9544 0.9528 0.9524 0.9574 0.9592 0.9588 0.9573 0.9608 0.9524 0.9547 20240810-12:14:25 keep c_quizzes model 6 validated 16 / 420 (3.81%) nb_accumulated 16 / 420 (finishes Sat 13:39 -- 283/h) 20240810-12:17:28 keep c_quizzes model 2 validated 21 / 420 (5.00%) nb_accumulated 37 / 420 (finishes Sat 13:24 -- 344/h) 20240810-12:20:30 keep c_quizzes model 9 validated 24 / 420 (5.71%) nb_accumulated 61 / 420 (finishes Sat 13:16 -- 386/h) 20240810-12:23:32 keep c_quizzes model 4 validated 24 / 420 (5.71%) nb_accumulated 85 / 420 (finishes Sat 13:12 -- 407/h) 20240810-12:26:35 keep c_quizzes model 9 validated 21 / 420 (5.00%) nb_accumulated 106 / 420 (finishes Sat 13:12 -- 408/h) 20240810-12:29:38 keep c_quizzes model 5 validated 22 / 420 (5.24%) nb_accumulated 128 / 420 (finishes Sat 13:12 -- 413/h) 20240810-12:32:41 keep c_quizzes model 3 validated 30 / 420 (7.14%) nb_accumulated 158 / 420 (finishes Sat 13:08 -- 437/h) 20240810-12:35:43 keep c_quizzes model 5 validated 23 / 420 (5.48%) nb_accumulated 181 / 420 (finishes Sat 13:08 -- 439/h) 20240810-12:38:46 keep c_quizzes model 5 validated 25 / 420 (5.95%) nb_accumulated 206 / 420 (finishes Sat 13:07 -- 445/h) 20240810-12:41:48 keep c_quizzes model 0 validated 19 / 420 (4.52%) nb_accumulated 225 / 420 (finishes Sat 13:08 -- 438/h) 20240810-12:44:50 keep c_quizzes model 0 validated 26 / 420 (6.19%) nb_accumulated 251 / 420 (finishes Sat 13:07 -- 445/h) 20240810-12:47:53 keep c_quizzes model 2 validated 22 / 420 (5.24%) nb_accumulated 273 / 420 (finishes Sat 13:07 -- 444/h) 20240810-12:50:55 keep c_quizzes model 6 validated 16 / 420 (3.81%) nb_accumulated 289 / 420 (finishes Sat 13:09 -- 434/h) 20240810-12:53:58 keep c_quizzes model 6 validated 25 / 420 (5.95%) nb_accumulated 314 / 420 (finishes Sat 13:08 -- 438/h) 20240810-12:56:59 keep c_quizzes model 4 validated 24 / 420 (5.71%) nb_accumulated 338 / 420 (finishes Sat 13:08 -- 441/h) 20240810-13:00:02 keep c_quizzes model 6 validated 22 / 420 (5.24%) nb_accumulated 360 / 420 (finishes Sat 13:08 -- 440/h) 20240810-13:03:05 keep c_quizzes model 6 validated 25 / 420 (5.95%) nb_accumulated 385 / 420 (finishes Sat 13:07 -- 443/h) 20240810-13:06:07 keep c_quizzes model 8 validated 22 / 420 (5.24%) nb_accumulated 407 / 420 (finishes Sat 13:07 -- 443/h) 20240810-13:09:08 keep c_quizzes model 5 validated 26 / 420 (6.19%) nb_accumulated 433 / 420 (finishes now! -- 447/h) 20240810-13:09:22 wrote c_quizzes.pth 20240810-13:09:22 training model 0 20240810-13:09:22 training model 1 20240810-13:13:03 train_perplexity 182 model 1 1.160118743599526 20240810-13:13:05 train_perplexity 182 model 0 1.1592082216782365 20240810-13:13:10 test_perplexity 182 model 1 1.1589310071640058 20240810-13:13:12 test_perplexity 182 model 0 1.1572901556141861 20240810-13:19:04 test_accuracy 182 model 1 val 1535 / 1604 20240810-13:19:04 test_accuracy 182 model 0 val 1520 / 1606 20240810-13:19:06 wrote gpt_000.pth 20240810-13:19:07 wrote gpt_001.pth 20240810-13:19:39 wrote non_validated_0182_00.png 20240810-13:20:12 wrote non_validated_0182_01.png 20240810-13:20:12 wrote state.pth 20240810-13:20:12 --- epoch 183 ---------------------------------------- 20240810-13:20:12 current_test_accuracies 0.9465 0.9570 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 20240810-13:20:12 training model 2 20240810-13:20:12 training model 3 20240810-13:23:56 train_perplexity 183 model 3 1.159986955250024 20240810-13:23:57 train_perplexity 183 model 2 1.1600769951650898 20240810-13:24:03 test_perplexity 183 model 3 1.160158849370975 20240810-13:24:03 test_perplexity 183 model 2 1.1583620377468582 20240810-13:29:56 test_accuracy 183 model 3 val 1533 / 1598 20240810-13:29:57 test_accuracy 183 model 2 val 1501 / 1580 20240810-13:29:59 wrote gpt_002.pth 20240810-13:30:00 wrote gpt_003.pth 20240810-13:30:33 wrote non_validated_0183_02.png 20240810-13:31:06 wrote non_validated_0183_03.png 20240810-13:31:06 wrote state.pth 20240810-13:31:06 --- epoch 184 ---------------------------------------- 20240810-13:31:06 current_test_accuracies 0.9465 0.9570 0.9500 0.9593 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 20240810-13:31:06 training model 4 20240810-13:31:06 training model 5 20240810-13:34:48 train_perplexity 184 model 5 1.159363558818925 20240810-13:34:53 train_perplexity 184 model 4 1.1599562736996814 20240810-13:34:54 test_perplexity 184 model 5 1.1582198836467765 20240810-13:34:58 test_perplexity 184 model 4 1.1576257988828202 20240810-13:40:51 test_accuracy 184 model 4 val 1538 / 1597 20240810-13:40:54 test_accuracy 184 model 5 val 1513 / 1574 20240810-13:40:56 wrote gpt_004.pth 20240810-13:40:57 wrote gpt_005.pth 20240810-13:41:29 wrote non_validated_0184_04.png 20240810-13:42:02 wrote non_validated_0184_05.png 20240810-13:42:02 wrote state.pth 20240810-13:42:02 --- epoch 185 ---------------------------------------- 20240810-13:42:02 current_test_accuracies 0.9465 0.9570 0.9500 0.9593 0.9631 0.9612 0.0000 0.0000 0.0000 0.0000 20240810-13:42:02 training model 6 20240810-13:42:02 training model 7 20240810-13:45:44 train_perplexity 185 model 7 1.160160834110291 20240810-13:45:48 train_perplexity 185 model 6 1.160055503940925 20240810-13:45:51 test_perplexity 185 model 7 1.1597093286347688 20240810-13:45:53 test_perplexity 185 model 6 1.1588876802999883 20240810-13:51:40 test_accuracy 185 model 7 val 1548 / 1616 20240810-13:51:50 test_accuracy 185 model 6 val 1485 / 1558 20240810-13:51:51 wrote gpt_006.pth 20240810-13:51:52 wrote gpt_007.pth 20240810-13:52:25 wrote non_validated_0185_06.png 20240810-13:52:57 wrote non_validated_0185_07.png 20240810-13:52:57 wrote state.pth 20240810-13:52:57 --- epoch 186 ---------------------------------------- 20240810-13:52:57 current_test_accuracies 0.9465 0.9570 0.9500 0.9593 0.9631 0.9612 0.9531 0.9579 0.0000 0.0000 20240810-13:52:57 training model 8 20240810-13:52:57 training model 9 20240810-13:56:40 train_perplexity 186 model 9 1.1601765781922269 20240810-13:56:41 train_perplexity 186 model 8 1.1599239959199552 20240810-13:56:47 test_perplexity 186 model 9 1.1597208281276294 20240810-13:56:48 test_perplexity 186 model 8 1.156633190348503 20240810-14:02:37 test_accuracy 186 model 8 val 1539 / 1596 20240810-14:02:41 test_accuracy 186 model 9 val 1513 / 1576 20240810-14:02:43 wrote gpt_008.pth 20240810-14:02:44 wrote gpt_009.pth 20240810-14:03:17 wrote non_validated_0186_08.png 20240810-14:03:49 wrote non_validated_0186_09.png 20240810-14:03:49 wrote state.pth 20240810-14:03:49 --- epoch 187 ---------------------------------------- 20240810-14:03:49 current_test_accuracies 0.9465 0.9570 0.9500 0.9593 0.9631 0.9612 0.9531 0.9579 0.9643 0.9600 20240810-14:03:49 training model 0 20240810-14:03:49 training model 2 20240810-14:07:32 train_perplexity 187 model 2 1.160075905938193 20240810-14:07:34 train_perplexity 187 model 0 1.1598508240802512 20240810-14:07:38 test_perplexity 187 model 2 1.1596260414504918 20240810-14:07:40 test_perplexity 187 model 0 1.1582239888804058 20240810-14:13:37 test_accuracy 187 model 0 val 1538 / 1592 20240810-14:13:39 test_accuracy 187 model 2 val 1515 / 1595 20240810-14:13:41 wrote gpt_000.pth 20240810-14:13:42 wrote gpt_002.pth 20240810-14:14:14 wrote non_validated_0187_00.png 20240810-14:14:47 wrote non_validated_0187_02.png 20240810-14:14:47 wrote state.pth 20240810-14:14:47 --- epoch 188 ---------------------------------------- 20240810-14:14:47 current_test_accuracies 0.9661 0.9570 0.9498 0.9593 0.9631 0.9612 0.9531 0.9579 0.9643 0.9600 20240810-14:14:47 training model 2 20240810-14:14:47 training model 6 20240810-14:18:30 train_perplexity 188 model 6 1.1597193924237033 20240810-14:18:30 train_perplexity 188 model 2 1.159997547175358 20240810-14:18:38 test_perplexity 188 model 6 1.1603477485824307 20240810-14:18:38 test_perplexity 188 model 2 1.1606375787914145 20240810-14:24:28 test_accuracy 188 model 2 val 1541 / 1623 20240810-14:24:31 test_accuracy 188 model 6 val 1539 / 1617 20240810-14:24:33 wrote gpt_002.pth 20240810-14:24:34 wrote gpt_006.pth 20240810-14:25:06 wrote non_validated_0188_02.png 20240810-14:25:39 wrote non_validated_0188_06.png 20240810-14:25:39 wrote state.pth 20240810-14:25:39 --- epoch 189 ---------------------------------------- 20240810-14:25:39 current_test_accuracies 0.9661 0.9570 0.9495 0.9593 0.9631 0.9612 0.9518 0.9579 0.9643 0.9600 20240810-14:25:39 training model 2 20240810-14:25:39 training model 6 20240810-14:29:22 train_perplexity 189 model 6 1.1598452375290802 20240810-14:29:23 train_perplexity 189 model 2 1.1595825425830542 20240810-14:29:30 test_perplexity 189 model 6 1.160050757356307 20240810-14:29:30 test_perplexity 189 model 2 1.1604630289676001 20240810-14:35:19 test_accuracy 189 model 6 val 1547 / 1623 20240810-14:35:26 test_accuracy 189 model 2 val 1516 / 1588 20240810-14:35:27 wrote gpt_002.pth 20240810-14:35:28 wrote gpt_006.pth 20240810-14:36:01 wrote non_validated_0189_02.png 20240810-14:36:33 wrote non_validated_0189_06.png 20240810-14:36:33 wrote state.pth 20240810-14:36:33 --- epoch 190 ---------------------------------------- 20240810-14:36:33 current_test_accuracies 0.9661 0.9570 0.9547 0.9593 0.9631 0.9612 0.9532 0.9579 0.9643 0.9600 20240810-14:39:54 keep c_quizzes model 1 validated 16 / 420 (3.81%) nb_accumulated 16 / 420 (finishes Sat 16:04 -- 287/h) 20240810-14:42:54 keep c_quizzes model 4 validated 22 / 420 (5.24%) nb_accumulated 38 / 420 (finishes Sat 15:46 -- 359/h) 20240810-14:45:55 keep c_quizzes model 3 validated 12 / 420 (2.86%) nb_accumulated 50 / 420 (finishes Sat 15:55 -- 320/h) 20240810-14:48:54 keep c_quizzes model 3 validated 16 / 420 (3.81%) nb_accumulated 66 / 420 (finishes Sat 15:55 -- 320/h) 20240810-14:51:54 keep c_quizzes model 9 validated 22 / 420 (5.24%) nb_accumulated 88 / 420 (finishes Sat 15:49 -- 344/h) 20240810-14:54:54 keep c_quizzes model 9 validated 23 / 420 (5.48%) nb_accumulated 111 / 420 (finishes Sat 15:45 -- 363/h) 20240810-14:57:53 keep c_quizzes model 3 validated 22 / 420 (5.24%) nb_accumulated 133 / 420 (finishes Sat 15:43 -- 374/h) 20240810-15:00:52 keep c_quizzes model 4 validated 20 / 420 (4.76%) nb_accumulated 153 / 420 (finishes Sat 15:43 -- 377/h) 20240810-15:03:51 keep c_quizzes model 2 validated 19 / 420 (4.52%) nb_accumulated 172 / 420 (finishes Sat 15:43 -- 378/h) 20240810-15:06:50 keep c_quizzes model 4 validated 15 / 420 (3.57%) nb_accumulated 187 / 420 (finishes Sat 15:44 -- 370/h) 20240810-15:09:50 keep c_quizzes model 3 validated 17 / 420 (4.05%) nb_accumulated 204 / 420 (finishes Sat 15:45 -- 367/h) 20240810-15:12:51 keep c_quizzes model 4 validated 24 / 420 (5.71%) nb_accumulated 228 / 420 (finishes Sat 15:43 -- 377/h) 20240810-15:15:50 keep c_quizzes model 1 validated 18 / 420 (4.29%) nb_accumulated 246 / 420 (finishes Sat 15:43 -- 375/h) 20240810-15:18:49 keep c_quizzes model 1 validated 16 / 420 (3.81%) nb_accumulated 262 / 420 (finishes Sat 15:44 -- 371/h) 20240810-15:21:49 keep c_quizzes model 0 validated 26 / 420 (6.19%) nb_accumulated 288 / 420 (finishes Sat 15:42 -- 381/h) 20240810-15:24:48 keep c_quizzes model 3 validated 12 / 420 (2.86%) nb_accumulated 300 / 420 (finishes Sat 15:44 -- 373/h) 20240810-15:27:47 keep c_quizzes model 3 validated 20 / 420 (4.76%) nb_accumulated 320 / 420 (finishes Sat 15:43 -- 374/h) 20240810-15:30:46 keep c_quizzes model 1 validated 22 / 420 (5.24%) nb_accumulated 342 / 420 (finishes Sat 15:43 -- 378/h) 20240810-15:33:45 keep c_quizzes model 0 validated 21 / 420 (5.00%) nb_accumulated 363 / 420 (finishes Sat 15:42 -- 380/h) 20240810-15:36:45 keep c_quizzes model 3 validated 16 / 420 (3.81%) nb_accumulated 379 / 420 (finishes Sat 15:43 -- 377/h) 20240810-15:39:44 keep c_quizzes model 7 validated 19 / 420 (4.52%) nb_accumulated 398 / 420 (finishes Sat 15:43 -- 377/h) 20240810-15:42:44 keep c_quizzes model 1 validated 20 / 420 (4.76%) nb_accumulated 418 / 420 (finishes Sat 15:43 -- 378/h) 20240810-15:45:43 keep c_quizzes model 0 validated 22 / 420 (5.24%) nb_accumulated 440 / 420 (finishes now! -- 381/h) 20240810-15:45:57 wrote c_quizzes.pth 20240810-15:45:57 training model 0 20240810-15:45:57 training model 1 20240810-15:49:38 train_perplexity 190 model 1 1.1606535052773015 20240810-15:49:40 train_perplexity 190 model 0 1.1602175790209788 20240810-15:49:45 test_perplexity 190 model 1 1.1589137217930165 20240810-15:49:46 test_perplexity 190 model 0 1.160180163373284 20240810-15:55:30 test_accuracy 190 model 0 val 1546 / 1636 20240810-15:55:37 test_accuracy 190 model 1 val 1508 / 1582 20240810-15:55:39 wrote gpt_000.pth 20240810-15:55:39 wrote gpt_001.pth 20240810-15:56:12 wrote non_validated_0190_00.png 20240810-15:56:45 wrote non_validated_0190_01.png 20240810-15:56:45 wrote state.pth 20240810-15:56:45 --- epoch 191 ---------------------------------------- 20240810-15:56:45 current_test_accuracies 0.9450 0.9532 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 20240810-15:56:45 training model 2 20240810-15:56:45 training model 3 20240810-16:00:28 train_perplexity 191 model 3 1.1606873901024224 20240810-16:00:29 train_perplexity 191 model 2 1.1599166067905826 20240810-16:00:36 test_perplexity 191 model 3 1.1586277369433728 20240810-16:00:36 test_perplexity 191 model 2 1.159256850026144 20240810-16:06:28 test_accuracy 191 model 2 val 1525 / 1592 20240810-16:06:30 test_accuracy 191 model 3 val 1524 / 1589 20240810-16:06:32 wrote gpt_002.pth 20240810-16:06:33 wrote gpt_003.pth 20240810-16:07:05 wrote non_validated_0191_02.png 20240810-16:07:38 wrote non_validated_0191_03.png 20240810-16:07:38 wrote state.pth 20240810-16:07:38 --- epoch 192 ---------------------------------------- 20240810-16:07:38 current_test_accuracies 0.9450 0.9532 0.9579 0.9591 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 20240810-16:07:38 training model 4 20240810-16:07:38 training model 5 20240810-16:11:20 train_perplexity 192 model 5 1.1599574150572942 20240810-16:11:25 train_perplexity 192 model 4 1.1603860370694188 20240810-16:11:26 test_perplexity 192 model 5 1.1581533799831105 20240810-16:11:29 test_perplexity 192 model 4 1.1588219537634254 20240810-16:17:16 test_accuracy 192 model 5 val 1560 / 1613 20240810-16:17:19 test_accuracy 192 model 4 val 1524 / 1603 20240810-16:17:21 wrote gpt_004.pth 20240810-16:17:22 wrote gpt_005.pth 20240810-16:17:54 wrote non_validated_0192_04.png 20240810-16:18:27 wrote non_validated_0192_05.png 20240810-16:18:27 wrote state.pth 20240810-16:18:27 --- epoch 193 ---------------------------------------- 20240810-16:18:27 current_test_accuracies 0.9450 0.9532 0.9579 0.9591 0.9507 0.9671 0.0000 0.0000 0.0000 0.0000 20240810-16:18:27 training model 6 20240810-16:18:27 training model 7 20240810-16:22:09 train_perplexity 193 model 7 1.1599857240292575 20240810-16:22:12 train_perplexity 193 model 6 1.1602161147648435 20240810-16:22:16 test_perplexity 193 model 7 1.1604461334848477 20240810-16:22:18 test_perplexity 193 model 6 1.1600767202936568 20240810-16:28:03 test_accuracy 193 model 7 val 1571 / 1629 20240810-16:28:05 test_accuracy 193 model 6 val 1531 / 1613 20240810-16:28:06 wrote gpt_006.pth 20240810-16:28:07 wrote gpt_007.pth 20240810-16:28:40 wrote non_validated_0193_06.png 20240810-16:29:13 wrote non_validated_0193_07.png 20240810-16:29:13 wrote state.pth 20240810-16:29:13 --- epoch 194 ---------------------------------------- 20240810-16:29:13 current_test_accuracies 0.9450 0.9532 0.9579 0.9591 0.9507 0.9671 0.9492 0.9644 0.0000 0.0000 20240810-16:29:13 training model 8 20240810-16:29:13 training model 9 20240810-16:32:56 train_perplexity 194 model 9 1.160632589557465 20240810-16:32:56 train_perplexity 194 model 8 1.1603302653310656 20240810-16:33:03 test_perplexity 194 model 9 1.1596433025617703 20240810-16:33:03 test_perplexity 194 model 8 1.1595291732444062 20240810-16:38:56 test_accuracy 194 model 9 val 1516 / 1590 20240810-16:38:56 test_accuracy 194 model 8 val 1532 / 1597 20240810-16:38:58 wrote gpt_008.pth 20240810-16:38:59 wrote gpt_009.pth 20240810-16:39:32 wrote non_validated_0194_08.png 20240810-16:40:05 wrote non_validated_0194_09.png 20240810-16:40:05 wrote state.pth 20240810-16:40:05 --- epoch 195 ---------------------------------------- 20240810-16:40:05 current_test_accuracies 0.9450 0.9532 0.9579 0.9591 0.9507 0.9671 0.9492 0.9644 0.9593 0.9535 20240810-16:40:05 training model 0 20240810-16:40:05 training model 6 20240810-16:43:48 train_perplexity 195 model 6 1.159866616329143 20240810-16:43:49 train_perplexity 195 model 0 1.1594608603531935 20240810-16:43:56 test_perplexity 195 model 6 1.157897166437937 20240810-16:43:56 test_perplexity 195 model 0 1.1586181741712733 20240810-16:49:50 test_accuracy 195 model 0 val 1520 / 1607 20240810-16:49:52 test_accuracy 195 model 6 val 1519 / 1598 20240810-16:49:54 wrote gpt_000.pth 20240810-16:49:55 wrote gpt_006.pth 20240810-16:50:27 wrote non_validated_0195_00.png 20240810-16:51:00 wrote non_validated_0195_06.png 20240810-16:51:00 wrote state.pth 20240810-16:51:00 --- epoch 196 ---------------------------------------- 20240810-16:51:00 current_test_accuracies 0.9459 0.9532 0.9579 0.9591 0.9507 0.9671 0.9506 0.9644 0.9593 0.9535 20240810-16:51:00 training model 0 20240810-16:51:00 training model 6 20240810-16:54:42 train_perplexity 196 model 6 1.1599007379956623 20240810-16:54:45 train_perplexity 196 model 0 1.1600812164536483 20240810-16:54:49 test_perplexity 196 model 6 1.1586497393233395 20240810-16:54:51 test_perplexity 196 model 0 1.1568268573536917 20240810-17:00:38 test_accuracy 196 model 6 val 1534 / 1620 20240810-17:00:43 test_accuracy 196 model 0 val 1529 / 1590 20240810-17:00:45 wrote gpt_000.pth 20240810-17:00:46 wrote gpt_006.pth 20240810-17:01:19 wrote non_validated_0196_00.png 20240810-17:01:51 wrote non_validated_0196_06.png 20240810-17:01:51 wrote state.pth 20240810-17:01:51 --- epoch 197 ---------------------------------------- 20240810-17:01:51 current_test_accuracies 0.9616 0.9532 0.9579 0.9591 0.9507 0.9671 0.9469 0.9644 0.9593 0.9535 20240810-17:01:51 training model 6 20240810-17:01:51 training model 4 20240810-17:05:34 train_perplexity 197 model 4 1.1603999199762343 20240810-17:05:39 train_perplexity 197 model 6 1.1600155983654947 20240810-17:05:40 test_perplexity 197 model 4 1.1598557713742714 20240810-17:05:43 test_perplexity 197 model 6 1.1619990782689356 20240810-17:11:30 test_accuracy 197 model 6 val 1570 / 1634 20240810-17:11:32 test_accuracy 197 model 4 val 1575 / 1624 20240810-17:11:34 wrote gpt_006.pth 20240810-17:11:34 wrote gpt_004.pth 20240810-17:12:07 wrote non_validated_0197_06.png 20240810-17:12:39 wrote non_validated_0197_04.png 20240810-17:12:39 wrote state.pth 20240810-17:12:39 --- epoch 198 ---------------------------------------- 20240810-17:12:39 current_test_accuracies 0.9616 0.9532 0.9579 0.9591 0.9698 0.9671 0.9608 0.9644 0.9593 0.9535 20240810-17:16:02 keep c_quizzes model 4 validated 22 / 420 (5.24%) nb_accumulated 22 / 420 (finishes Sat 18:17 -- 390/h) 20240810-17:19:03 keep c_quizzes model 3 validated 23 / 420 (5.48%) nb_accumulated 45 / 420 (finishes Sat 18:12 -- 421/h) 20240810-17:22:05 keep c_quizzes model 5 validated 26 / 420 (6.19%) nb_accumulated 71 / 420 (finishes Sat 18:08 -- 451/h) 20240810-17:25:08 keep c_quizzes model 6 validated 26 / 420 (6.19%) nb_accumulated 97 / 420 (finishes Sat 18:06 -- 466/h) 20240810-17:28:08 keep c_quizzes model 2 validated 30 / 420 (7.14%) nb_accumulated 127 / 420 (finishes Sat 18:03 -- 492/h) 20240810-17:31:08 keep c_quizzes model 2 validated 24 / 420 (5.71%) nb_accumulated 151 / 420 (finishes Sat 18:04 -- 490/h) 20240810-17:34:09 keep c_quizzes model 7 validated 21 / 420 (5.00%) nb_accumulated 172 / 420 (finishes Sat 18:05 -- 480/h) 20240810-17:37:10 keep c_quizzes model 4 validated 25 / 420 (5.95%) nb_accumulated 197 / 420 (finishes Sat 18:04 -- 482/h) 20240810-17:40:11 keep c_quizzes model 3 validated 21 / 420 (5.00%) nb_accumulated 218 / 420 (finishes Sat 18:05 -- 475/h) 20240810-17:43:13 keep c_quizzes model 6 validated 27 / 420 (6.43%) nb_accumulated 245 / 420 (finishes Sat 18:05 -- 480/h) 20240810-17:46:14 keep c_quizzes model 8 validated 19 / 420 (4.52%) nb_accumulated 264 / 420 (finishes Sat 18:06 -- 471/h) 20240810-17:49:14 keep c_quizzes model 9 validated 17 / 420 (4.05%) nb_accumulated 281 / 420 (finishes Sat 18:07 -- 460/h) 20240810-17:52:15 keep c_quizzes model 0 validated 21 / 420 (5.00%) nb_accumulated 302 / 420 (finishes Sat 18:07 -- 457/h) 20240810-17:55:16 keep c_quizzes model 6 validated 20 / 420 (4.76%) nb_accumulated 322 / 420 (finishes Sat 18:08 -- 453/h) 20240810-17:58:17 keep c_quizzes model 6 validated 27 / 420 (6.43%) nb_accumulated 349 / 420 (finishes Sat 18:07 -- 458/h) 20240810-18:01:18 keep c_quizzes model 1 validated 16 / 420 (3.81%) nb_accumulated 365 / 420 (finishes Sat 18:08 -- 450/h) 20240810-18:04:19 keep c_quizzes model 3 validated 21 / 420 (5.00%) nb_accumulated 386 / 420 (finishes Sat 18:08 -- 448/h) 20240810-18:07:20 keep c_quizzes model 4 validated 29 / 420 (6.90%) nb_accumulated 415 / 420 (finishes Sat 18:08 -- 455/h) 20240810-18:10:21 keep c_quizzes model 9 validated 20 / 420 (4.76%) nb_accumulated 435 / 420 (finishes now! -- 452/h) 20240810-18:10:35 wrote c_quizzes.pth 20240810-18:10:35 training model 0 20240810-18:10:35 training model 1 20240810-18:14:16 train_perplexity 198 model 0 1.159904803190113 20240810-18:14:17 train_perplexity 198 model 1 1.160935412267519 20240810-18:14:24 test_perplexity 198 model 0 1.1611104290981387 20240810-18:14:24 test_perplexity 198 model 1 1.1594133896933798 20240810-18:20:21 test_accuracy 198 model 0 val 1520 / 1595 20240810-18:20:22 test_accuracy 198 model 1 val 1534 / 1599 20240810-18:20:24 wrote gpt_000.pth 20240810-18:20:25 wrote gpt_001.pth 20240810-18:20:58 wrote non_validated_0198_00.png 20240810-18:21:30 wrote non_validated_0198_01.png 20240810-18:21:30 wrote state.pth 20240810-18:21:30 --- epoch 199 ---------------------------------------- 20240810-18:21:30 current_test_accuracies 0.9530 0.9593 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 20240810-18:21:30 training model 2 20240810-18:21:30 training model 3 20240810-18:25:13 train_perplexity 199 model 3 1.1613285794229193 20240810-18:25:14 train_perplexity 199 model 2 1.1603897439358224 20240810-18:25:20 test_perplexity 199 model 3 1.161312274094991 20240810-18:25:21 test_perplexity 199 model 2 1.1601913526735117 20240810-18:31:09 test_accuracy 199 model 2 val 1545 / 1609 20240810-18:31:10 test_accuracy 199 model 3 val 1529 / 1605 20240810-18:31:12 wrote gpt_002.pth 20240810-18:31:12 wrote gpt_003.pth 20240810-18:31:45 wrote non_validated_0199_02.png 20240810-18:32:18 wrote non_validated_0199_03.png 20240810-18:32:18 wrote state.pth 20240810-18:32:18 --- epoch 200 ---------------------------------------- 20240810-18:32:18 current_test_accuracies 0.9530 0.9593 0.9602 0.9526 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 20240810-18:32:18 training model 4 20240810-18:32:18 training model 5 20240810-18:36:01 train_perplexity 200 model 5 1.1602928550670353 20240810-18:36:02 train_perplexity 200 model 4 1.160225457133647 20240810-18:36:08 test_perplexity 200 model 5 1.1597026886395292 20240810-18:36:08 test_perplexity 200 model 4 1.1584438104499393 20240810-18:41:59 test_accuracy 200 model 5 val 1524 / 1605 20240810-18:42:03 test_accuracy 200 model 4 val 1524 / 1588 20240810-18:42:05 wrote gpt_004.pth 20240810-18:42:06 wrote gpt_005.pth 20240810-18:42:38 wrote non_validated_0200_04.png 20240810-18:43:11 wrote non_validated_0200_05.png 20240810-18:43:11 wrote state.pth 20240810-18:43:11 --- epoch 201 ---------------------------------------- 20240810-18:43:11 current_test_accuracies 0.9530 0.9593 0.9602 0.9526 0.9597 0.9495 0.0000 0.0000 0.0000 0.0000 20240810-18:43:11 training model 6 20240810-18:43:11 training model 7 20240810-18:46:53 train_perplexity 201 model 7 1.1608569812854317 20240810-18:46:57 train_perplexity 201 model 6 1.160302125301327 20240810-18:46:59 test_perplexity 201 model 7 1.1597141599847811 20240810-18:47:02 test_perplexity 201 model 6 1.1606651170554623 20240810-18:52:46 test_accuracy 201 model 7 val 1559 / 1624 20240810-18:52:54 test_accuracy 201 model 6 val 1491 / 1572 20240810-18:52:56 wrote gpt_006.pth 20240810-18:52:57 wrote gpt_007.pth 20240810-18:53:29 wrote non_validated_0201_06.png 20240810-18:54:02 wrote non_validated_0201_07.png 20240810-18:54:02 wrote state.pth 20240810-18:54:02 --- epoch 202 ---------------------------------------- 20240810-18:54:02 current_test_accuracies 0.9530 0.9593 0.9602 0.9526 0.9597 0.9495 0.9485 0.9600 0.0000 0.0000 20240810-18:54:02 training model 8 20240810-18:54:02 training model 9 20240810-18:57:44 train_perplexity 202 model 8 1.1606851166317864 20240810-18:57:44 train_perplexity 202 model 9 1.1610553597525692 20240810-18:57:52 test_perplexity 202 model 8 1.1586359203184315 20240810-18:57:52 test_perplexity 202 model 9 1.1608601322822671 20240810-19:03:39 test_accuracy 202 model 8 val 1547 / 1611 20240810-19:03:42 test_accuracy 202 model 9 val 1537 / 1595 20240810-19:03:44 wrote gpt_008.pth 20240810-19:03:45 wrote gpt_009.pth 20240810-19:04:17 wrote non_validated_0202_08.png 20240810-19:04:50 wrote non_validated_0202_09.png 20240810-19:04:50 wrote state.pth 20240810-19:04:50 --- epoch 203 ---------------------------------------- 20240810-19:04:50 current_test_accuracies 0.9530 0.9593 0.9602 0.9526 0.9597 0.9495 0.9485 0.9600 0.9603 0.9636 20240810-19:04:50 training model 6 20240810-19:04:50 training model 5 20240810-19:08:33 train_perplexity 203 model 5 1.1607269599950054 20240810-19:08:34 train_perplexity 203 model 6 1.1603651981154204 20240810-19:08:41 test_perplexity 203 model 5 1.158865957683398 20240810-19:08:41 test_perplexity 203 model 6 1.1602631553763336 20240810-19:14:26 test_accuracy 203 model 6 val 1577 / 1641 20240810-19:14:35 test_accuracy 203 model 5 val 1498 / 1572 20240810-19:14:37 wrote gpt_006.pth 20240810-19:14:37 wrote gpt_005.pth 20240810-19:15:10 wrote non_validated_0203_06.png 20240810-19:15:43 wrote non_validated_0203_05.png 20240810-19:15:44 wrote state.pth 20240810-19:15:44 --- epoch 204 ---------------------------------------- 20240810-19:15:44 current_test_accuracies 0.9530 0.9593 0.9602 0.9526 0.9597 0.9529 0.9610 0.9600 0.9603 0.9636 20240810-19:19:08 keep c_quizzes model 0 validated 24 / 420 (5.71%) nb_accumulated 24 / 420 (finishes Sat 20:15 -- 422/h) 20240810-19:22:09 keep c_quizzes model 9 validated 27 / 420 (6.43%) nb_accumulated 51 / 420 (finishes Sat 20:08 -- 476/h) 20240810-19:25:10 keep c_quizzes model 9 validated 22 / 420 (5.24%) nb_accumulated 73 / 420 (finishes Sat 20:10 -- 463/h) 20240810-19:28:11 keep c_quizzes model 2 validated 23 / 420 (5.48%) nb_accumulated 96 / 420 (finishes Sat 20:10 -- 462/h) 20240810-19:31:12 keep c_quizzes model 5 validated 13 / 420 (3.10%) nb_accumulated 109 / 420 (finishes Sat 20:15 -- 422/h) 20240810-19:34:14 keep c_quizzes model 7 validated 20 / 420 (4.76%) nb_accumulated 129 / 420 (finishes Sat 20:16 -- 418/h) 20240810-19:37:16 keep c_quizzes model 4 validated 24 / 420 (5.71%) nb_accumulated 153 / 420 (finishes Sat 20:14 -- 426/h) 20240810-19:40:17 keep c_quizzes model 8 validated 17 / 420 (4.05%) nb_accumulated 170 / 420 (finishes Sat 20:16 -- 415/h) 20240810-19:43:18 keep c_quizzes model 3 validated 22 / 420 (5.24%) nb_accumulated 192 / 420 (finishes Sat 20:16 -- 417/h) 20240810-19:46:18 keep c_quizzes model 2 validated 19 / 420 (4.52%) nb_accumulated 211 / 420 (finishes Sat 20:16 -- 414/h) 20240810-19:49:19 keep c_quizzes model 9 validated 23 / 420 (5.48%) nb_accumulated 234 / 420 (finishes Sat 20:16 -- 417/h) 20240810-19:52:20 keep c_quizzes model 6 validated 20 / 420 (4.76%) nb_accumulated 254 / 420 (finishes Sat 20:16 -- 416/h) 20240810-19:55:21 keep c_quizzes model 3 validated 22 / 420 (5.24%) nb_accumulated 276 / 420 (finishes Sat 20:16 -- 417/h) 20240810-19:58:22 keep c_quizzes model 2 validated 22 / 420 (5.24%) nb_accumulated 298 / 420 (finishes Sat 20:15 -- 419/h) 20240810-20:01:23 keep c_quizzes model 6 validated 20 / 420 (4.76%) nb_accumulated 318 / 420 (finishes Sat 20:16 -- 417/h) 20240810-20:04:24 keep c_quizzes model 7 validated 19 / 420 (4.52%) nb_accumulated 337 / 420 (finishes Sat 20:16 -- 415/h) 20240810-20:07:25 keep c_quizzes model 7 validated 23 / 420 (5.48%) nb_accumulated 360 / 420 (finishes Sat 20:16 -- 417/h) 20240810-20:10:26 keep c_quizzes model 0 validated 19 / 420 (4.52%) nb_accumulated 379 / 420 (finishes Sat 20:16 -- 415/h) 20240810-20:13:27 keep c_quizzes model 4 validated 28 / 420 (6.67%) nb_accumulated 407 / 420 (finishes Sat 20:15 -- 423/h) 20240810-20:16:28 keep c_quizzes model 1 validated 18 / 420 (4.29%) nb_accumulated 425 / 420 (finishes now! -- 419/h) 20240810-20:16:41 wrote c_quizzes.pth 20240810-20:16:41 training model 0 20240810-20:16:41 training model 1 20240810-20:20:23 train_perplexity 204 model 0 1.1604592974094443 20240810-20:20:23 train_perplexity 204 model 1 1.1611875526984894 20240810-20:20:30 test_perplexity 204 model 0 1.1586181547484227 20240810-20:20:30 test_perplexity 204 model 1 1.1602985118203646 20240810-20:26:24 test_accuracy 204 model 0 val 1510 / 1605 20240810-20:26:26 test_accuracy 204 model 1 val 1538 / 1599 20240810-20:26:28 wrote gpt_000.pth 20240810-20:26:29 wrote gpt_001.pth 20240810-20:27:01 wrote non_validated_0204_00.png 20240810-20:27:34 wrote non_validated_0204_01.png 20240810-20:27:34 wrote state.pth 20240810-20:27:34 --- epoch 205 ---------------------------------------- 20240810-20:27:34 current_test_accuracies 0.9408 0.9619 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 20240810-20:27:34 training model 2 20240810-20:27:34 training model 3 20240810-20:31:17 train_perplexity 205 model 3 1.1619573057357686 20240810-20:31:19 train_perplexity 205 model 2 1.1610126008729689 20240810-20:31:24 test_perplexity 205 model 3 1.1605270302018953 20240810-20:31:25 test_perplexity 205 model 2 1.1618799778841973 20240810-20:37:05 test_accuracy 205 model 2 val 1571 / 1642 20240810-20:37:12 test_accuracy 205 model 3 val 1515 / 1586 20240810-20:37:14 wrote gpt_002.pth 20240810-20:37:15 wrote gpt_003.pth 20240810-20:37:48 wrote non_validated_0205_02.png 20240810-20:38:21 wrote non_validated_0205_03.png 20240810-20:38:21 wrote state.pth 20240810-20:38:21 --- epoch 206 ---------------------------------------- 20240810-20:38:21 current_test_accuracies 0.9408 0.9619 0.9568 0.9552 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 20240810-20:38:21 training model 4 20240810-20:38:21 training model 5 20240810-20:42:03 train_perplexity 206 model 5 1.1606083822508655 20240810-20:42:06 train_perplexity 206 model 4 1.1612132096311432 20240810-20:42:10 test_perplexity 206 model 5 1.1607352200959948 20240810-20:42:12 test_perplexity 206 model 4 1.1617778911072865 20240810-20:47:59 test_accuracy 206 model 5 val 1551 / 1623 20240810-20:48:03 test_accuracy 206 model 4 val 1540 / 1607 20240810-20:48:05 wrote gpt_004.pth 20240810-20:48:05 wrote gpt_005.pth 20240810-20:48:38 wrote non_validated_0206_04.png 20240810-20:49:10 wrote non_validated_0206_05.png 20240810-20:49:10 wrote state.pth 20240810-20:49:10 --- epoch 207 ---------------------------------------- 20240810-20:49:10 current_test_accuracies 0.9408 0.9619 0.9568 0.9552 0.9583 0.9556 0.0000 0.0000 0.0000 0.0000 20240810-20:49:10 training model 6 20240810-20:49:10 training model 7 20240810-20:52:53 train_perplexity 207 model 7 1.1605883928923992 20240810-20:52:54 train_perplexity 207 model 6 1.1605660955991115 20240810-20:53:01 test_perplexity 207 model 7 1.1613469625042052 20240810-20:53:01 test_perplexity 207 model 6 1.15883084045118 20240810-20:58:46 test_accuracy 207 model 7 val 1571 / 1624 20240810-20:58:53 test_accuracy 207 model 6 val 1501 / 1572 20240810-20:58:55 wrote gpt_006.pth 20240810-20:58:56 wrote gpt_007.pth 20240810-20:59:29 wrote non_validated_0207_06.png 20240810-21:00:01 wrote non_validated_0207_07.png 20240810-21:00:01 wrote state.pth 20240810-21:00:01 --- epoch 208 ---------------------------------------- 20240810-21:00:01 current_test_accuracies 0.9408 0.9619 0.9568 0.9552 0.9583 0.9556 0.9548 0.9674 0.0000 0.0000 20240810-21:00:01 training model 8 20240810-21:00:01 training model 9 20240810-21:03:44 train_perplexity 208 model 9 1.1611232832543386 20240810-21:03:44 train_perplexity 208 model 8 1.1617804497891808 20240810-21:03:51 test_perplexity 208 model 9 1.1609142636076375 20240810-21:03:51 test_perplexity 208 model 8 1.1617833047485489 20240810-21:09:39 test_accuracy 208 model 8 val 1544 / 1610 20240810-21:09:44 test_accuracy 208 model 9 val 1525 / 1586 20240810-21:09:46 wrote gpt_008.pth 20240810-21:09:46 wrote gpt_009.pth 20240810-21:10:19 wrote non_validated_0208_08.png 20240810-21:10:52 wrote non_validated_0208_09.png 20240810-21:10:52 wrote state.pth 20240810-21:10:52 --- epoch 209 ---------------------------------------- 20240810-21:10:52 current_test_accuracies 0.9408 0.9619 0.9568 0.9552 0.9583 0.9556 0.9548 0.9674 0.9590 0.9615 20240810-21:10:52 training model 0 20240810-21:10:52 training model 6 20240810-21:14:34 train_perplexity 209 model 6 1.1601458859917233 20240810-21:14:37 train_perplexity 209 model 0 1.1604991928119122 20240810-21:14:41 test_perplexity 209 model 6 1.1603939751917391 20240810-21:14:43 test_perplexity 209 model 0 1.1598003314413223 20240810-21:20:34 test_accuracy 209 model 0 val 1538 / 1619 20240810-21:20:39 test_accuracy 209 model 6 val 1506 / 1577 20240810-21:20:41 wrote gpt_000.pth 20240810-21:20:41 wrote gpt_006.pth 20240810-21:21:14 wrote non_validated_0209_00.png 20240810-21:21:47 wrote non_validated_0209_06.png 20240810-21:21:47 wrote state.pth 20240810-21:21:47 --- epoch 210 ---------------------------------------- 20240810-21:21:47 current_test_accuracies 0.9500 0.9619 0.9568 0.9552 0.9583 0.9556 0.9550 0.9674 0.9590 0.9615 20240810-21:21:47 training model 0 20240810-21:21:47 training model 6 20240810-21:25:29 train_perplexity 210 model 6 1.160544781445734 20240810-21:25:31 train_perplexity 210 model 0 1.1602098801028013 20240810-21:25:37 test_perplexity 210 model 6 1.1599150657084567 20240810-21:25:37 test_perplexity 210 model 0 1.1594184275625792 20240810-21:31:22 test_accuracy 210 model 6 val 1578 / 1644 20240810-21:31:29 test_accuracy 210 model 0 val 1553 / 1607 20240810-21:31:31 wrote gpt_000.pth 20240810-21:31:31 wrote gpt_006.pth 20240810-21:32:04 wrote non_validated_0210_00.png 20240810-21:32:36 wrote non_validated_0210_06.png 20240810-21:32:36 wrote state.pth 20240810-21:32:36 --- epoch 211 ---------------------------------------- 20240810-21:32:36 current_test_accuracies 0.9664 0.9619 0.9568 0.9552 0.9583 0.9556 0.9599 0.9674 0.9590 0.9615 20240810-21:35:57 keep c_quizzes model 0 validated 23 / 420 (5.48%) nb_accumulated 23 / 420 (finishes Sat 22:33 -- 412/h) 20240810-21:38:57 keep c_quizzes model 8 validated 25 / 420 (5.95%) nb_accumulated 48 / 420 (finishes Sat 22:28 -- 453/h) 20240810-21:41:57 keep c_quizzes model 6 validated 25 / 420 (5.95%) nb_accumulated 73 / 420 (finishes Sat 22:26 -- 468/h) 20240810-21:44:56 keep c_quizzes model 8 validated 25 / 420 (5.95%) nb_accumulated 98 / 420 (finishes Sat 22:25 -- 476/h) 20240810-21:47:55 keep c_quizzes model 0 validated 23 / 420 (5.48%) nb_accumulated 121 / 420 (finishes Sat 22:25 -- 473/h) 20240810-21:50:56 keep c_quizzes model 7 validated 23 / 420 (5.48%) nb_accumulated 144 / 420 (finishes Sat 22:26 -- 471/h) 20240810-21:53:55 keep c_quizzes model 7 validated 18 / 420 (4.29%) nb_accumulated 162 / 420 (finishes Sat 22:27 -- 456/h) 20240810-21:56:54 keep c_quizzes model 1 validated 19 / 420 (4.52%) nb_accumulated 181 / 420 (finishes Sat 22:28 -- 447/h) 20240810-21:59:53 keep c_quizzes model 3 validated 23 / 420 (5.48%) nb_accumulated 204 / 420 (finishes Sat 22:28 -- 448/h) 20240810-22:02:53 keep c_quizzes model 2 validated 25 / 420 (5.95%) nb_accumulated 229 / 420 (finishes Sat 22:28 -- 453/h) 20240810-22:05:52 keep c_quizzes model 9 validated 28 / 420 (6.67%) nb_accumulated 257 / 420 (finishes Sat 22:26 -- 463/h) 20240810-22:08:51 keep c_quizzes model 9 validated 28 / 420 (6.67%) nb_accumulated 285 / 420 (finishes Sat 22:26 -- 471/h) 20240810-22:11:50 keep c_quizzes model 8 validated 24 / 420 (5.71%) nb_accumulated 309 / 420 (finishes Sat 22:25 -- 472/h) 20240810-22:14:48 keep c_quizzes model 0 validated 23 / 420 (5.48%) nb_accumulated 332 / 420 (finishes Sat 22:26 -- 472/h) 20240810-22:17:48 keep c_quizzes model 4 validated 16 / 420 (3.81%) nb_accumulated 348 / 420 (finishes Sat 22:27 -- 461/h) 20240810-22:20:47 keep c_quizzes model 8 validated 23 / 420 (5.48%) nb_accumulated 371 / 420 (finishes Sat 22:27 -- 461/h) 20240810-22:23:47 keep c_quizzes model 0 validated 26 / 420 (6.19%) nb_accumulated 397 / 420 (finishes Sat 22:26 -- 465/h) 20240810-22:26:45 keep c_quizzes model 8 validated 29 / 420 (6.90%) nb_accumulated 426 / 420 (finishes now! -- 472/h) 20240810-22:26:59 wrote c_quizzes.pth 20240810-22:26:59 training model 0 20240810-22:26:59 training model 1 20240810-22:30:40 train_perplexity 211 model 1 1.1613285883567002 20240810-22:30:41 train_perplexity 211 model 0 1.1608405332495582 20240810-22:30:48 test_perplexity 211 model 1 1.1599576592200873 20240810-22:30:48 test_perplexity 211 model 0 1.1622281149336133 20240810-22:36:37 test_accuracy 211 model 0 val 1533 / 1603 20240810-22:36:38 test_accuracy 211 model 1 val 1525 / 1601 20240810-22:36:40 wrote gpt_000.pth 20240810-22:36:41 wrote gpt_001.pth 20240810-22:37:13 wrote non_validated_0211_00.png 20240810-22:37:46 wrote non_validated_0211_01.png 20240810-22:37:46 wrote state.pth 20240810-22:37:46 --- epoch 212 ---------------------------------------- 20240810-22:37:46 current_test_accuracies 0.9563 0.9525 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 20240810-22:37:46 training model 2 20240810-22:37:46 training model 3 20240810-22:41:28 train_perplexity 212 model 3 1.1619409473370041 20240810-22:41:30 train_perplexity 212 model 2 1.1617415752872404 20240810-22:41:35 test_perplexity 212 model 3 1.1622375947127512 20240810-22:41:36 test_perplexity 212 model 2 1.1602055431296114 20240810-22:47:20 test_accuracy 212 model 2 val 1539 / 1620 20240810-22:47:24 test_accuracy 212 model 3 val 1526 / 1602 20240810-22:47:25 wrote gpt_002.pth 20240810-22:47:26 wrote gpt_003.pth 20240810-22:47:59 wrote non_validated_0212_02.png 20240810-22:48:32 wrote non_validated_0212_03.png 20240810-22:48:32 wrote state.pth 20240810-22:48:32 --- epoch 213 ---------------------------------------- 20240810-22:48:32 current_test_accuracies 0.9563 0.9525 0.9500 0.9526 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 20240810-22:48:32 training model 4 20240810-22:48:32 training model 5 20240810-22:52:14 train_perplexity 213 model 5 1.1608684789196368 20240810-22:52:17 train_perplexity 213 model 4 1.1615702625481215 20240810-22:52:21 test_perplexity 213 model 5 1.1608705421477823 20240810-22:52:23 test_perplexity 213 model 4 1.1621393239625775 20240810-22:58:19 test_accuracy 213 model 5 val 1516 / 1604 20240810-22:58:21 test_accuracy 213 model 4 val 1528 / 1590 20240810-22:58:23 wrote gpt_004.pth 20240810-22:58:23 wrote gpt_005.pth 20240810-22:58:56 wrote non_validated_0213_04.png 20240810-22:59:28 wrote non_validated_0213_05.png 20240810-22:59:28 wrote state.pth 20240810-22:59:28 --- epoch 214 ---------------------------------------- 20240810-22:59:28 current_test_accuracies 0.9563 0.9525 0.9500 0.9526 0.9610 0.9451 0.0000 0.0000 0.0000 0.0000 20240810-22:59:28 training model 6 20240810-22:59:28 training model 7 20240810-23:03:11 train_perplexity 214 model 7 1.1612888741131184 20240810-23:03:12 train_perplexity 214 model 6 1.161052592681712 20240810-23:03:18 test_perplexity 214 model 7 1.1603928351607038 20240810-23:03:19 test_perplexity 214 model 6 1.163136955039111 20240810-23:09:09 test_accuracy 214 model 6 val 1527 / 1601 20240810-23:09:10 test_accuracy 214 model 7 val 1513 / 1593 20240810-23:09:12 wrote gpt_006.pth 20240810-23:09:13 wrote gpt_007.pth 20240810-23:09:46 wrote non_validated_0214_06.png 20240810-23:10:18 wrote non_validated_0214_07.png 20240810-23:10:18 wrote state.pth 20240810-23:10:18 --- epoch 215 ---------------------------------------- 20240810-23:10:18 current_test_accuracies 0.9563 0.9525 0.9500 0.9526 0.9610 0.9451 0.9538 0.9498 0.0000 0.0000 20240810-23:10:18 training model 8 20240810-23:10:18 training model 9 20240810-23:14:00 train_perplexity 215 model 9 1.1617609041186805 20240810-23:14:03 train_perplexity 215 model 8 1.1620300872497251 20240810-23:14:07 test_perplexity 215 model 9 1.1609535241770923 20240810-23:14:09 test_perplexity 215 model 8 1.1615101714206244 20240810-23:19:59 test_accuracy 215 model 9 val 1545 / 1622 20240810-23:20:02 test_accuracy 215 model 8 val 1513 / 1607 20240810-23:20:04 wrote gpt_008.pth 20240810-23:20:05 wrote gpt_009.pth 20240810-23:20:39 wrote non_validated_0215_08.png 20240810-23:21:13 wrote non_validated_0215_09.png 20240810-23:21:13 wrote state.pth 20240810-23:21:13 --- epoch 216 ---------------------------------------- 20240810-23:21:13 current_test_accuracies 0.9563 0.9525 0.9500 0.9526 0.9610 0.9451 0.9538 0.9498 0.9415 0.9525 20240810-23:21:13 training model 8 20240810-23:21:13 training model 5 20240810-23:24:56 train_perplexity 216 model 5 1.1603851454163974 20240810-23:24:56 train_perplexity 216 model 8 1.1615397013143394 20240810-23:25:03 test_perplexity 216 model 5 1.1591025692923098 20240810-23:25:03 test_perplexity 216 model 8 1.1603487021554881 20240810-23:30:56 test_accuracy 216 model 8 val 1536 / 1613 20240810-23:31:02 test_accuracy 216 model 5 val 1514 / 1573 20240810-23:31:04 wrote gpt_008.pth 20240810-23:31:04 wrote gpt_005.pth 20240810-23:31:38 wrote non_validated_0216_08.png 20240810-23:32:11 wrote non_validated_0216_05.png 20240810-23:32:11 wrote state.pth 20240810-23:32:11 --- epoch 217 ---------------------------------------- 20240810-23:32:11 current_test_accuracies 0.9563 0.9525 0.9500 0.9526 0.9610 0.9625 0.9538 0.9498 0.9523 0.9525 20240810-23:32:11 training model 7 20240810-23:32:11 training model 2 20240810-23:35:54 train_perplexity 217 model 2 1.1614138809099743 20240810-23:35:56 train_perplexity 217 model 7 1.1610450522731404 20240810-23:36:02 test_perplexity 217 model 2 1.1602461618789628 20240810-23:36:02 test_perplexity 217 model 7 1.1597257487400516 20240810-23:42:05 test_accuracy 217 model 7 val 1510 / 1581 20240810-23:42:05 test_accuracy 217 model 2 val 1533 / 1598 20240810-23:42:08 wrote gpt_007.pth 20240810-23:42:08 wrote gpt_002.pth 20240810-23:42:44 wrote non_validated_0217_07.png 20240810-23:43:16 wrote non_validated_0217_02.png 20240810-23:43:17 wrote state.pth 20240810-23:43:17 --- epoch 218 ---------------------------------------- 20240810-23:43:17 current_test_accuracies 0.9563 0.9525 0.9593 0.9526 0.9610 0.9625 0.9538 0.9551 0.9523 0.9525 20240810-23:46:40 keep c_quizzes model 1 validated 25 / 420 (5.95%) nb_accumulated 25 / 420 (finishes Sun 00:40 -- 443/h) 20240810-23:49:42 keep c_quizzes model 8 validated 23 / 420 (5.48%) nb_accumulated 48 / 420 (finishes Sun 00:39 -- 448/h) 20240810-23:52:48 keep c_quizzes model 2 validated 25 / 420 (5.95%) nb_accumulated 73 / 420 (finishes Sun 00:38 -- 460/h) 20240810-23:55:50 keep c_quizzes model 5 validated 28 / 420 (6.67%) nb_accumulated 101 / 420 (finishes Sun 00:35 -- 482/h) 20240810-23:58:55 keep c_quizzes model 9 validated 28 / 420 (6.67%) nb_accumulated 129 / 420 (finishes Sun 00:34 -- 494/h) 20240811-00:01:59 keep c_quizzes model 9 validated 13 / 420 (3.10%) nb_accumulated 142 / 420 (finishes Sun 00:38 -- 455/h) 20240811-00:05:03 keep c_quizzes model 6 validated 33 / 420 (7.86%) nb_accumulated 175 / 420 (finishes Sun 00:35 -- 482/h) 20240811-00:08:06 keep c_quizzes model 7 validated 21 / 420 (5.00%) nb_accumulated 196 / 420 (finishes Sun 00:36 -- 473/h) 20240811-00:11:09 keep c_quizzes model 8 validated 20 / 420 (4.76%) nb_accumulated 216 / 420 (finishes Sun 00:37 -- 464/h) 20240811-00:14:12 keep c_quizzes model 9 validated 26 / 420 (6.19%) nb_accumulated 242 / 420 (finishes Sun 00:36 -- 469/h) 20240811-00:17:14 keep c_quizzes model 3 validated 32 / 420 (7.62%) nb_accumulated 274 / 420 (finishes Sun 00:35 -- 484/h) 20240811-00:20:19 keep c_quizzes model 6 validated 36 / 420 (8.57%) nb_accumulated 310 / 420 (finishes Sun 00:33 -- 502/h) 20240811-00:23:24 keep c_quizzes model 8 validated 33 / 420 (7.86%) nb_accumulated 343 / 420 (finishes Sun 00:32 -- 512/h) 20240811-00:26:27 keep c_quizzes model 5 validated 28 / 420 (6.67%) nb_accumulated 371 / 420 (finishes Sun 00:32 -- 515/h) 20240811-00:29:28 keep c_quizzes model 8 validated 26 / 420 (6.19%) nb_accumulated 397 / 420 (finishes Sun 00:32 -- 515/h) 20240811-00:32:31 keep c_quizzes model 1 validated 25 / 420 (5.95%) nb_accumulated 422 / 420 (finishes now! -- 514/h) 20240811-00:32:45 wrote c_quizzes.pth 20240811-00:32:45 training model 0 20240811-00:32:45 training model 1 20240811-00:36:26 train_perplexity 218 model 1 1.1621433203304932 20240811-00:36:31 train_perplexity 218 model 0 1.1614147952947946 20240811-00:36:32 test_perplexity 218 model 1 1.1607822412632494 20240811-00:36:35 test_perplexity 218 model 0 1.1600212920548654 20240811-00:42:26 test_accuracy 218 model 1 val 1515 / 1594 20240811-00:42:30 test_accuracy 218 model 0 val 1508 / 1578 20240811-00:42:32 wrote gpt_000.pth 20240811-00:42:32 wrote gpt_001.pth 20240811-00:43:05 wrote non_validated_0218_00.png 20240811-00:43:38 wrote non_validated_0218_01.png 20240811-00:43:38 wrote state.pth 20240811-00:43:38 --- epoch 219 ---------------------------------------- 20240811-00:43:38 current_test_accuracies 0.9556 0.9504 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 20240811-00:43:38 training model 2 20240811-00:43:38 training model 3 20240811-00:47:19 train_perplexity 219 model 3 1.1626864431798956 20240811-00:47:24 train_perplexity 219 model 2 1.1617437051198838 20240811-00:47:26 test_perplexity 219 model 3 1.162293708189107 20240811-00:47:29 test_perplexity 219 model 2 1.161192113778741 20240811-00:53:17 test_accuracy 219 model 2 val 1505 / 1607 20240811-00:53:18 test_accuracy 219 model 3 val 1500 / 1593 20240811-00:53:20 wrote gpt_002.pth 20240811-00:53:20 wrote gpt_003.pth 20240811-00:53:53 wrote non_validated_0219_02.png 20240811-00:54:25 wrote non_validated_0219_03.png 20240811-00:54:26 wrote state.pth 20240811-00:54:26 --- epoch 220 ---------------------------------------- 20240811-00:54:26 current_test_accuracies 0.9556 0.9504 0.9365 0.9416 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 20240811-00:54:26 training model 4 20240811-00:54:26 training model 5 20240811-00:58:08 train_perplexity 220 model 5 1.1614956258104208 20240811-00:58:10 train_perplexity 220 model 4 1.1617783529980794 20240811-00:58:15 test_perplexity 220 model 5 1.1624164282711873 20240811-00:58:16 test_perplexity 220 model 4 1.161014801360749 20240811-01:04:12 test_accuracy 220 model 5 val 1544 / 1606 20240811-01:04:15 test_accuracy 220 model 4 val 1522 / 1581 20240811-01:04:17 wrote gpt_004.pth 20240811-01:04:17 wrote gpt_005.pth 20240811-01:04:50 wrote non_validated_0220_04.png 20240811-01:05:23 wrote non_validated_0220_05.png 20240811-01:05:23 wrote state.pth 20240811-01:05:23 --- epoch 221 ---------------------------------------- 20240811-01:05:23 current_test_accuracies 0.9556 0.9504 0.9365 0.9416 0.9627 0.9614 0.0000 0.0000 0.0000 0.0000 20240811-01:05:23 training model 6 20240811-01:05:23 training model 7 20240811-01:09:04 train_perplexity 221 model 7 1.1614685358721604 20240811-01:09:08 train_perplexity 221 model 6 1.1614657323909552 20240811-01:09:11 test_perplexity 221 model 7 1.1601945045324566 20240811-01:09:14 test_perplexity 221 model 6 1.159242592144664 20240811-01:15:05 test_accuracy 221 model 6 val 1523 / 1590 20240811-01:15:09 test_accuracy 221 model 7 val 1522 / 1578 20240811-01:15:11 wrote gpt_006.pth 20240811-01:15:12 wrote gpt_007.pth 20240811-01:15:44 wrote non_validated_0221_06.png 20240811-01:16:17 wrote non_validated_0221_07.png 20240811-01:16:17 wrote state.pth 20240811-01:16:17 --- epoch 222 ---------------------------------------- 20240811-01:16:17 current_test_accuracies 0.9556 0.9504 0.9365 0.9416 0.9627 0.9614 0.9579 0.9645 0.0000 0.0000 20240811-01:16:17 training model 8 20240811-01:16:17 training model 9 20240811-01:20:00 train_perplexity 222 model 9 1.1622835476362263 20240811-01:20:00 train_perplexity 222 model 8 1.162181622250171 20240811-01:20:07 test_perplexity 222 model 9 1.1606427334241098 20240811-01:20:07 test_perplexity 222 model 8 1.162557289513782 20240811-01:25:55 test_accuracy 222 model 8 val 1544 / 1614 20240811-01:25:59 test_accuracy 222 model 9 val 1507 / 1599 20240811-01:26:01 wrote gpt_008.pth 20240811-01:26:02 wrote gpt_009.pth 20240811-01:26:35 wrote non_validated_0222_08.png 20240811-01:27:07 wrote non_validated_0222_09.png 20240811-01:27:07 wrote state.pth 20240811-01:27:07 --- epoch 223 ---------------------------------------- 20240811-01:27:07 current_test_accuracies 0.9556 0.9504 0.9365 0.9416 0.9627 0.9614 0.9579 0.9645 0.9566 0.9425 20240811-01:27:07 training model 2 20240811-01:27:07 training model 3 20240811-01:30:49 train_perplexity 223 model 3 1.1621606020053854 20240811-01:30:53 train_perplexity 223 model 2 1.1613983652388864 20240811-01:30:56 test_perplexity 223 model 3 1.1617428273708559 20240811-01:30:58 test_perplexity 223 model 2 1.1605872786817624 20240811-01:36:42 test_accuracy 223 model 3 val 1532 / 1629 20240811-01:36:49 test_accuracy 223 model 2 val 1488 / 1574 20240811-01:36:51 wrote gpt_002.pth 20240811-01:36:51 wrote gpt_003.pth 20240811-01:37:24 wrote non_validated_0223_02.png 20240811-01:37:56 wrote non_validated_0223_03.png 20240811-01:37:56 wrote state.pth 20240811-01:37:56 --- epoch 224 ---------------------------------------- 20240811-01:37:56 current_test_accuracies 0.9556 0.9504 0.9454 0.9405 0.9627 0.9614 0.9579 0.9645 0.9566 0.9425 20240811-01:37:56 training model 3 20240811-01:37:56 training model 9 20240811-01:41:38 train_perplexity 224 model 9 1.1623343452268156 20240811-01:41:44 train_perplexity 224 model 3 1.162289367155469 20240811-01:41:44 test_perplexity 224 model 9 1.160789725700701 20240811-01:41:48 test_perplexity 224 model 3 1.1617570152720371 20240811-01:47:33 test_accuracy 224 model 9 val 1549 / 1623 20240811-01:47:41 test_accuracy 224 model 3 val 1513 / 1576 20240811-01:47:43 wrote gpt_003.pth 20240811-01:47:44 wrote gpt_009.pth 20240811-01:48:16 wrote non_validated_0224_03.png 20240811-01:48:48 wrote non_validated_0224_09.png 20240811-01:48:49 wrote state.pth 20240811-01:48:49 --- epoch 225 ---------------------------------------- 20240811-01:48:49 current_test_accuracies 0.9556 0.9504 0.9454 0.9600 0.9627 0.9614 0.9579 0.9645 0.9566 0.9544 20240811-01:48:49 training model 2 20240811-01:48:49 training model 1 20240811-01:52:31 train_perplexity 225 model 2 1.1615985384430552 20240811-01:52:31 train_perplexity 225 model 1 1.1621624211280572 20240811-01:52:39 test_perplexity 225 model 2 1.1615417647348207 20240811-01:52:39 test_perplexity 225 model 1 1.1620033393151734 20240811-01:58:26 test_accuracy 225 model 1 val 1530 / 1611 20240811-01:58:28 test_accuracy 225 model 2 val 1522 / 1601 20240811-01:58:30 wrote gpt_002.pth 20240811-01:58:31 wrote gpt_001.pth 20240811-01:59:04 wrote non_validated_0225_02.png 20240811-01:59:36 wrote non_validated_0225_01.png 20240811-01:59:37 wrote state.pth 20240811-01:59:37 --- epoch 226 ---------------------------------------- 20240811-01:59:37 current_test_accuracies 0.9556 0.9497 0.9507 0.9600 0.9627 0.9614 0.9579 0.9645 0.9566 0.9544 20240811-01:59:37 training model 1 20240811-01:59:37 training model 2 20240811-02:03:19 train_perplexity 226 model 2 1.1605864144797877 20240811-02:03:22 train_perplexity 226 model 1 1.1615315958540358 20240811-02:03:25 test_perplexity 226 model 2 1.1610447514534419 20240811-02:03:27 test_perplexity 226 model 1 1.161282064240013 20240811-02:09:19 test_accuracy 226 model 1 val 1543 / 1617 20240811-02:09:22 test_accuracy 226 model 2 val 1497 / 1590 20240811-02:09:24 wrote gpt_001.pth 20240811-02:09:24 wrote gpt_002.pth 20240811-02:09:57 wrote non_validated_0226_01.png 20240811-02:10:30 wrote non_validated_0226_02.png 20240811-02:10:30 wrote state.pth 20240811-02:10:30 --- epoch 227 ---------------------------------------- 20240811-02:10:30 current_test_accuracies 0.9556 0.9542 0.9415 0.9600 0.9627 0.9614 0.9579 0.9645 0.9566 0.9544 20240811-02:10:30 training model 2 20240811-02:10:30 training model 1 20240811-02:14:12 train_perplexity 227 model 1 1.1617018010039937 20240811-02:14:14 train_perplexity 227 model 2 1.161503625042178 20240811-02:14:19 test_perplexity 227 model 1 1.163532696874755 20240811-02:14:20 test_perplexity 227 model 2 1.1611211074434655 20240811-02:20:05 test_accuracy 227 model 2 val 1556 / 1623 20240811-02:20:08 test_accuracy 227 model 1 val 1531 / 1604 20240811-02:20:09 wrote gpt_002.pth 20240811-02:20:10 wrote gpt_001.pth 20240811-02:20:43 wrote non_validated_0227_02.png 20240811-02:21:15 wrote non_validated_0227_01.png 20240811-02:21:15 wrote state.pth 20240811-02:21:15 --- epoch 228 ---------------------------------------- 20240811-02:21:15 current_test_accuracies 0.9556 0.9545 0.9587 0.9600 0.9627 0.9614 0.9579 0.9645 0.9566 0.9544 20240811-02:24:38 keep c_quizzes model 8 validated 23 / 420 (5.48%) nb_accumulated 23 / 420 (finishes Sun 03:23 -- 407/h) 20240811-02:27:41 keep c_quizzes model 9 validated 19 / 420 (4.52%) nb_accumulated 42 / 420 (finishes Sun 03:25 -- 392/h) 20240811-02:30:42 keep c_quizzes model 6 validated 20 / 420 (4.76%) nb_accumulated 62 / 420 (finishes Sun 03:25 -- 394/h) 20240811-02:33:44 keep c_quizzes model 7 validated 23 / 420 (5.48%) nb_accumulated 85 / 420 (finishes Sun 03:22 -- 408/h) 20240811-02:36:45 keep c_quizzes model 7 validated 27 / 420 (6.43%) nb_accumulated 112 / 420 (finishes Sun 03:19 -- 433/h) 20240811-02:39:46 keep c_quizzes model 5 validated 18 / 420 (4.29%) nb_accumulated 130 / 420 (finishes Sun 03:21 -- 421/h) 20240811-02:42:47 keep c_quizzes model 1 validated 24 / 420 (5.71%) nb_accumulated 154 / 420 (finishes Sun 03:19 -- 429/h) 20240811-02:45:53 keep c_quizzes model 0 validated 25 / 420 (5.95%) nb_accumulated 179 / 420 (finishes Sun 03:19 -- 436/h) 20240811-02:49:00 keep c_quizzes model 0 validated 22 / 420 (5.24%) nb_accumulated 201 / 420 (finishes Sun 03:19 -- 434/h) 20240811-02:52:02 keep c_quizzes model 5 validated 29 / 420 (6.90%) nb_accumulated 230 / 420 (finishes Sun 03:17 -- 448/h) 20240811-02:55:03 keep c_quizzes model 7 validated 23 / 420 (5.48%) nb_accumulated 253 / 420 (finishes Sun 03:17 -- 449/h) 20240811-02:58:05 keep c_quizzes model 0 validated 17 / 420 (4.05%) nb_accumulated 270 / 420 (finishes Sun 03:18 -- 439/h) 20240811-03:01:06 keep c_quizzes model 8 validated 26 / 420 (6.19%) nb_accumulated 296 / 420 (finishes Sun 03:17 -- 445/h) 20240811-03:04:07 keep c_quizzes model 3 validated 20 / 420 (4.76%) nb_accumulated 316 / 420 (finishes Sun 03:18 -- 442/h) 20240811-03:07:09 keep c_quizzes model 7 validated 18 / 420 (4.29%) nb_accumulated 334 / 420 (finishes Sun 03:18 -- 436/h) 20240811-03:10:11 keep c_quizzes model 0 validated 25 / 420 (5.95%) nb_accumulated 359 / 420 (finishes Sun 03:18 -- 440/h) 20240811-03:13:12 keep c_quizzes model 2 validated 22 / 420 (5.24%) nb_accumulated 381 / 420 (finishes Sun 03:18 -- 440/h) 20240811-03:16:14 keep c_quizzes model 3 validated 26 / 420 (6.19%) nb_accumulated 407 / 420 (finishes Sun 03:17 -- 444/h) 20240811-03:19:15 keep c_quizzes model 4 validated 22 / 420 (5.24%) nb_accumulated 429 / 420 (finishes now! -- 443/h) 20240811-03:19:28 wrote c_quizzes.pth 20240811-03:19:28 training model 0 20240811-03:19:28 training model 1 20240811-03:23:10 train_perplexity 228 model 1 1.162447105314563 20240811-03:23:10 train_perplexity 228 model 0 1.161888849608787 20240811-03:23:17 test_perplexity 228 model 1 1.1614111068084636 20240811-03:23:17 test_perplexity 228 model 0 1.1627691105489766 20240811-03:29:02 test_accuracy 228 model 0 val 1571 / 1639 20240811-03:29:03 test_accuracy 228 model 1 val 1562 / 1626 20240811-03:29:05 wrote gpt_000.pth 20240811-03:29:06 wrote gpt_001.pth 20240811-03:29:39 wrote non_validated_0228_00.png 20240811-03:30:11 wrote non_validated_0228_01.png 20240811-03:30:11 wrote state.pth 20240811-03:30:11 --- epoch 229 ---------------------------------------- 20240811-03:30:11 current_test_accuracies 0.9585 0.9606 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 20240811-03:30:11 training model 2 20240811-03:30:11 training model 3 20240811-03:33:54 train_perplexity 229 model 3 1.1628669819014856 20240811-03:33:56 train_perplexity 229 model 2 1.1618251234130286 20240811-03:34:01 test_perplexity 229 model 3 1.1601166452102014 20240811-03:34:02 test_perplexity 229 model 2 1.162815316770938 20240811-03:39:46 test_accuracy 229 model 2 val 1557 / 1630 20240811-03:39:48 test_accuracy 229 model 3 val 1538 / 1611 20240811-03:39:50 wrote gpt_002.pth 20240811-03:39:51 wrote gpt_003.pth 20240811-03:40:24 wrote non_validated_0229_02.png 20240811-03:40:57 wrote non_validated_0229_03.png 20240811-03:40:57 wrote state.pth 20240811-03:40:57 --- epoch 230 ---------------------------------------- 20240811-03:40:57 current_test_accuracies 0.9585 0.9606 0.9552 0.9547 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 20240811-03:40:57 training model 4 20240811-03:40:57 training model 5 20240811-03:44:39 train_perplexity 230 model 5 1.1617076809409022 20240811-03:44:43 train_perplexity 230 model 4 1.1625032833859363 20240811-03:44:45 test_perplexity 230 model 5 1.1629682985053824 20240811-03:44:48 test_perplexity 230 model 4 1.1639383687914426 20240811-03:50:35 test_accuracy 230 model 4 val 1569 / 1632 20240811-03:50:40 test_accuracy 230 model 5 val 1537 / 1590 20240811-03:50:42 wrote gpt_004.pth 20240811-03:50:42 wrote gpt_005.pth 20240811-03:51:15 wrote non_validated_0230_04.png 20240811-03:51:48 wrote non_validated_0230_05.png 20240811-03:51:48 wrote state.pth 20240811-03:51:48 --- epoch 231 ---------------------------------------- 20240811-03:51:48 current_test_accuracies 0.9585 0.9606 0.9552 0.9547 0.9614 0.9667 0.0000 0.0000 0.0000 0.0000 20240811-03:51:48 training model 6 20240811-03:51:48 training model 7 20240811-03:55:30 train_perplexity 231 model 7 1.162187870987661 20240811-03:55:31 train_perplexity 231 model 6 1.1619047416320563 20240811-03:55:38 test_perplexity 231 model 7 1.1633890013499317 20240811-03:55:38 test_perplexity 231 model 6 1.162803881893126 20240811-04:01:24 test_accuracy 231 model 7 val 1544 / 1613 20240811-04:01:28 test_accuracy 231 model 6 val 1533 / 1593 20240811-04:01:30 wrote gpt_006.pth 20240811-04:01:31 wrote gpt_007.pth 20240811-04:02:04 wrote non_validated_0231_06.png 20240811-04:02:37 wrote non_validated_0231_07.png 20240811-04:02:37 wrote state.pth 20240811-04:02:37 --- epoch 232 ---------------------------------------- 20240811-04:02:37 current_test_accuracies 0.9585 0.9606 0.9552 0.9547 0.9614 0.9667 0.9623 0.9572 0.0000 0.0000 20240811-04:02:37 training model 8 20240811-04:02:37 training model 9 20240811-04:06:20 train_perplexity 232 model 9 1.162487590417099 20240811-04:06:20 train_perplexity 232 model 8 1.1625374638401558 20240811-04:06:27 test_perplexity 232 model 9 1.1622383385507138 20240811-04:06:27 test_perplexity 232 model 8 1.1614281087148384 20240811-04:12:16 test_accuracy 232 model 8 val 1529 / 1606 20240811-04:12:21 test_accuracy 232 model 9 val 1496 / 1575 20240811-04:12:23 wrote gpt_008.pth 20240811-04:12:24 wrote gpt_009.pth 20240811-04:12:56 wrote non_validated_0232_08.png 20240811-04:13:29 wrote non_validated_0232_09.png 20240811-04:13:29 wrote state.pth 20240811-04:13:29 --- epoch 233 ---------------------------------------- 20240811-04:13:29 current_test_accuracies 0.9585 0.9606 0.9552 0.9547 0.9614 0.9667 0.9623 0.9572 0.9521 0.9498 20240811-04:13:29 training model 9 20240811-04:13:29 training model 8 20240811-04:17:11 train_perplexity 233 model 8 1.1619414269207213 20240811-04:17:13 train_perplexity 233 model 9 1.1621758815791905 20240811-04:17:18 test_perplexity 233 model 8 1.16369948835562 20240811-04:17:19 test_perplexity 233 model 9 1.162396359273476 20240811-04:23:11 test_accuracy 233 model 9 val 1533 / 1622 20240811-04:23:13 test_accuracy 233 model 8 val 1537 / 1617 20240811-04:23:15 wrote gpt_009.pth 20240811-04:23:16 wrote gpt_008.pth 20240811-04:23:48 wrote non_validated_0233_09.png 20240811-04:24:21 wrote non_validated_0233_08.png 20240811-04:24:21 wrote state.pth 20240811-04:24:21 --- epoch 234 ---------------------------------------- 20240811-04:24:21 current_test_accuracies 0.9585 0.9606 0.9552 0.9547 0.9614 0.9667 0.9623 0.9572 0.9505 0.9451 20240811-04:24:21 training model 9 20240811-04:24:21 training model 8 20240811-04:28:03 train_perplexity 234 model 8 1.16230348420414 20240811-04:28:05 train_perplexity 234 model 9 1.161919712694283 20240811-04:28:10 test_perplexity 234 model 8 1.1627975425640087 20240811-04:28:12 test_perplexity 234 model 9 1.1617624403931615 20240811-04:34:08 test_accuracy 234 model 8 val 1530 / 1597 20240811-04:34:08 test_accuracy 234 model 9 val 1525 / 1594 20240811-04:34:10 wrote gpt_009.pth 20240811-04:34:11 wrote gpt_008.pth 20240811-04:34:44 wrote non_validated_0234_09.png 20240811-04:35:16 wrote non_validated_0234_08.png 20240811-04:35:16 wrote state.pth 20240811-04:35:16 --- epoch 235 ---------------------------------------- 20240811-04:35:16 current_test_accuracies 0.9585 0.9606 0.9552 0.9547 0.9614 0.9667 0.9623 0.9572 0.9580 0.9567 20240811-04:38:38 keep c_quizzes model 6 validated 26 / 420 (6.19%) nb_accumulated 26 / 420 (finishes Sun 05:29 -- 465/h) 20240811-04:41:39 keep c_quizzes model 4 validated 30 / 420 (7.14%) nb_accumulated 56 / 420 (finishes Sun 05:23 -- 527/h) 20240811-04:44:46 keep c_quizzes model 0 validated 26 / 420 (6.19%) nb_accumulated 82 / 420 (finishes Sun 05:23 -- 518/h) 20240811-04:47:52 keep c_quizzes model 6 validated 22 / 420 (5.24%) nb_accumulated 104 / 420 (finishes Sun 05:26 -- 495/h) 20240811-04:50:53 keep c_quizzes model 5 validated 30 / 420 (7.14%) nb_accumulated 134 / 420 (finishes Sun 05:24 -- 514/h) 20240811-04:53:54 keep c_quizzes model 5 validated 28 / 420 (6.67%) nb_accumulated 162 / 420 (finishes Sun 05:23 -- 521/h) 20240811-04:56:55 keep c_quizzes model 7 validated 29 / 420 (6.90%) nb_accumulated 191 / 420 (finishes Sun 05:22 -- 529/h) 20240811-04:59:57 keep c_quizzes model 3 validated 21 / 420 (5.00%) nb_accumulated 212 / 420 (finishes Sun 05:24 -- 515/h) 20240811-05:02:58 keep c_quizzes model 6 validated 26 / 420 (6.19%) nb_accumulated 238 / 420 (finishes Sun 05:24 -- 515/h) 20240811-05:06:00 keep c_quizzes model 5 validated 19 / 420 (4.52%) nb_accumulated 257 / 420 (finishes Sun 05:25 -- 501/h) 20240811-05:09:00 keep c_quizzes model 5 validated 29 / 420 (6.90%) nb_accumulated 286 / 420 (finishes Sun 05:24 -- 508/h) 20240811-05:12:01 keep c_quizzes model 8 validated 24 / 420 (5.71%) nb_accumulated 310 / 420 (finishes Sun 05:25 -- 506/h) 20240811-05:15:03 keep c_quizzes model 3 validated 27 / 420 (6.43%) nb_accumulated 337 / 420 (finishes Sun 05:24 -- 508/h) 20240811-05:18:12 keep c_quizzes model 6 validated 29 / 420 (6.90%) nb_accumulated 366 / 420 (finishes Sun 05:24 -- 511/h) 20240811-05:21:13 keep c_quizzes model 7 validated 26 / 420 (6.19%) nb_accumulated 392 / 420 (finishes Sun 05:24 -- 511/h) 20240811-05:24:15 keep c_quizzes model 0 validated 23 / 420 (5.48%) nb_accumulated 415 / 420 (finishes Sun 05:24 -- 508/h) 20240811-05:27:16 keep c_quizzes model 8 validated 29 / 420 (6.90%) nb_accumulated 444 / 420 (finishes now! -- 512/h) 20240811-05:27:29 wrote c_quizzes.pth 20240811-05:27:29 training model 0 20240811-05:27:29 training model 1 20240811-05:31:11 train_perplexity 235 model 1 1.1626434526352876 20240811-05:31:11 train_perplexity 235 model 0 1.1623840784176938 20240811-05:31:18 test_perplexity 235 model 1 1.162994331288938 20240811-05:31:18 test_perplexity 235 model 0 1.1641207345564926 20240811-05:37:04 test_accuracy 235 model 1 val 1543 / 1611 20240811-05:37:08 test_accuracy 235 model 0 val 1523 / 1599 20240811-05:37:10 wrote gpt_000.pth 20240811-05:37:11 wrote gpt_001.pth 20240811-05:37:43 wrote non_validated_0235_00.png 20240811-05:38:16 wrote non_validated_0235_01.png 20240811-05:38:16 wrote state.pth 20240811-05:38:16 --- epoch 236 ---------------------------------------- 20240811-05:38:16 current_test_accuracies 0.9525 0.9578 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 20240811-05:38:16 training model 2 20240811-05:38:16 training model 3 20240811-05:41:59 train_perplexity 236 model 3 1.163152943456139 20240811-05:42:01 train_perplexity 236 model 2 1.162173162946812 20240811-05:42:06 test_perplexity 236 model 3 1.1636620454564857 20240811-05:42:07 test_perplexity 236 model 2 1.1618626096832525 20240811-05:47:57 test_accuracy 236 model 3 val 1512 / 1607 20240811-05:47:58 test_accuracy 236 model 2 val 1530 / 1598 20240811-05:48:00 wrote gpt_002.pth 20240811-05:48:01 wrote gpt_003.pth 20240811-05:48:33 wrote non_validated_0236_02.png 20240811-05:49:06 wrote non_validated_0236_03.png 20240811-05:49:06 wrote state.pth 20240811-05:49:06 --- epoch 237 ---------------------------------------- 20240811-05:49:06 current_test_accuracies 0.9525 0.9578 0.9574 0.9409 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 20240811-05:49:06 training model 4 20240811-05:49:06 training model 5 20240811-05:52:48 train_perplexity 237 model 5 1.1627190088281212 20240811-05:52:51 train_perplexity 237 model 4 1.162616513153835 20240811-05:52:54 test_perplexity 237 model 5 1.163532415999379 20240811-05:52:56 test_perplexity 237 model 4 1.1657301971640952 20240811-05:58:49 test_accuracy 237 model 4 val 1517 / 1600 20240811-05:58:51 test_accuracy 237 model 5 val 1523 / 1599 20240811-05:58:53 wrote gpt_004.pth 20240811-05:58:53 wrote gpt_005.pth 20240811-05:59:26 wrote non_validated_0237_04.png 20240811-05:59:59 wrote non_validated_0237_05.png 20240811-05:59:59 wrote state.pth 20240811-05:59:59 --- epoch 238 ---------------------------------------- 20240811-05:59:59 current_test_accuracies 0.9525 0.9578 0.9574 0.9409 0.9481 0.9525 0.0000 0.0000 0.0000 0.0000 20240811-05:59:59 training model 6 20240811-05:59:59 training model 7 20240811-06:03:40 train_perplexity 238 model 7 1.1626054781130315 20240811-06:03:46 train_perplexity 238 model 6 1.162109378336152 20240811-06:03:46 test_perplexity 238 model 7 1.1628112446463357 20240811-06:03:50 test_perplexity 238 model 6 1.1625602596236901 20240811-06:09:42 test_accuracy 238 model 7 val 1520 / 1598 20240811-06:09:43 test_accuracy 238 model 6 val 1518 / 1596 20240811-06:09:45 wrote gpt_006.pth 20240811-06:09:46 wrote gpt_007.pth 20240811-06:10:19 wrote non_validated_0238_06.png 20240811-06:10:51 wrote non_validated_0238_07.png 20240811-06:10:51 wrote state.pth 20240811-06:10:51 --- epoch 239 ---------------------------------------- 20240811-06:10:51 current_test_accuracies 0.9525 0.9578 0.9574 0.9409 0.9481 0.9525 0.9511 0.9512 0.0000 0.0000 20240811-06:10:51 training model 8 20240811-06:10:51 training model 9 20240811-06:14:33 train_perplexity 239 model 9 1.1627778484128626 20240811-06:14:34 train_perplexity 239 model 8 1.1628057288689813 20240811-06:14:41 test_perplexity 239 model 9 1.1634438157724798 20240811-06:14:41 test_perplexity 239 model 8 1.1615916103073252 20240811-06:20:32 test_accuracy 239 model 9 val 1531 / 1605 20240811-06:20:34 test_accuracy 239 model 8 val 1512 / 1591 20240811-06:20:36 wrote gpt_008.pth 20240811-06:20:37 wrote gpt_009.pth 20240811-06:21:09 wrote non_validated_0239_08.png 20240811-06:21:42 wrote non_validated_0239_09.png 20240811-06:21:42 wrote state.pth 20240811-06:21:42 --- epoch 240 ---------------------------------------- 20240811-06:21:42 current_test_accuracies 0.9525 0.9578 0.9574 0.9409 0.9481 0.9525 0.9511 0.9512 0.9503 0.9539 20240811-06:21:42 training model 3 20240811-06:21:42 training model 4 20240811-06:25:25 train_perplexity 240 model 3 1.1628799844747277 20240811-06:25:25 train_perplexity 240 model 4 1.1624530850142107 20240811-06:25:32 test_perplexity 240 model 3 1.160675081544942 20240811-06:25:32 test_perplexity 240 model 4 1.1633101005678599 20240811-06:31:23 test_accuracy 240 model 4 val 1542 / 1614 20240811-06:31:29 test_accuracy 240 model 3 val 1517 / 1588 20240811-06:31:30 wrote gpt_003.pth 20240811-06:31:31 wrote gpt_004.pth 20240811-06:32:04 wrote non_validated_0240_03.png 20240811-06:32:36 wrote non_validated_0240_04.png 20240811-06:32:37 wrote state.pth 20240811-06:32:37 --- epoch 241 ---------------------------------------- 20240811-06:32:37 current_test_accuracies 0.9525 0.9578 0.9574 0.9553 0.9554 0.9525 0.9511 0.9512 0.9503 0.9539 20240811-06:36:00 keep c_quizzes model 0 validated 14 / 420 (3.33%) nb_accumulated 14 / 420 (finishes Sun 08:14 -- 247/h) 20240811-06:39:01 keep c_quizzes model 3 validated 23 / 420 (5.48%) nb_accumulated 37 / 420 (finishes Sun 07:45 -- 346/h) 20240811-06:42:00 keep c_quizzes model 1 validated 22 / 420 (5.24%) nb_accumulated 59 / 420 (finishes Sun 07:39 -- 376/h) 20240811-06:45:00 keep c_quizzes model 7 validated 19 / 420 (4.52%) nb_accumulated 78 / 420 (finishes Sun 07:39 -- 377/h) 20240811-06:48:00 keep c_quizzes model 9 validated 17 / 420 (4.05%) nb_accumulated 95 / 420 (finishes Sun 07:40 -- 370/h) 20240811-06:50:59 keep c_quizzes model 5 validated 28 / 420 (6.67%) nb_accumulated 123 / 420 (finishes Sun 07:35 -- 401/h) 20240811-06:53:59 keep c_quizzes model 0 validated 25 / 420 (5.95%) nb_accumulated 148 / 420 (finishes Sun 07:33 -- 415/h) 20240811-06:56:58 keep c_quizzes model 8 validated 19 / 420 (4.52%) nb_accumulated 167 / 420 (finishes Sun 07:33 -- 411/h) 20240811-06:59:57 keep c_quizzes model 1 validated 18 / 420 (4.29%) nb_accumulated 185 / 420 (finishes Sun 07:34 -- 405/h) 20240811-07:02:57 keep c_quizzes model 2 validated 19 / 420 (4.52%) nb_accumulated 204 / 420 (finishes Sun 07:35 -- 403/h) 20240811-07:05:57 keep c_quizzes model 1 validated 26 / 420 (6.19%) nb_accumulated 230 / 420 (finishes Sun 07:33 -- 413/h) 20240811-07:08:56 keep c_quizzes model 8 validated 23 / 420 (5.48%) nb_accumulated 253 / 420 (finishes Sun 07:32 -- 417/h) 20240811-07:11:56 keep c_quizzes model 2 validated 22 / 420 (5.24%) nb_accumulated 275 / 420 (finishes Sun 07:32 -- 419/h) 20240811-07:14:57 keep c_quizzes model 0 validated 25 / 420 (5.95%) nb_accumulated 300 / 420 (finishes Sun 07:31 -- 425/h) 20240811-07:17:57 keep c_quizzes model 0 validated 18 / 420 (4.29%) nb_accumulated 318 / 420 (finishes Sun 07:32 -- 420/h) 20240811-07:20:59 keep c_quizzes model 4 validated 20 / 420 (4.76%) nb_accumulated 338 / 420 (finishes Sun 07:32 -- 419/h) 20240811-07:23:59 keep c_quizzes model 0 validated 24 / 420 (5.71%) nb_accumulated 362 / 420 (finishes Sun 07:32 -- 422/h) 20240811-07:26:59 keep c_quizzes model 2 validated 30 / 420 (7.14%) nb_accumulated 392 / 420 (finishes Sun 07:30 -- 432/h) 20240811-07:29:59 keep c_quizzes model 4 validated 17 / 420 (4.05%) nb_accumulated 409 / 420 (finishes Sun 07:31 -- 427/h) 20240811-07:33:00 keep c_quizzes model 4 validated 11 / 420 (2.62%) nb_accumulated 420 / 420 (finishes now! -- 417/h) 20240811-07:33:13 wrote c_quizzes.pth 20240811-07:33:13 training model 0 20240811-07:33:13 training model 1 20240811-07:36:55 train_perplexity 241 model 1 1.1634621409167516 20240811-07:36:57 train_perplexity 241 model 0 1.1629489645378104 20240811-07:37:02 test_perplexity 241 model 1 1.1637638484362731 20240811-07:37:03 test_perplexity 241 model 0 1.1612471543322218 20240811-07:42:54 test_accuracy 241 model 1 val 1505 / 1589 20240811-07:42:56 test_accuracy 241 model 0 val 1495 / 1570 20240811-07:42:58 wrote gpt_000.pth 20240811-07:42:58 wrote gpt_001.pth 20240811-07:43:31 wrote non_validated_0241_00.png 20240811-07:44:04 wrote non_validated_0241_01.png 20240811-07:44:04 wrote state.pth 20240811-07:44:04 --- epoch 242 ---------------------------------------- 20240811-07:44:04 current_test_accuracies 0.9522 0.9471 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 20240811-07:44:04 training model 2 20240811-07:44:04 training model 3 20240811-07:47:47 train_perplexity 242 model 3 1.163247863568804 20240811-07:47:47 train_perplexity 242 model 2 1.1628863431899459 20240811-07:47:54 test_perplexity 242 model 3 1.1624208493388324 20240811-07:47:54 test_perplexity 242 model 2 1.1636212078538368 20240811-07:53:41 test_accuracy 242 model 3 val 1527 / 1616 20240811-07:53:44 test_accuracy 242 model 2 val 1522 / 1592 20240811-07:53:46 wrote gpt_002.pth 20240811-07:53:47 wrote gpt_003.pth 20240811-07:54:19 wrote non_validated_0242_02.png 20240811-07:54:52 wrote non_validated_0242_03.png 20240811-07:54:52 wrote state.pth 20240811-07:54:52 --- epoch 243 ---------------------------------------- 20240811-07:54:52 current_test_accuracies 0.9522 0.9471 0.9560 0.9449 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 20240811-07:54:52 training model 4 20240811-07:54:52 training model 5 20240811-07:58:35 train_perplexity 243 model 5 1.1631702429449478 20240811-07:58:36 train_perplexity 243 model 4 1.1631653543206426 20240811-07:58:42 test_perplexity 243 model 5 1.1641308063403615 20240811-07:58:42 test_perplexity 243 model 4 1.1619282608731782 20240811-08:04:34 test_accuracy 243 model 4 val 1524 / 1603 20240811-08:04:37 test_accuracy 243 model 5 val 1495 / 1584 20240811-08:04:39 wrote gpt_004.pth 20240811-08:04:40 wrote gpt_005.pth 20240811-08:05:12 wrote non_validated_0243_04.png 20240811-08:05:45 wrote non_validated_0243_05.png 20240811-08:05:45 wrote state.pth 20240811-08:05:45 --- epoch 244 ---------------------------------------- 20240811-08:05:45 current_test_accuracies 0.9522 0.9471 0.9560 0.9449 0.9507 0.9438 0.0000 0.0000 0.0000 0.0000 20240811-08:05:45 training model 6 20240811-08:05:45 training model 7 20240811-08:09:27 train_perplexity 244 model 7 1.1631930129227355 20240811-08:09:28 train_perplexity 244 model 6 1.1628560138978457 20240811-08:09:35 test_perplexity 244 model 7 1.1617495643058446 20240811-08:09:35 test_perplexity 244 model 6 1.1641575490634668 20240811-08:15:29 test_accuracy 244 model 6 val 1504 / 1596 20240811-08:15:32 test_accuracy 244 model 7 val 1500 / 1577 20240811-08:15:33 wrote gpt_006.pth 20240811-08:15:34 wrote gpt_007.pth 20240811-08:16:07 wrote non_validated_0244_06.png 20240811-08:16:39 wrote non_validated_0244_07.png 20240811-08:16:39 wrote state.pth 20240811-08:16:39 --- epoch 245 ---------------------------------------- 20240811-08:16:39 current_test_accuracies 0.9522 0.9471 0.9560 0.9449 0.9507 0.9438 0.9424 0.9512 0.0000 0.0000 20240811-08:16:39 training model 8 20240811-08:16:39 training model 9 20240811-08:20:21 train_perplexity 245 model 9 1.1637352480938499 20240811-08:20:25 train_perplexity 245 model 8 1.162798606455189 20240811-08:20:28 test_perplexity 245 model 9 1.1634899111907078 20240811-08:20:30 test_perplexity 245 model 8 1.1624691511232685 20240811-08:26:19 test_accuracy 245 model 9 val 1517 / 1607 20240811-08:26:21 test_accuracy 245 model 8 val 1520 / 1607 20240811-08:26:23 wrote gpt_008.pth 20240811-08:26:24 wrote gpt_009.pth 20240811-08:26:56 wrote non_validated_0245_08.png 20240811-08:27:29 wrote non_validated_0245_09.png 20240811-08:27:29 wrote state.pth 20240811-08:27:29 --- epoch 246 ---------------------------------------- 20240811-08:27:29 current_test_accuracies 0.9522 0.9471 0.9560 0.9449 0.9507 0.9438 0.9424 0.9512 0.9459 0.9440 20240811-08:27:29 training model 6 20240811-08:27:29 training model 5 20240811-08:31:11 train_perplexity 246 model 5 1.1627479068485624 20240811-08:31:12 train_perplexity 246 model 6 1.1624184890432838 20240811-08:31:19 test_perplexity 246 model 5 1.162833355061741 20240811-08:31:19 test_perplexity 246 model 6 1.1632094617697044 20240811-08:37:14 test_accuracy 246 model 5 val 1529 / 1605 20240811-08:37:14 test_accuracy 246 model 6 val 1529 / 1600 20240811-08:37:17 wrote gpt_006.pth 20240811-08:37:17 wrote gpt_005.pth 20240811-08:37:50 wrote non_validated_0246_06.png 20240811-08:38:22 wrote non_validated_0246_05.png 20240811-08:38:22 wrote state.pth 20240811-08:38:22 --- epoch 247 ---------------------------------------- 20240811-08:38:22 current_test_accuracies 0.9522 0.9471 0.9560 0.9449 0.9507 0.9526 0.9556 0.9512 0.9459 0.9440 20240811-08:38:22 training model 9 20240811-08:38:22 training model 3 20240811-08:42:05 train_perplexity 247 model 3 1.162990176442427 20240811-08:42:05 train_perplexity 247 model 9 1.1632158646609498 20240811-08:42:12 test_perplexity 247 model 3 1.1633910233615479 20240811-08:42:12 test_perplexity 247 model 9 1.1654329916240038 20240811-08:47:56 test_accuracy 247 model 9 val 1534 / 1640 20240811-08:48:05 test_accuracy 247 model 3 val 1512 / 1589 20240811-08:48:07 wrote gpt_009.pth 20240811-08:48:07 wrote gpt_003.pth 20240811-08:48:40 wrote non_validated_0247_09.png 20240811-08:49:12 wrote non_validated_0247_03.png 20240811-08:49:13 wrote state.pth 20240811-08:49:13 --- epoch 248 ---------------------------------------- 20240811-08:49:13 current_test_accuracies 0.9522 0.9471 0.9560 0.9515 0.9507 0.9526 0.9556 0.9512 0.9459 0.9354 20240811-08:49:13 training model 9 20240811-08:49:13 training model 8 20240811-08:52:55 train_perplexity 248 model 9 1.1628901846005881 20240811-08:52:55 train_perplexity 248 model 8 1.1629111365314895 20240811-08:53:02 test_perplexity 248 model 9 1.164417095974621 20240811-08:53:02 test_perplexity 248 model 8 1.1633567044651072 20240811-08:58:52 test_accuracy 248 model 8 val 1542 / 1621 20240811-08:58:53 test_accuracy 248 model 9 val 1553 / 1619 20240811-08:58:55 wrote gpt_009.pth 20240811-08:58:56 wrote gpt_008.pth 20240811-08:59:29 wrote non_validated_0248_09.png 20240811-09:00:01 wrote non_validated_0248_08.png 20240811-09:00:02 wrote state.pth 20240811-09:00:02 --- epoch 249 ---------------------------------------- 20240811-09:00:02 current_test_accuracies 0.9522 0.9471 0.9560 0.9515 0.9507 0.9526 0.9556 0.9512 0.9513 0.9592 20240811-09:00:02 training model 1 20240811-09:00:02 training model 4 20240811-09:03:44 train_perplexity 249 model 4 1.1631570200001 20240811-09:03:44 train_perplexity 249 model 1 1.1630718180977782 20240811-09:03:51 test_perplexity 249 model 4 1.161478217674258 20240811-09:03:52 test_perplexity 249 model 1 1.1621973516454012 20240811-09:09:46 test_accuracy 249 model 4 val 1543 / 1613 20240811-09:09:51 test_accuracy 249 model 1 val 1496 / 1572 20240811-09:09:53 wrote gpt_001.pth 20240811-09:09:54 wrote gpt_004.pth 20240811-09:10:26 wrote non_validated_0249_01.png 20240811-09:10:58 wrote non_validated_0249_04.png 20240811-09:10:59 wrote state.pth 20240811-09:10:59 --- epoch 250 ---------------------------------------- 20240811-09:10:59 current_test_accuracies 0.9522 0.9517 0.9560 0.9515 0.9566 0.9526 0.9556 0.9512 0.9513 0.9592 20240811-09:14:23 keep c_quizzes model 4 validated 25 / 420 (5.95%) nb_accumulated 25 / 420 (finishes Sun 10:08 -- 440/h) 20240811-09:17:25 keep c_quizzes model 1 validated 23 / 420 (5.48%) nb_accumulated 48 / 420 (finishes Sun 10:07 -- 447/h) 20240811-09:20:26 keep c_quizzes model 3 validated 23 / 420 (5.48%) nb_accumulated 71 / 420 (finishes Sun 10:06 -- 450/h) 20240811-09:23:28 keep c_quizzes model 8 validated 24 / 420 (5.71%) nb_accumulated 95 / 420 (finishes Sun 10:06 -- 456/h) 20240811-09:26:29 keep c_quizzes model 6 validated 24 / 420 (5.71%) nb_accumulated 119 / 420 (finishes Sun 10:05 -- 460/h) 20240811-09:29:30 keep c_quizzes model 2 validated 24 / 420 (5.71%) nb_accumulated 143 / 420 (finishes Sun 10:05 -- 463/h) 20240811-09:32:32 keep c_quizzes model 2 validated 18 / 420 (4.29%) nb_accumulated 161 / 420 (finishes Sun 10:07 -- 448/h) 20240811-09:35:34 keep c_quizzes model 2 validated 29 / 420 (6.90%) nb_accumulated 190 / 420 (finishes Sun 10:05 -- 463/h) 20240811-09:38:35 keep c_quizzes model 0 validated 22 / 420 (5.24%) nb_accumulated 212 / 420 (finishes Sun 10:05 -- 460/h) 20240811-09:41:36 keep c_quizzes model 9 validated 17 / 420 (4.05%) nb_accumulated 229 / 420 (finishes Sun 10:07 -- 448/h) 20240811-09:44:37 keep c_quizzes model 6 validated 13 / 420 (3.10%) nb_accumulated 242 / 420 (finishes Sun 10:09 -- 431/h) 20240811-09:47:38 keep c_quizzes model 6 validated 33 / 420 (7.86%) nb_accumulated 275 / 420 (finishes Sun 10:06 -- 450/h) 20240811-09:50:39 keep c_quizzes model 1 validated 24 / 420 (5.71%) nb_accumulated 299 / 420 (finishes Sun 10:06 -- 452/h) 20240811-09:53:40 keep c_quizzes model 2 validated 27 / 420 (6.43%) nb_accumulated 326 / 420 (finishes Sun 10:05 -- 458/h) 20240811-09:56:41 keep c_quizzes model 4 validated 26 / 420 (6.19%) nb_accumulated 352 / 420 (finishes Sun 10:05 -- 462/h) 20240811-09:59:42 keep c_quizzes model 0 validated 22 / 420 (5.24%) nb_accumulated 374 / 420 (finishes Sun 10:05 -- 460/h) 20240811-10:02:43 keep c_quizzes model 8 validated 23 / 420 (5.48%) nb_accumulated 397 / 420 (finishes Sun 10:05 -- 460/h) 20240811-10:05:44 keep c_quizzes model 8 validated 24 / 420 (5.71%) nb_accumulated 421 / 420 (finishes now! -- 461/h) 20240811-10:05:58 wrote c_quizzes.pth 20240811-10:05:58 training model 0 20240811-10:05:58 training model 1 20240811-10:09:40 train_perplexity 250 model 0 1.1633443926609448 20240811-10:09:40 train_perplexity 250 model 1 1.1638732979223076 20240811-10:09:47 test_perplexity 250 model 0 1.165180688916372 20240811-10:09:47 test_perplexity 250 model 1 1.1673591102224499 20240811-10:15:26 test_accuracy 250 model 1 val 1550 / 1646 20240811-10:15:33 test_accuracy 250 model 0 val 1520 / 1603 20240811-10:15:35 wrote gpt_000.pth 20240811-10:15:35 wrote gpt_001.pth 20240811-10:16:08 wrote non_validated_0250_00.png 20240811-10:16:41 wrote non_validated_0250_01.png 20240811-10:16:41 wrote state.pth 20240811-10:16:41 --- epoch 251 ---------------------------------------- 20240811-10:16:41 current_test_accuracies 0.9482 0.9417 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 20240811-10:16:41 training model 2 20240811-10:16:41 training model 3 20240811-10:20:24 train_perplexity 251 model 3 1.1641161904430695 20240811-10:20:24 train_perplexity 251 model 2 1.1633377734354282 20240811-10:20:31 test_perplexity 251 model 3 1.1647542922749778 20240811-10:20:31 test_perplexity 251 model 2 1.1642362735525156 20240811-10:26:15 test_accuracy 251 model 3 val 1533 / 1622 20240811-10:26:23 test_accuracy 251 model 2 val 1482 / 1578 20240811-10:26:25 wrote gpt_002.pth 20240811-10:26:26 wrote gpt_003.pth 20240811-10:26:58 wrote non_validated_0251_02.png 20240811-10:27:31 wrote non_validated_0251_03.png 20240811-10:27:31 wrote state.pth 20240811-10:27:31 --- epoch 252 ---------------------------------------- 20240811-10:27:31 current_test_accuracies 0.9482 0.9417 0.9392 0.9451 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 20240811-10:27:31 training model 4 20240811-10:27:31 training model 5 20240811-10:31:15 train_perplexity 252 model 5 1.1639875186622357 20240811-10:31:15 train_perplexity 252 model 4 1.1638327205651013 20240811-10:31:22 test_perplexity 252 model 5 1.16342575219253 20240811-10:31:22 test_perplexity 252 model 4 1.1631963514658483 20240811-10:37:12 test_accuracy 252 model 4 val 1512 / 1600 20240811-10:37:13 test_accuracy 252 model 5 val 1525 / 1604 20240811-10:37:15 wrote gpt_004.pth 20240811-10:37:16 wrote gpt_005.pth 20240811-10:37:48 wrote non_validated_0252_04.png 20240811-10:38:21 wrote non_validated_0252_05.png 20240811-10:38:21 wrote state.pth 20240811-10:38:21 --- epoch 253 ---------------------------------------- 20240811-10:38:21 current_test_accuracies 0.9482 0.9417 0.9392 0.9451 0.9450 0.9507 0.0000 0.0000 0.0000 0.0000 20240811-10:38:21 training model 6 20240811-10:38:21 training model 7 20240811-10:42:04 train_perplexity 253 model 7 1.1637919628962257 20240811-10:42:07 train_perplexity 253 model 6 1.1631084035260826 20240811-10:42:10 test_perplexity 253 model 7 1.1618194054240205 20240811-10:42:12 test_perplexity 253 model 6 1.1643490180590066 20240811-10:48:02 test_accuracy 253 model 6 val 1528 / 1606 20240811-10:48:04 test_accuracy 253 model 7 val 1502 / 1587 20240811-10:48:06 wrote gpt_006.pth 20240811-10:48:07 wrote gpt_007.pth 20240811-10:48:39 wrote non_validated_0253_06.png 20240811-10:49:12 wrote non_validated_0253_07.png 20240811-10:49:12 wrote state.pth 20240811-10:49:12 --- epoch 254 ---------------------------------------- 20240811-10:49:12 current_test_accuracies 0.9482 0.9417 0.9392 0.9451 0.9450 0.9507 0.9514 0.9464 0.0000 0.0000 20240811-10:49:12 training model 8 20240811-10:49:12 training model 9 20240811-10:52:55 train_perplexity 254 model 9 1.1640168928000039 20240811-10:52:59 train_perplexity 254 model 8 1.1639462460010406 20240811-10:53:01 test_perplexity 254 model 9 1.1638035857070996 20240811-10:53:04 test_perplexity 254 model 8 1.162912765032175 20240811-10:58:55 test_accuracy 254 model 9 val 1517 / 1586 20240811-10:58:58 test_accuracy 254 model 8 val 1510 / 1579 20240811-10:59:00 wrote gpt_008.pth 20240811-10:59:01 wrote gpt_009.pth 20240811-10:59:34 wrote non_validated_0254_08.png 20240811-11:00:07 wrote non_validated_0254_09.png 20240811-11:00:07 wrote state.pth 20240811-11:00:07 --- epoch 255 ---------------------------------------- 20240811-11:00:07 current_test_accuracies 0.9482 0.9417 0.9392 0.9451 0.9450 0.9507 0.9514 0.9464 0.9563 0.9565 20240811-11:00:07 training model 2 20240811-11:00:07 training model 1 20240811-11:03:49 train_perplexity 255 model 1 1.1636947982871613 20240811-11:03:55 test_perplexity 255 model 1 1.1650514759894044 20240811-11:03:55 train_perplexity 255 model 2 1.1632150689342624 20240811-11:03:59 test_perplexity 255 model 2 1.1632473165159745 20240811-11:09:43 test_accuracy 255 model 1 val 1562 / 1628 20240811-11:09:44 test_accuracy 255 model 2 val 1546 / 1626 20240811-11:09:46 wrote gpt_002.pth 20240811-11:09:46 wrote gpt_001.pth 20240811-11:10:19 wrote non_validated_0255_02.png 20240811-11:10:52 wrote non_validated_0255_01.png 20240811-11:10:52 wrote state.pth 20240811-11:10:52 --- epoch 256 ---------------------------------------- 20240811-11:10:52 current_test_accuracies 0.9482 0.9595 0.9508 0.9451 0.9450 0.9507 0.9514 0.9464 0.9563 0.9565 20240811-11:10:52 training model 4 20240811-11:10:52 training model 3 20240811-11:14:34 train_perplexity 256 model 3 1.1634228865225258 20240811-11:14:36 train_perplexity 256 model 4 1.1636512228200957 20240811-11:14:41 test_perplexity 256 model 3 1.1646080267321477 20240811-11:14:42 test_perplexity 256 model 4 1.162883927966076 20240811-11:20:36 test_accuracy 256 model 4 val 1523 / 1601 20240811-11:20:39 test_accuracy 256 model 3 val 1511 / 1588 20240811-11:20:41 wrote gpt_004.pth 20240811-11:20:41 wrote gpt_003.pth 20240811-11:21:14 wrote non_validated_0256_04.png 20240811-11:21:47 wrote non_validated_0256_03.png 20240811-11:21:47 wrote state.pth 20240811-11:21:47 --- epoch 257 ---------------------------------------- 20240811-11:21:47 current_test_accuracies 0.9482 0.9595 0.9508 0.9515 0.9513 0.9507 0.9514 0.9464 0.9563 0.9565 20240811-11:21:47 training model 7 20240811-11:21:47 training model 0 20240811-11:25:30 train_perplexity 257 model 0 1.16354305738748 20240811-11:25:31 train_perplexity 257 model 7 1.1636951339865889 20240811-11:25:37 test_perplexity 257 model 0 1.1638891812852854 20240811-11:25:37 test_perplexity 257 model 7 1.16300803179088 20240811-11:31:26 test_accuracy 257 model 7 val 1560 / 1631 20240811-11:31:31 test_accuracy 257 model 0 val 1519 / 1594 20240811-11:31:33 wrote gpt_007.pth 20240811-11:31:34 wrote gpt_000.pth 20240811-11:32:06 wrote non_validated_0257_07.png 20240811-11:32:39 wrote non_validated_0257_00.png 20240811-11:32:39 wrote state.pth 20240811-11:32:39 --- epoch 258 ---------------------------------------- 20240811-11:32:39 current_test_accuracies 0.9529 0.9595 0.9508 0.9515 0.9513 0.9507 0.9514 0.9565 0.9563 0.9565 20240811-11:36:04 keep c_quizzes model 9 validated 25 / 420 (5.95%) nb_accumulated 25 / 420 (finishes Sun 12:30 -- 438/h) 20240811-11:39:07 keep c_quizzes model 1 validated 21 / 420 (5.00%) nb_accumulated 46 / 420 (finishes Sun 12:31 -- 427/h) 20240811-11:42:09 keep c_quizzes model 5 validated 18 / 420 (4.29%) nb_accumulated 64 / 420 (finishes Sun 12:34 -- 404/h) 20240811-11:45:11 keep c_quizzes model 1 validated 13 / 420 (3.10%) nb_accumulated 77 / 420 (finishes Sun 12:41 -- 368/h) 20240811-11:48:14 keep c_quizzes model 4 validated 22 / 420 (5.24%) nb_accumulated 99 / 420 (finishes Sun 12:38 -- 381/h) 20240811-11:51:17 keep c_quizzes model 1 validated 16 / 420 (3.81%) nb_accumulated 115 / 420 (finishes Sun 12:40 -- 370/h) 20240811-11:54:19 keep c_quizzes model 1 validated 24 / 420 (5.71%) nb_accumulated 139 / 420 (finishes Sun 12:38 -- 385/h) 20240811-11:57:21 keep c_quizzes model 4 validated 18 / 420 (4.29%) nb_accumulated 157 / 420 (finishes Sun 12:38 -- 381/h) 20240811-12:00:23 keep c_quizzes model 8 validated 27 / 420 (6.43%) nb_accumulated 184 / 420 (finishes Sun 12:35 -- 398/h) 20240811-12:03:25 keep c_quizzes model 6 validated 19 / 420 (4.52%) nb_accumulated 203 / 420 (finishes Sun 12:36 -- 396/h) 20240811-12:06:27 keep c_quizzes model 3 validated 15 / 420 (3.57%) nb_accumulated 218 / 420 (finishes Sun 12:37 -- 387/h) 20240811-12:09:29 keep c_quizzes model 3 validated 26 / 420 (6.19%) nb_accumulated 244 / 420 (finishes Sun 12:36 -- 397/h) 20240811-12:12:31 keep c_quizzes model 2 validated 24 / 420 (5.71%) nb_accumulated 268 / 420 (finishes Sun 12:35 -- 403/h) 20240811-12:15:33 keep c_quizzes model 7 validated 21 / 420 (5.00%) nb_accumulated 289 / 420 (finishes Sun 12:34 -- 404/h) 20240811-12:18:35 keep c_quizzes model 9 validated 10 / 420 (2.38%) nb_accumulated 299 / 420 (finishes Sun 12:37 -- 390/h) 20240811-12:21:36 keep c_quizzes model 2 validated 14 / 420 (3.33%) nb_accumulated 313 / 420 (finishes Sun 12:38 -- 383/h) 20240811-12:24:39 keep c_quizzes model 5 validated 15 / 420 (3.57%) nb_accumulated 328 / 420 (finishes Sun 12:39 -- 378/h) 20240811-12:27:42 keep c_quizzes model 8 validated 17 / 420 (4.05%) nb_accumulated 345 / 420 (finishes Sun 12:39 -- 376/h) 20240811-12:30:43 keep c_quizzes model 8 validated 17 / 420 (4.05%) nb_accumulated 362 / 420 (finishes Sun 12:40 -- 374/h) 20240811-12:33:46 keep c_quizzes model 0 validated 17 / 420 (4.05%) nb_accumulated 379 / 420 (finishes Sun 12:40 -- 372/h) 20240811-12:36:48 keep c_quizzes model 2 validated 24 / 420 (5.71%) nb_accumulated 403 / 420 (finishes Sun 12:39 -- 376/h) 20240811-12:39:50 keep c_quizzes model 3 validated 19 / 420 (4.52%) nb_accumulated 422 / 420 (finishes now! -- 376/h) 20240811-12:40:04 wrote c_quizzes.pth 20240811-12:40:04 training model 0 20240811-12:40:04 training model 1 20240811-12:43:45 train_perplexity 258 model 1 1.1640809302878095 20240811-12:43:47 train_perplexity 258 model 0 1.1639438910254902 20240811-12:43:52 test_perplexity 258 model 1 1.1643412050947155 20240811-12:43:53 test_perplexity 258 model 0 1.1644849260471546 20240811-12:49:45 test_accuracy 258 model 0 val 1537 / 1604 20240811-12:49:48 test_accuracy 258 model 1 val 1494 / 1580 20240811-12:49:50 wrote gpt_000.pth 20240811-12:49:50 wrote gpt_001.pth 20240811-12:50:23 wrote non_validated_0258_00.png 20240811-12:50:56 wrote non_validated_0258_01.png 20240811-12:50:56 wrote state.pth 20240811-12:50:56 --- epoch 259 ---------------------------------------- 20240811-12:50:56 current_test_accuracies 0.9582 0.9456 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 20240811-12:50:56 training model 2 20240811-12:50:56 training model 3 20240811-12:54:38 train_perplexity 259 model 2 1.1639896854428398 20240811-12:54:38 train_perplexity 259 model 3 1.1645275574621898 20240811-12:54:45 test_perplexity 259 model 2 1.1665919506168407 20240811-12:54:46 test_perplexity 259 model 3 1.1646258331691433 20240811-13:00:38 test_accuracy 259 model 3 val 1521 / 1605 20240811-13:00:41 test_accuracy 259 model 2 val 1501 / 1594 20240811-13:00:42 wrote gpt_002.pth 20240811-13:00:43 wrote gpt_003.pth 20240811-13:01:16 wrote non_validated_0259_02.png 20240811-13:01:48 wrote non_validated_0259_03.png 20240811-13:01:49 wrote state.pth 20240811-13:01:49 --- epoch 260 ---------------------------------------- 20240811-13:01:49 current_test_accuracies 0.9582 0.9456 0.9417 0.9477 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 20240811-13:01:49 training model 4 20240811-13:01:49 training model 5 20240811-13:05:31 train_perplexity 260 model 5 1.1643309848623127 20240811-13:05:32 train_perplexity 260 model 4 1.164153844271704 20240811-13:05:38 test_perplexity 260 model 5 1.1650592199366294 20240811-13:05:39 test_perplexity 260 model 4 1.1658635666456427 20240811-13:11:32 test_accuracy 260 model 4 val 1526 / 1613 20240811-13:11:33 test_accuracy 260 model 5 val 1521 / 1602 20240811-13:11:35 wrote gpt_004.pth 20240811-13:11:36 wrote gpt_005.pth 20240811-13:12:08 wrote non_validated_0260_04.png 20240811-13:12:41 wrote non_validated_0260_05.png 20240811-13:12:41 wrote state.pth 20240811-13:12:41 --- epoch 261 ---------------------------------------- 20240811-13:12:41 current_test_accuracies 0.9582 0.9456 0.9417 0.9477 0.9461 0.9494 0.0000 0.0000 0.0000 0.0000 20240811-13:12:41 training model 6 20240811-13:12:41 training model 7 20240811-13:16:24 train_perplexity 261 model 7 1.1642883968058135 20240811-13:16:24 train_perplexity 261 model 6 1.1639328394183752 20240811-13:16:31 test_perplexity 261 model 6 1.1643376756689952 20240811-13:16:31 test_perplexity 261 model 7 1.161776963193053 20240811-13:22:26 test_accuracy 261 model 6 val 1518 / 1595 20240811-13:22:27 test_accuracy 261 model 7 val 1494 / 1582 20240811-13:22:29 wrote gpt_006.pth 20240811-13:22:30 wrote gpt_007.pth 20240811-13:23:03 wrote non_validated_0261_06.png 20240811-13:23:36 wrote non_validated_0261_07.png 20240811-13:23:36 wrote state.pth 20240811-13:23:36 --- epoch 262 ---------------------------------------- 20240811-13:23:36 current_test_accuracies 0.9582 0.9456 0.9417 0.9477 0.9461 0.9494 0.9517 0.9444 0.0000 0.0000 20240811-13:23:36 training model 8 20240811-13:23:36 training model 9 20240811-13:27:18 train_perplexity 262 model 9 1.1638205800400205 20240811-13:27:19 train_perplexity 262 model 8 1.1643065843716973 20240811-13:27:25 test_perplexity 262 model 9 1.1649138758136222 20240811-13:27:26 test_perplexity 262 model 8 1.164484973982546 20240811-13:33:13 test_accuracy 262 model 8 val 1514 / 1616 20240811-13:33:16 test_accuracy 262 model 9 val 1504 / 1593 20240811-13:33:18 wrote gpt_008.pth 20240811-13:33:19 wrote gpt_009.pth 20240811-13:33:51 wrote non_validated_0262_08.png 20240811-13:34:24 wrote non_validated_0262_09.png 20240811-13:34:25 wrote state.pth 20240811-13:34:25 --- epoch 263 ---------------------------------------- 20240811-13:34:25 current_test_accuracies 0.9582 0.9456 0.9417 0.9477 0.9461 0.9494 0.9517 0.9444 0.9369 0.9441 20240811-13:34:25 training model 8 20240811-13:34:25 training model 2 20240811-13:38:07 train_perplexity 263 model 2 1.1637140966313035 20240811-13:38:09 train_perplexity 263 model 8 1.164026941553864 20240811-13:38:14 test_perplexity 263 model 2 1.165222878277804 20240811-13:38:15 test_perplexity 263 model 8 1.1632502801552231 20240811-13:44:08 test_accuracy 263 model 8 val 1527 / 1611 20240811-13:44:13 test_accuracy 263 model 2 val 1519 / 1589 20240811-13:44:15 wrote gpt_008.pth 20240811-13:44:15 wrote gpt_002.pth 20240811-13:44:48 wrote non_validated_0263_08.png 20240811-13:45:21 wrote non_validated_0263_02.png 20240811-13:45:21 wrote state.pth 20240811-13:45:21 --- epoch 264 ---------------------------------------- 20240811-13:45:21 current_test_accuracies 0.9582 0.9456 0.9559 0.9477 0.9461 0.9494 0.9517 0.9444 0.9479 0.9441 20240811-13:45:21 training model 9 20240811-13:45:21 training model 7 20240811-13:49:03 train_perplexity 264 model 7 1.1640344048766929 20240811-13:49:06 train_perplexity 264 model 9 1.164089621688403 20240811-13:49:10 test_perplexity 264 model 7 1.1629370487137114 20240811-13:49:12 test_perplexity 264 model 9 1.1652486989057653 20240811-13:55:00 test_accuracy 264 model 9 val 1521 / 1601 20240811-13:55:06 test_accuracy 264 model 7 val 1494 / 1569 20240811-13:55:08 wrote gpt_009.pth 20240811-13:55:08 wrote gpt_007.pth 20240811-13:55:41 wrote non_validated_0264_09.png 20240811-13:56:14 wrote non_validated_0264_07.png 20240811-13:56:14 wrote state.pth 20240811-13:56:14 --- epoch 265 ---------------------------------------- 20240811-13:56:14 current_test_accuracies 0.9582 0.9456 0.9559 0.9477 0.9461 0.9494 0.9517 0.9522 0.9479 0.9500 20240811-13:56:14 training model 1 20240811-13:56:14 training model 4 20240811-13:59:56 train_perplexity 265 model 4 1.1634759715349545 20240811-13:59:59 train_perplexity 265 model 1 1.164001126398794 20240811-14:00:03 test_perplexity 265 model 4 1.1646262715816553 20240811-14:00:05 test_perplexity 265 model 1 1.1639361611149037 20240811-14:05:55 test_accuracy 265 model 1 val 1554 / 1624 20240811-14:06:02 test_accuracy 265 model 4 val 1516 / 1579 20240811-14:06:04 wrote gpt_001.pth 20240811-14:06:05 wrote gpt_004.pth 20240811-14:06:38 wrote non_validated_0265_01.png 20240811-14:07:11 wrote non_validated_0265_04.png 20240811-14:07:11 wrote state.pth 20240811-14:07:11 --- epoch 266 ---------------------------------------- 20240811-14:07:11 current_test_accuracies 0.9582 0.9569 0.9559 0.9477 0.9601 0.9494 0.9517 0.9522 0.9479 0.9500 20240811-14:07:11 training model 3 20240811-14:07:11 training model 8 20240811-14:10:52 train_perplexity 266 model 8 1.1641158764735704 20240811-14:10:58 test_perplexity 266 model 8 1.1637927185947528 20240811-14:10:58 train_perplexity 266 model 3 1.1647721405861298 20240811-14:11:02 test_perplexity 266 model 3 1.163664507076874 20240811-14:16:52 test_accuracy 266 model 8 val 1547 / 1619 20240811-14:16:56 test_accuracy 266 model 3 val 1526 / 1598 20240811-14:16:58 wrote gpt_003.pth 20240811-14:16:59 wrote gpt_008.pth 20240811-14:17:31 wrote non_validated_0266_03.png 20240811-14:18:04 wrote non_validated_0266_08.png 20240811-14:18:04 wrote state.pth 20240811-14:18:04 --- epoch 267 ---------------------------------------- 20240811-14:18:04 current_test_accuracies 0.9582 0.9569 0.9559 0.9549 0.9601 0.9494 0.9517 0.9522 0.9555 0.9500 20240811-14:18:04 training model 5 20240811-14:18:04 training model 9 20240811-14:21:46 train_perplexity 267 model 9 1.1636099143193661 20240811-14:21:49 train_perplexity 267 model 5 1.163579826041043 20240811-14:21:53 test_perplexity 267 model 9 1.1643814077740127 20240811-14:21:55 test_perplexity 267 model 5 1.1643665171204634 20240811-14:27:47 test_accuracy 267 model 9 val 1518 / 1599 20240811-14:27:52 test_accuracy 267 model 5 val 1476 / 1567 20240811-14:27:53 wrote gpt_005.pth 20240811-14:27:54 wrote gpt_009.pth 20240811-14:28:27 wrote non_validated_0267_05.png 20240811-14:28:59 wrote non_validated_0267_09.png 20240811-14:28:59 wrote state.pth 20240811-14:28:59 --- epoch 268 ---------------------------------------- 20240811-14:28:59 current_test_accuracies 0.9582 0.9569 0.9559 0.9549 0.9601 0.9419 0.9517 0.9522 0.9555 0.9493 20240811-14:28:59 training model 5 20240811-14:28:59 training model 9 20240811-14:32:42 train_perplexity 268 model 9 1.1636819986468891 20240811-14:32:44 train_perplexity 268 model 5 1.1636071899103557 20240811-14:32:49 test_perplexity 268 model 9 1.1640644754162117 20240811-14:32:50 test_perplexity 268 model 5 1.1636201579592602 20240811-14:38:39 test_accuracy 268 model 9 val 1520 / 1600 20240811-14:38:41 test_accuracy 268 model 5 val 1531 / 1609 20240811-14:38:43 wrote gpt_005.pth 20240811-14:38:44 wrote gpt_009.pth 20240811-14:39:17 wrote non_validated_0268_05.png 20240811-14:39:49 wrote non_validated_0268_09.png 20240811-14:39:49 wrote state.pth 20240811-14:39:49 --- epoch 269 ---------------------------------------- 20240811-14:39:49 current_test_accuracies 0.9582 0.9569 0.9559 0.9549 0.9601 0.9515 0.9517 0.9522 0.9555 0.9500 20240811-14:43:13 keep c_quizzes model 3 validated 17 / 420 (4.05%) nb_accumulated 17 / 420 (finishes Sun 16:03 -- 300/h) 20240811-14:46:14 keep c_quizzes model 2 validated 26 / 420 (6.19%) nb_accumulated 43 / 420 (finishes Sun 15:42 -- 402/h) 20240811-14:49:14 keep c_quizzes model 3 validated 22 / 420 (5.24%) nb_accumulated 65 / 420 (finishes Sun 15:40 -- 414/h) 20240811-14:52:14 keep c_quizzes model 6 validated 23 / 420 (5.48%) nb_accumulated 88 / 420 (finishes Sun 15:39 -- 425/h) 20240811-14:55:14 keep c_quizzes model 5 validated 20 / 420 (4.76%) nb_accumulated 108 / 420 (finishes Sun 15:39 -- 420/h) 20240811-14:58:14 keep c_quizzes model 5 validated 22 / 420 (5.24%) nb_accumulated 130 / 420 (finishes Sun 15:39 -- 423/h) 20240811-15:01:14 keep c_quizzes model 2 validated 26 / 420 (6.19%) nb_accumulated 156 / 420 (finishes Sun 15:37 -- 437/h) 20240811-15:04:14 keep c_quizzes model 0 validated 29 / 420 (6.90%) nb_accumulated 185 / 420 (finishes Sun 15:35 -- 454/h) 20240811-15:07:15 keep c_quizzes model 7 validated 24 / 420 (5.71%) nb_accumulated 209 / 420 (finishes Sun 15:34 -- 457/h) 20240811-15:10:15 keep c_quizzes model 0 validated 20 / 420 (4.76%) nb_accumulated 229 / 420 (finishes Sun 15:35 -- 451/h) 20240811-15:13:14 keep c_quizzes model 2 validated 27 / 420 (6.43%) nb_accumulated 256 / 420 (finishes Sun 15:34 -- 459/h) 20240811-15:16:14 keep c_quizzes model 0 validated 17 / 420 (4.05%) nb_accumulated 273 / 420 (finishes Sun 15:35 -- 449/h) 20240811-15:19:15 keep c_quizzes model 8 validated 23 / 420 (5.48%) nb_accumulated 296 / 420 (finishes Sun 15:35 -- 450/h) 20240811-15:22:15 keep c_quizzes model 5 validated 15 / 420 (3.57%) nb_accumulated 311 / 420 (finishes Sun 15:37 -- 439/h) 20240811-15:25:15 keep c_quizzes model 8 validated 23 / 420 (5.48%) nb_accumulated 334 / 420 (finishes Sun 15:36 -- 441/h) 20240811-15:28:15 keep c_quizzes model 3 validated 16 / 420 (3.81%) nb_accumulated 350 / 420 (finishes Sun 15:37 -- 433/h) 20240811-15:31:15 keep c_quizzes model 4 validated 15 / 420 (3.57%) nb_accumulated 365 / 420 (finishes Sun 15:39 -- 425/h) 20240811-15:34:15 keep c_quizzes model 4 validated 27 / 420 (6.43%) nb_accumulated 392 / 420 (finishes Sun 15:38 -- 432/h) 20240811-15:37:16 keep c_quizzes model 9 validated 18 / 420 (4.29%) nb_accumulated 410 / 420 (finishes Sun 15:38 -- 428/h) 20240811-15:40:16 keep c_quizzes model 7 validated 22 / 420 (5.24%) nb_accumulated 432 / 420 (finishes now! -- 428/h) 20240811-15:40:29 wrote c_quizzes.pth 20240811-15:40:29 training model 0 20240811-15:40:29 training model 1 20240811-15:44:11 train_perplexity 269 model 1 1.1650808723274617 20240811-15:44:12 train_perplexity 269 model 0 1.1646067956174992 20240811-15:44:18 test_perplexity 269 model 1 1.1645009127181047 20240811-15:44:19 test_perplexity 269 model 0 1.164683296985014 20240811-15:50:16 test_accuracy 269 model 1 val 1535 / 1594 20240811-15:50:17 test_accuracy 269 model 0 val 1507 / 1590 20240811-15:50:19 wrote gpt_000.pth 20240811-15:50:20 wrote gpt_001.pth 20240811-15:50:52 wrote non_validated_0269_00.png 20240811-15:51:25 wrote non_validated_0269_01.png 20240811-15:51:25 wrote state.pth 20240811-15:51:25 --- epoch 270 ---------------------------------------- 20240811-15:51:25 current_test_accuracies 0.9478 0.9630 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 20240811-15:51:25 training model 2 20240811-15:51:25 training model 3 20240811-15:55:08 train_perplexity 270 model 3 1.1650865575018534 20240811-15:55:10 train_perplexity 270 model 2 1.1646550071484303 20240811-15:55:15 test_perplexity 270 model 3 1.1657508203156874 20240811-15:55:16 test_perplexity 270 model 2 1.1645975575376546 20240811-16:01:07 test_accuracy 270 model 3 val 1502 / 1603 20240811-16:01:12 test_accuracy 270 model 2 val 1497 / 1589 20240811-16:01:14 wrote gpt_002.pth 20240811-16:01:15 wrote gpt_003.pth 20240811-16:01:47 wrote non_validated_0270_02.png 20240811-16:02:20 wrote non_validated_0270_03.png 20240811-16:02:20 wrote state.pth 20240811-16:02:20 --- epoch 271 ---------------------------------------- 20240811-16:02:20 current_test_accuracies 0.9478 0.9630 0.9421 0.9370 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 20240811-16:02:20 training model 4 20240811-16:02:20 training model 5 20240811-16:06:02 train_perplexity 271 model 5 1.1641475347871297 20240811-16:06:03 train_perplexity 271 model 4 1.1644142445415755 20240811-16:06:10 test_perplexity 271 model 5 1.1650972203846552 20240811-16:06:10 test_perplexity 271 model 4 1.166346577064716 20240811-16:12:00 test_accuracy 271 model 5 val 1541 / 1621 20240811-16:12:06 test_accuracy 271 model 4 val 1498 / 1588 20240811-16:12:08 wrote gpt_004.pth 20240811-16:12:08 wrote gpt_005.pth 20240811-16:12:41 wrote non_validated_0271_04.png 20240811-16:13:14 wrote non_validated_0271_05.png 20240811-16:13:14 wrote state.pth 20240811-16:13:14 --- epoch 272 ---------------------------------------- 20240811-16:13:14 current_test_accuracies 0.9478 0.9630 0.9421 0.9370 0.9433 0.9506 0.0000 0.0000 0.0000 0.0000 20240811-16:13:14 training model 6 20240811-16:13:14 training model 7 20240811-16:16:56 train_perplexity 272 model 7 1.164781598993461 20240811-16:16:57 train_perplexity 272 model 6 1.1646775043873414 20240811-16:17:03 test_perplexity 272 model 7 1.1646132418413355 20240811-16:17:04 test_perplexity 272 model 6 1.1652886568291645 20240811-16:23:04 test_accuracy 272 model 7 val 1470 / 1569 20240811-16:23:05 test_accuracy 272 model 6 val 1476 / 1569 20240811-16:23:07 wrote gpt_006.pth 20240811-16:23:08 wrote gpt_007.pth 20240811-16:23:40 wrote non_validated_0272_06.png 20240811-16:24:13 wrote non_validated_0272_07.png 20240811-16:24:13 wrote state.pth 20240811-16:24:13 --- epoch 273 ---------------------------------------- 20240811-16:24:13 current_test_accuracies 0.9478 0.9630 0.9421 0.9370 0.9433 0.9506 0.9407 0.9369 0.0000 0.0000 20240811-16:24:13 training model 8 20240811-16:24:13 training model 9 20240811-16:27:56 train_perplexity 273 model 8 1.1648278996920503 20240811-16:27:56 train_perplexity 273 model 9 1.1644431262095312 20240811-16:28:03 test_perplexity 273 model 8 1.1642448109839323 20240811-16:28:04 test_perplexity 273 model 9 1.1645218583568846 20240811-16:33:56 test_accuracy 273 model 8 val 1520 / 1595 20240811-16:34:00 test_accuracy 273 model 9 val 1500 / 1567 20240811-16:34:02 wrote gpt_008.pth 20240811-16:34:03 wrote gpt_009.pth 20240811-16:34:36 wrote non_validated_0273_08.png 20240811-16:35:08 wrote non_validated_0273_09.png 20240811-16:35:08 wrote state.pth 20240811-16:35:08 --- epoch 274 ---------------------------------------- 20240811-16:35:08 current_test_accuracies 0.9478 0.9630 0.9421 0.9370 0.9433 0.9506 0.9407 0.9369 0.9530 0.9572 20240811-16:35:08 training model 7 20240811-16:35:08 training model 3 20240811-16:38:51 train_perplexity 274 model 3 1.164986714528757 20240811-16:38:51 train_perplexity 274 model 7 1.1649298236202486 20240811-16:38:58 test_perplexity 274 model 3 1.1662660319626172 20240811-16:38:59 test_perplexity 274 model 7 1.1670353242616869 20240811-16:44:55 test_accuracy 274 model 3 val 1524 / 1609 20240811-16:44:55 test_accuracy 274 model 7 val 1549 / 1609 20240811-16:44:57 wrote gpt_007.pth 20240811-16:44:58 wrote gpt_003.pth 20240811-16:45:31 wrote non_validated_0274_07.png 20240811-16:46:03 wrote non_validated_0274_03.png 20240811-16:46:03 wrote state.pth 20240811-16:46:03 --- epoch 275 ---------------------------------------- 20240811-16:46:03 current_test_accuracies 0.9478 0.9630 0.9421 0.9472 0.9433 0.9506 0.9407 0.9627 0.9530 0.9572 20240811-16:46:03 training model 6 20240811-16:46:03 training model 2 20240811-16:49:45 train_perplexity 275 model 2 1.1641155327546109 20240811-16:49:50 train_perplexity 275 model 6 1.1642565691108984 20240811-16:49:51 test_perplexity 275 model 2 1.1669414792034767 20240811-16:49:55 test_perplexity 275 model 6 1.1650452554789135 20240811-16:55:52 test_accuracy 275 model 2 val 1513 / 1590 20240811-16:55:56 test_accuracy 275 model 6 val 1502 / 1586 20240811-16:55:57 wrote gpt_006.pth 20240811-16:55:58 wrote gpt_002.pth 20240811-16:56:31 wrote non_validated_0275_06.png 20240811-16:57:03 wrote non_validated_0275_02.png 20240811-16:57:03 wrote state.pth 20240811-16:57:03 --- epoch 276 ---------------------------------------- 20240811-16:57:03 current_test_accuracies 0.9478 0.9630 0.9516 0.9472 0.9433 0.9506 0.9470 0.9627 0.9530 0.9572 20240811-16:57:03 training model 4 20240811-16:57:03 training model 6 20240811-17:00:46 train_perplexity 276 model 6 1.1641802835066462 20240811-17:00:46 train_perplexity 276 model 4 1.1647076102026528 20240811-17:00:53 test_perplexity 276 model 6 1.1664551140561183 20240811-17:00:53 test_perplexity 276 model 4 1.1663287128179487 20240811-17:06:44 test_accuracy 276 model 4 val 1538 / 1623 20240811-17:06:47 test_accuracy 276 model 6 val 1523 / 1604 20240811-17:06:49 wrote gpt_004.pth 20240811-17:06:50 wrote gpt_006.pth 20240811-17:07:23 wrote non_validated_0276_04.png 20240811-17:07:55 wrote non_validated_0276_06.png 20240811-17:07:55 wrote state.pth 20240811-17:07:55 --- epoch 277 ---------------------------------------- 20240811-17:07:55 current_test_accuracies 0.9478 0.9630 0.9516 0.9472 0.9476 0.9506 0.9495 0.9627 0.9530 0.9572 20240811-17:07:55 training model 3 20240811-17:07:55 training model 4 20240811-17:11:37 train_perplexity 277 model 4 1.1640934913804504 20240811-17:11:41 train_perplexity 277 model 3 1.1644269613639227 20240811-17:11:44 test_perplexity 277 model 4 1.1653845792790058 20240811-17:11:46 test_perplexity 277 model 3 1.165611955583661 20240811-17:17:44 test_accuracy 277 model 3 val 1527 / 1602 20240811-17:17:48 test_accuracy 277 model 4 val 1500 / 1579 20240811-17:17:50 wrote gpt_003.pth 20240811-17:17:50 wrote gpt_004.pth 20240811-17:18:23 wrote non_validated_0277_03.png 20240811-17:18:56 wrote non_validated_0277_04.png 20240811-17:18:56 wrote state.pth 20240811-17:18:56 --- epoch 278 ---------------------------------------- 20240811-17:18:56 current_test_accuracies 0.9478 0.9630 0.9516 0.9532 0.9500 0.9506 0.9495 0.9627 0.9530 0.9572 20240811-17:18:56 training model 0 20240811-17:18:56 training model 6 20240811-17:22:38 train_perplexity 278 model 6 1.164045055560864 20240811-17:22:41 train_perplexity 278 model 0 1.164570971571139 20240811-17:22:45 test_perplexity 278 model 6 1.1658977781611357 20240811-17:22:47 test_perplexity 278 model 0 1.165085418882723 20240811-17:28:36 test_accuracy 278 model 6 val 1533 / 1614 20240811-17:28:39 test_accuracy 278 model 0 val 1528 / 1607 20240811-17:28:41 wrote gpt_000.pth 20240811-17:28:42 wrote gpt_006.pth 20240811-17:29:14 wrote non_validated_0278_00.png 20240811-17:29:47 wrote non_validated_0278_06.png 20240811-17:29:47 wrote state.pth 20240811-17:29:47 --- epoch 279 ---------------------------------------- 20240811-17:29:47 current_test_accuracies 0.9508 0.9630 0.9516 0.9532 0.9500 0.9506 0.9498 0.9627 0.9530 0.9572 20240811-17:29:47 training model 6 20240811-17:29:47 training model 4 20240811-17:33:29 train_perplexity 279 model 4 1.1646290030786273 20240811-17:33:31 train_perplexity 279 model 6 1.164003879595988 20240811-17:33:36 test_perplexity 279 model 4 1.165139689723318 20240811-17:33:37 test_perplexity 279 model 6 1.1647432856812157 20240811-17:39:31 test_accuracy 279 model 6 val 1554 / 1627 20240811-17:39:34 test_accuracy 279 model 4 val 1528 / 1602 20240811-17:39:36 wrote gpt_006.pth 20240811-17:39:36 wrote gpt_004.pth 20240811-17:40:09 wrote non_validated_0279_06.png 20240811-17:40:42 wrote non_validated_0279_04.png 20240811-17:40:42 wrote state.pth 20240811-17:40:42 --- epoch 280 ---------------------------------------- 20240811-17:40:42 current_test_accuracies 0.9508 0.9630 0.9516 0.9532 0.9538 0.9506 0.9551 0.9627 0.9530 0.9572 20240811-17:44:07 keep c_quizzes model 3 validated 25 / 420 (5.95%) nb_accumulated 25 / 420 (finishes Sun 18:38 -- 439/h) 20240811-17:47:09 keep c_quizzes model 8 validated 29 / 420 (6.90%) nb_accumulated 54 / 420 (finishes Sun 18:30 -- 503/h) 20240811-17:50:11 keep c_quizzes model 3 validated 33 / 420 (7.86%) nb_accumulated 87 / 420 (finishes Sun 18:26 -- 550/h) 20240811-17:53:14 keep c_quizzes model 5 validated 34 / 420 (8.10%) nb_accumulated 121 / 420 (finishes Sun 18:24 -- 579/h) 20240811-17:56:19 keep c_quizzes model 0 validated 29 / 420 (6.90%) nb_accumulated 150 / 420 (finishes Sun 18:24 -- 576/h) 20240811-17:59:21 keep c_quizzes model 4 validated 33 / 420 (7.86%) nb_accumulated 183 / 420 (finishes Sun 18:23 -- 588/h) 20240811-18:02:23 keep c_quizzes model 2 validated 31 / 420 (7.38%) nb_accumulated 214 / 420 (finishes Sun 18:23 -- 592/h) 20240811-18:05:25 keep c_quizzes model 0 validated 34 / 420 (8.10%) nb_accumulated 248 / 420 (finishes Sun 18:22 -- 601/h) 20240811-18:08:27 keep c_quizzes model 0 validated 29 / 420 (6.90%) nb_accumulated 277 / 420 (finishes Sun 18:22 -- 598/h) 20240811-18:11:29 keep c_quizzes model 2 validated 24 / 420 (5.71%) nb_accumulated 301 / 420 (finishes Sun 18:23 -- 586/h) 20240811-18:14:31 keep c_quizzes model 1 validated 22 / 420 (5.24%) nb_accumulated 323 / 420 (finishes Sun 18:24 -- 573/h) 20240811-18:17:34 keep c_quizzes model 9 validated 28 / 420 (6.67%) nb_accumulated 351 / 420 (finishes Sun 18:24 -- 571/h) 20240811-18:20:36 keep c_quizzes model 1 validated 28 / 420 (6.67%) nb_accumulated 379 / 420 (finishes Sun 18:24 -- 569/h) 20240811-18:23:39 keep c_quizzes model 2 validated 30 / 420 (7.14%) nb_accumulated 409 / 420 (finishes Sun 18:24 -- 571/h) 20240811-18:26:40 keep c_quizzes model 6 validated 29 / 420 (6.90%) nb_accumulated 438 / 420 (finishes now! -- 571/h) 20240811-18:26:54 wrote c_quizzes.pth 20240811-18:26:54 training model 0 20240811-18:26:54 training model 1 20240811-18:30:35 train_perplexity 280 model 1 1.1652649045254522 20240811-18:30:37 train_perplexity 280 model 0 1.1645563373711596 20240811-18:30:42 test_perplexity 280 model 1 1.1658695246355115 20240811-18:30:43 test_perplexity 280 model 0 1.1652622846673744 20240811-18:36:41 test_accuracy 280 model 1 val 1530 / 1608 20240811-18:36:45 test_accuracy 280 model 0 val 1500 / 1588 20240811-18:36:47 wrote gpt_000.pth 20240811-18:36:47 wrote gpt_001.pth 20240811-18:37:20 wrote non_validated_0280_00.png 20240811-18:37:52 wrote non_validated_0280_01.png 20240811-18:37:52 wrote state.pth 20240811-18:37:52 --- epoch 281 ---------------------------------------- 20240811-18:37:52 current_test_accuracies 0.9446 0.9515 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 20240811-18:37:52 training model 2 20240811-18:37:52 training model 3 20240811-18:41:35 train_perplexity 281 model 3 1.1657820226403244 20240811-18:41:36 train_perplexity 281 model 2 1.1646950312837958 20240811-18:41:42 test_perplexity 281 model 3 1.1658430448653347 20240811-18:41:42 test_perplexity 281 model 2 1.167400655237746 20240811-18:47:31 test_accuracy 281 model 3 val 1534 / 1623 20240811-18:47:33 test_accuracy 281 model 2 val 1553 / 1629 20240811-18:47:34 wrote gpt_002.pth 20240811-18:47:35 wrote gpt_003.pth 20240811-18:48:07 wrote non_validated_0281_02.png 20240811-18:48:40 wrote non_validated_0281_03.png 20240811-18:48:40 wrote state.pth 20240811-18:48:40 --- epoch 282 ---------------------------------------- 20240811-18:48:40 current_test_accuracies 0.9446 0.9515 0.9533 0.9452 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 20240811-18:48:40 training model 4 20240811-18:48:40 training model 5 20240811-18:52:23 train_perplexity 282 model 5 1.1652087659878636 20240811-18:52:24 train_perplexity 282 model 4 1.1654346259355393 20240811-18:52:30 test_perplexity 282 model 5 1.1682933565207878 20240811-18:52:31 test_perplexity 282 model 4 1.1654397563620802 20240811-18:58:28 test_accuracy 282 model 4 val 1519 / 1583 20240811-18:58:28 test_accuracy 282 model 5 val 1494 / 1575 20240811-18:58:30 wrote gpt_004.pth 20240811-18:58:31 wrote gpt_005.pth 20240811-18:59:04 wrote non_validated_0282_04.png 20240811-18:59:36 wrote non_validated_0282_05.png 20240811-18:59:36 wrote state.pth 20240811-18:59:36 --- epoch 283 ---------------------------------------- 20240811-18:59:36 current_test_accuracies 0.9446 0.9515 0.9533 0.9452 0.9596 0.9486 0.0000 0.0000 0.0000 0.0000 20240811-18:59:36 training model 6 20240811-18:59:36 training model 7 20240811-19:03:19 train_perplexity 283 model 7 1.1649490516684442 20240811-19:03:21 train_perplexity 283 model 6 1.1643522983918795 20240811-19:03:25 test_perplexity 283 model 7 1.166869216592847 20240811-19:03:27 test_perplexity 283 model 6 1.1669367769599648 20240811-19:09:15 test_accuracy 283 model 7 val 1518 / 1604 20240811-19:09:20 test_accuracy 283 model 6 val 1485 / 1576 20240811-19:09:22 wrote gpt_006.pth 20240811-19:09:23 wrote gpt_007.pth 20240811-19:09:56 wrote non_validated_0283_06.png 20240811-19:10:29 wrote non_validated_0283_07.png 20240811-19:10:29 wrote state.pth 20240811-19:10:29 --- epoch 284 ---------------------------------------- 20240811-19:10:29 current_test_accuracies 0.9446 0.9515 0.9533 0.9452 0.9596 0.9486 0.9423 0.9464 0.0000 0.0000 20240811-19:10:29 training model 8 20240811-19:10:29 training model 9 20240811-19:14:11 train_perplexity 284 model 9 1.1653254844052527 20240811-19:14:14 train_perplexity 284 model 8 1.1651630102356092 20240811-19:14:18 test_perplexity 284 model 9 1.1682158551985506 20240811-19:14:19 test_perplexity 284 model 8 1.1635935927339327 20240811-19:20:09 test_accuracy 284 model 8 val 1542 / 1604 20240811-19:20:12 test_accuracy 284 model 9 val 1488 / 1588 20240811-19:20:14 wrote gpt_008.pth 20240811-19:20:14 wrote gpt_009.pth 20240811-19:20:47 wrote non_validated_0284_08.png 20240811-19:21:19 wrote non_validated_0284_09.png 20240811-19:21:19 wrote state.pth 20240811-19:21:19 --- epoch 285 ---------------------------------------- 20240811-19:21:19 current_test_accuracies 0.9446 0.9515 0.9533 0.9452 0.9596 0.9486 0.9423 0.9464 0.9613 0.9370 20240811-19:21:19 training model 9 20240811-19:21:19 training model 6 20240811-19:25:02 train_perplexity 285 model 6 1.1644367781768057 20240811-19:25:04 train_perplexity 285 model 9 1.1651321347888979 20240811-19:25:08 test_perplexity 285 model 6 1.1661072141102475 20240811-19:25:10 test_perplexity 285 model 9 1.1662731841959975 20240811-19:31:00 test_accuracy 285 model 9 val 1516 / 1608 20240811-19:31:04 test_accuracy 285 model 6 val 1523 / 1586 20240811-19:31:06 wrote gpt_009.pth 20240811-19:31:07 wrote gpt_006.pth 20240811-19:31:40 wrote non_validated_0285_09.png 20240811-19:32:12 wrote non_validated_0285_06.png 20240811-19:32:12 wrote state.pth 20240811-19:32:12 --- epoch 286 ---------------------------------------- 20240811-19:32:12 current_test_accuracies 0.9446 0.9515 0.9533 0.9452 0.9596 0.9486 0.9603 0.9464 0.9613 0.9428 20240811-19:32:12 training model 9 20240811-19:32:12 training model 0 20240811-19:35:55 train_perplexity 286 model 0 1.1648886292739011 20240811-19:35:57 train_perplexity 286 model 9 1.164897708192212 20240811-19:36:02 test_perplexity 286 model 0 1.1655234861761594 20240811-19:36:03 test_perplexity 286 model 9 1.1685364298011112 20240811-19:41:57 test_accuracy 286 model 9 val 1541 / 1616 20240811-19:41:58 test_accuracy 286 model 0 val 1507 / 1608 20240811-19:42:00 wrote gpt_009.pth 20240811-19:42:01 wrote gpt_000.pth 20240811-19:42:33 wrote non_validated_0286_09.png 20240811-19:43:06 wrote non_validated_0286_00.png 20240811-19:43:06 wrote state.pth 20240811-19:43:06 --- epoch 287 ---------------------------------------- 20240811-19:43:06 current_test_accuracies 0.9372 0.9515 0.9533 0.9452 0.9596 0.9486 0.9603 0.9464 0.9613 0.9536 20240811-19:43:06 training model 0 20240811-19:43:06 training model 3 20240811-19:46:48 train_perplexity 287 model 3 1.1655343662639341 20240811-19:46:53 train_perplexity 287 model 0 1.1647589187071712 20240811-19:46:54 test_perplexity 287 model 3 1.1669566667741345 20240811-19:46:57 test_perplexity 287 model 0 1.1664510185389636 20240811-19:52:49 test_accuracy 287 model 0 val 1552 / 1622 20240811-19:52:58 test_accuracy 287 model 3 val 1455 / 1566 20240811-19:52:59 wrote gpt_000.pth 20240811-19:53:00 wrote gpt_003.pth 20240811-19:53:33 wrote non_validated_0287_00.png 20240811-19:54:05 wrote non_validated_0287_03.png 20240811-19:54:05 wrote state.pth 20240811-19:54:05 --- epoch 288 ---------------------------------------- 20240811-19:54:05 current_test_accuracies 0.9568 0.9515 0.9533 0.9291 0.9596 0.9486 0.9603 0.9464 0.9613 0.9536 20240811-19:54:05 training model 3 20240811-19:54:05 training model 7 20240811-19:57:47 train_perplexity 288 model 7 1.1646668322590785 20240811-19:57:52 train_perplexity 288 model 3 1.1649587755626245 20240811-19:57:53 test_perplexity 288 model 7 1.1647299055626077 20240811-19:57:57 test_perplexity 288 model 3 1.1673295682222338 20240811-20:03:32 test_accuracy 288 model 3 val 1583 / 1664 20240811-20:03:41 test_accuracy 288 model 7 val 1512 / 1590 20240811-20:03:43 wrote gpt_003.pth 20240811-20:03:44 wrote gpt_007.pth 20240811-20:04:16 wrote non_validated_0288_03.png 20240811-20:04:49 wrote non_validated_0288_07.png 20240811-20:04:49 wrote state.pth 20240811-20:04:49 --- epoch 289 ---------------------------------------- 20240811-20:04:49 current_test_accuracies 0.9568 0.9515 0.9533 0.9513 0.9596 0.9486 0.9603 0.9509 0.9613 0.9536 20240811-20:04:49 training model 5 20240811-20:04:49 training model 7 20240811-20:08:31 train_perplexity 289 model 7 1.164720952115826 20240811-20:08:33 train_perplexity 289 model 5 1.164649018540818 20240811-20:08:38 test_perplexity 289 model 7 1.1654225922928885 20240811-20:08:40 test_perplexity 289 model 5 1.1672355569876658 20240811-20:14:28 test_accuracy 289 model 7 val 1521 / 1616 20240811-20:14:29 test_accuracy 289 model 5 val 1516 / 1608 20240811-20:14:31 wrote gpt_005.pth 20240811-20:14:32 wrote gpt_007.pth 20240811-20:15:05 wrote non_validated_0289_05.png 20240811-20:15:37 wrote non_validated_0289_07.png 20240811-20:15:38 wrote state.pth 20240811-20:15:38 --- epoch 290 ---------------------------------------- 20240811-20:15:38 current_test_accuracies 0.9568 0.9515 0.9533 0.9513 0.9596 0.9428 0.9603 0.9412 0.9613 0.9536 20240811-20:15:38 training model 7 20240811-20:15:38 training model 5 20240811-20:19:20 train_perplexity 290 model 5 1.1646496839985945 20240811-20:19:21 train_perplexity 290 model 7 1.1645669393770144 20240811-20:19:28 test_perplexity 290 model 5 1.1660069019238268 20240811-20:19:28 test_perplexity 290 model 7 1.1639554202690339 20240811-20:25:21 test_accuracy 290 model 7 val 1497 / 1602 20240811-20:25:21 test_accuracy 290 model 5 val 1523 / 1609 20240811-20:25:24 wrote gpt_007.pth 20240811-20:25:24 wrote gpt_005.pth 20240811-20:25:57 wrote non_validated_0290_07.png 20240811-20:26:30 wrote non_validated_0290_05.png 20240811-20:26:30 wrote state.pth 20240811-20:26:30 --- epoch 291 ---------------------------------------- 20240811-20:26:30 current_test_accuracies 0.9568 0.9515 0.9533 0.9513 0.9596 0.9466 0.9603 0.9345 0.9613 0.9536 20240811-20:26:30 training model 7 20240811-20:26:30 training model 5 20240811-20:30:12 train_perplexity 291 model 5 1.1644320185508832 20240811-20:30:15 train_perplexity 291 model 7 1.1647375131079096 20240811-20:30:18 test_perplexity 291 model 5 1.1659770946520565 20240811-20:30:20 test_perplexity 291 model 7 1.1648048055281917 20240811-20:36:14 test_accuracy 291 model 5 val 1540 / 1618 20240811-20:36:16 test_accuracy 291 model 7 val 1500 / 1601 20240811-20:36:18 wrote gpt_007.pth 20240811-20:36:18 wrote gpt_005.pth 20240811-20:36:52 wrote non_validated_0291_07.png 20240811-20:37:25 wrote non_validated_0291_05.png 20240811-20:37:25 wrote state.pth 20240811-20:37:25 --- epoch 292 ---------------------------------------- 20240811-20:37:25 current_test_accuracies 0.9568 0.9515 0.9533 0.9513 0.9596 0.9518 0.9603 0.9369 0.9613 0.9536 20240811-20:37:25 training model 7 20240811-20:37:25 training model 3 20240811-20:41:08 train_perplexity 292 model 3 1.1652911191773452 20240811-20:41:09 train_perplexity 292 model 7 1.1639821154875856 20240811-20:41:15 test_perplexity 292 model 3 1.1657210074308535 20240811-20:41:16 test_perplexity 292 model 7 1.1676103635483832 20240811-20:47:06 test_accuracy 292 model 7 val 1534 / 1610 20240811-20:47:10 test_accuracy 292 model 3 val 1527 / 1597 20240811-20:47:11 wrote gpt_007.pth 20240811-20:47:12 wrote gpt_003.pth 20240811-20:47:45 wrote non_validated_0292_07.png 20240811-20:48:18 wrote non_validated_0292_03.png 20240811-20:48:18 wrote state.pth 20240811-20:48:18 --- epoch 293 ---------------------------------------- 20240811-20:48:18 current_test_accuracies 0.9568 0.9515 0.9533 0.9562 0.9596 0.9518 0.9603 0.9528 0.9613 0.9536 20240811-20:51:43 keep c_quizzes model 0 validated 17 / 420 (4.05%) nb_accumulated 17 / 420 (finishes Sun 22:12 -- 297/h) 20240811-20:54:46 keep c_quizzes model 4 validated 20 / 420 (4.76%) nb_accumulated 37 / 420 (finishes Sun 22:01 -- 343/h) 20240811-20:57:48 keep c_quizzes model 4 validated 27 / 420 (6.43%) nb_accumulated 64 / 420 (finishes Sun 21:50 -- 404/h) 20240811-21:00:50 keep c_quizzes model 1 validated 26 / 420 (6.19%) nb_accumulated 90 / 420 (finishes Sun 21:46 -- 430/h) 20240811-21:03:50 keep c_quizzes model 0 validated 25 / 420 (5.95%) nb_accumulated 115 / 420 (finishes Sun 21:45 -- 443/h) 20240811-21:06:51 keep c_quizzes model 0 validated 22 / 420 (5.24%) nb_accumulated 137 / 420 (finishes Sun 21:45 -- 442/h) 20240811-21:09:52 keep c_quizzes model 9 validated 31 / 420 (7.38%) nb_accumulated 168 / 420 (finishes Sun 21:42 -- 467/h) 20240811-21:12:52 keep c_quizzes model 7 validated 20 / 420 (4.76%) nb_accumulated 188 / 420 (finishes Sun 21:43 -- 458/h) 20240811-21:15:53 keep c_quizzes model 8 validated 18 / 420 (4.29%) nb_accumulated 206 / 420 (finishes Sun 21:44 -- 447/h) 20240811-21:18:55 keep c_quizzes model 3 validated 23 / 420 (5.48%) nb_accumulated 229 / 420 (finishes Sun 21:44 -- 448/h) 20240811-21:21:56 keep c_quizzes model 8 validated 18 / 420 (4.29%) nb_accumulated 247 / 420 (finishes Sun 21:45 -- 440/h) 20240811-21:24:57 keep c_quizzes model 4 validated 32 / 420 (7.62%) nb_accumulated 279 / 420 (finishes Sun 21:43 -- 456/h) 20240811-21:27:59 keep c_quizzes model 1 validated 26 / 420 (6.19%) nb_accumulated 305 / 420 (finishes Sun 21:42 -- 461/h) 20240811-21:30:59 keep c_quizzes model 0 validated 29 / 420 (6.90%) nb_accumulated 334 / 420 (finishes Sun 21:41 -- 469/h) 20240811-21:34:00 keep c_quizzes model 3 validated 21 / 420 (5.00%) nb_accumulated 355 / 420 (finishes Sun 21:42 -- 466/h) 20240811-21:37:00 keep c_quizzes model 6 validated 20 / 420 (4.76%) nb_accumulated 375 / 420 (finishes Sun 21:42 -- 461/h) 20240811-21:40:12 keep c_quizzes model 7 validated 24 / 420 (5.71%) nb_accumulated 399 / 420 (finishes Sun 21:42 -- 461/h) 20240811-21:43:17 keep c_quizzes model 5 validated 35 / 420 (8.33%) nb_accumulated 434 / 420 (finishes now! -- 473/h) 20240811-21:43:31 wrote c_quizzes.pth 20240811-21:43:31 training model 0 20240811-21:43:31 training model 1 20240811-21:47:12 train_perplexity 293 model 1 1.166434741645663 20240811-21:47:16 train_perplexity 293 model 0 1.1655778480251673 20240811-21:47:18 test_perplexity 293 model 1 1.168056229648214 20240811-21:47:21 test_perplexity 293 model 0 1.1666872797337098 20240811-21:53:17 test_accuracy 293 model 1 val 1535 / 1609 20240811-21:53:20 test_accuracy 293 model 0 val 1508 / 1601 20240811-21:53:21 wrote gpt_000.pth 20240811-21:53:22 wrote gpt_001.pth 20240811-21:53:55 wrote non_validated_0293_00.png 20240811-21:54:28 wrote non_validated_0293_01.png 20240811-21:54:28 wrote state.pth 20240811-21:54:28 --- epoch 294 ---------------------------------------- 20240811-21:54:28 current_test_accuracies 0.9419 0.9540 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 20240811-21:54:28 training model 2 20240811-21:54:28 training model 3 20240811-21:58:11 train_perplexity 294 model 2 1.1653400596357397 20240811-21:58:11 train_perplexity 294 model 3 1.16591422129075 20240811-21:58:18 test_perplexity 294 model 2 1.1651121236381843 20240811-21:58:18 test_perplexity 294 model 3 1.1684919074126905 20240811-22:04:19 test_accuracy 294 model 3 val 1528 / 1601 20240811-22:04:21 test_accuracy 294 model 2 val 1517 / 1592 20240811-22:04:23 wrote gpt_002.pth 20240811-22:04:24 wrote gpt_003.pth 20240811-22:04:57 wrote non_validated_0294_02.png 20240811-22:05:30 wrote non_validated_0294_03.png 20240811-22:05:30 wrote state.pth 20240811-22:05:30 --- epoch 295 ---------------------------------------- 20240811-22:05:30 current_test_accuracies 0.9419 0.9540 0.9529 0.9544 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 20240811-22:05:30 training model 4 20240811-22:05:30 training model 5 20240811-22:09:12 train_perplexity 295 model 5 1.1652869670974213 20240811-22:09:17 train_perplexity 295 model 4 1.1656912884233304 20240811-22:09:18 test_perplexity 295 model 5 1.1675179074230357 20240811-22:09:22 test_perplexity 295 model 4 1.1688422446314446 20240811-22:15:16 test_accuracy 295 model 5 val 1536 / 1623 20240811-22:15:19 test_accuracy 295 model 4 val 1547 / 1618 20240811-22:15:21 wrote gpt_004.pth 20240811-22:15:21 wrote gpt_005.pth 20240811-22:15:56 wrote non_validated_0295_04.png 20240811-22:16:30 wrote non_validated_0295_05.png 20240811-22:16:30 wrote state.pth 20240811-22:16:30 --- epoch 296 ---------------------------------------- 20240811-22:16:30 current_test_accuracies 0.9419 0.9540 0.9529 0.9544 0.9561 0.9464 0.0000 0.0000 0.0000 0.0000 20240811-22:16:30 training model 6 20240811-22:16:30 training model 7 20240811-22:20:12 train_perplexity 296 model 7 1.16545934198993 20240811-22:20:17 train_perplexity 296 model 6 1.1655221932093591 20240811-22:20:18 test_perplexity 296 model 7 1.1683670671575026 20240811-22:20:21 test_perplexity 296 model 6 1.1677478087152462 20240811-22:26:11 test_accuracy 296 model 7 val 1534 / 1615 20240811-22:26:14 test_accuracy 296 model 6 val 1522 / 1608 20240811-22:26:16 wrote gpt_006.pth 20240811-22:26:17 wrote gpt_007.pth 20240811-22:26:50 wrote non_validated_0296_06.png 20240811-22:27:22 wrote non_validated_0296_07.png 20240811-22:27:23 wrote state.pth 20240811-22:27:23 --- epoch 297 ---------------------------------------- 20240811-22:27:23 current_test_accuracies 0.9419 0.9540 0.9529 0.9544 0.9561 0.9464 0.9465 0.9498 0.0000 0.0000 20240811-22:27:23 training model 8 20240811-22:27:23 training model 9 20240811-22:31:04 train_perplexity 297 model 9 1.165545625172942 20240811-22:31:08 train_perplexity 297 model 8 1.1659501955383529 20240811-22:31:11 test_perplexity 297 model 9 1.1701418760942544 20240811-22:31:13 test_perplexity 297 model 8 1.1660335335397136 20240811-22:36:56 test_accuracy 297 model 9 val 1555 / 1640 20240811-22:37:01 test_accuracy 297 model 8 val 1529 / 1612 20240811-22:37:03 wrote gpt_008.pth 20240811-22:37:04 wrote gpt_009.pth 20240811-22:37:37 wrote non_validated_0297_08.png 20240811-22:38:10 wrote non_validated_0297_09.png 20240811-22:38:10 wrote state.pth 20240811-22:38:10 --- epoch 298 ---------------------------------------- 20240811-22:38:10 current_test_accuracies 0.9419 0.9540 0.9529 0.9544 0.9561 0.9464 0.9465 0.9498 0.9485 0.9482 20240811-22:38:10 training model 0 20240811-22:38:10 training model 5 20240811-22:41:53 train_perplexity 298 model 5 1.1646553285577408 20240811-22:41:54 train_perplexity 298 model 0 1.1651679720879076 20240811-22:42:00 test_perplexity 298 model 5 1.1664438747747572 20240811-22:42:01 test_perplexity 298 model 0 1.167034068908636 20240811-22:48:00 test_accuracy 298 model 0 val 1525 / 1604 20240811-22:48:02 test_accuracy 298 model 5 val 1497 / 1584 20240811-22:48:04 wrote gpt_000.pth 20240811-22:48:04 wrote gpt_005.pth 20240811-22:48:37 wrote non_validated_0298_00.png 20240811-22:49:11 wrote non_validated_0298_05.png 20240811-22:49:11 wrote state.pth 20240811-22:49:11 --- epoch 299 ---------------------------------------- 20240811-22:49:11 current_test_accuracies 0.9507 0.9540 0.9529 0.9544 0.9561 0.9451 0.9465 0.9498 0.9485 0.9482 20240811-22:49:11 training model 5 20240811-22:49:11 training model 6 20240811-22:52:53 train_perplexity 299 model 6 1.164772738961001 20240811-22:52:55 train_perplexity 299 model 5 1.1648973029349807 20240811-22:53:00 test_perplexity 299 model 6 1.1672006285559504 20240811-22:53:02 test_perplexity 299 model 5 1.1682794634087026 20240811-22:58:56 test_accuracy 299 model 6 val 1517 / 1605 20240811-22:58:57 test_accuracy 299 model 5 val 1515 / 1603 20240811-22:58:59 wrote gpt_005.pth 20240811-22:59:00 wrote gpt_006.pth 20240811-22:59:32 wrote non_validated_0299_05.png 20240811-23:00:06 wrote non_validated_0299_06.png 20240811-23:00:06 wrote state.pth 20240811-23:00:06 --- epoch 300 ---------------------------------------- 20240811-23:00:06 current_test_accuracies 0.9507 0.9540 0.9529 0.9544 0.9561 0.9451 0.9452 0.9498 0.9485 0.9482 20240811-23:00:06 training model 5 20240811-23:00:06 training model 6 20240811-23:03:48 train_perplexity 300 model 6 1.1647301727556114 20240811-23:03:54 train_perplexity 300 model 5 1.1646527062412932 20240811-23:03:54 test_perplexity 300 model 6 1.1676184487756125 20240811-23:03:58 test_perplexity 300 model 5 1.167475582340069 20240811-23:09:53 test_accuracy 300 model 5 val 1532 / 1615 20240811-23:09:55 test_accuracy 300 model 6 val 1490 / 1581 20240811-23:09:57 wrote gpt_005.pth 20240811-23:09:58 wrote gpt_006.pth 20240811-23:10:33 wrote non_validated_0300_05.png 20240811-23:11:07 wrote non_validated_0300_06.png 20240811-23:11:07 wrote state.pth 20240811-23:11:07 --- epoch 301 ---------------------------------------- 20240811-23:11:07 current_test_accuracies 0.9507 0.9540 0.9529 0.9544 0.9561 0.9486 0.9424 0.9498 0.9485 0.9482 20240811-23:11:07 training model 6 20240811-23:11:07 training model 9 20240811-23:14:49 train_perplexity 301 model 9 1.1656234030731458 20240811-23:14:55 test_perplexity 301 model 9 1.1692078934134464 20240811-23:14:55 train_perplexity 301 model 6 1.1648580785817952 20240811-23:14:59 test_perplexity 301 model 6 1.1664872990206776 20240811-23:20:52 test_accuracy 301 model 6 val 1515 / 1601 20240811-23:20:53 test_accuracy 301 model 9 val 1521 / 1608 20240811-23:20:55 wrote gpt_006.pth 20240811-23:20:56 wrote gpt_009.pth 20240811-23:21:29 wrote non_validated_0301_06.png 20240811-23:22:01 wrote non_validated_0301_09.png 20240811-23:22:01 wrote state.pth 20240811-23:22:01 --- epoch 302 ---------------------------------------- 20240811-23:22:01 current_test_accuracies 0.9507 0.9540 0.9529 0.9544 0.9561 0.9486 0.9463 0.9498 0.9485 0.9459 20240811-23:22:01 training model 9 20240811-23:22:01 training model 6 20240811-23:25:44 train_perplexity 302 model 6 1.1647141391903575 20240811-23:25:45 train_perplexity 302 model 9 1.165228271313813 20240811-23:25:52 test_perplexity 302 model 6 1.167899040145059 20240811-23:25:52 test_perplexity 302 model 9 1.1656738817376224 20240811-23:31:45 test_accuracy 302 model 6 val 1511 / 1601 20240811-23:31:52 test_accuracy 302 model 9 val 1444 / 1554 20240811-23:31:54 wrote gpt_009.pth 20240811-23:31:55 wrote gpt_006.pth 20240811-23:32:28 wrote non_validated_0302_09.png 20240811-23:33:01 wrote non_validated_0302_06.png 20240811-23:33:02 wrote state.pth 20240811-23:33:02 --- epoch 303 ---------------------------------------- 20240811-23:33:02 current_test_accuracies 0.9507 0.9540 0.9529 0.9544 0.9561 0.9486 0.9438 0.9498 0.9485 0.9292 20240811-23:33:02 training model 9 20240811-23:33:02 training model 6 20240811-23:36:44 train_perplexity 303 model 6 1.164427050262298 20240811-23:36:48 train_perplexity 303 model 9 1.1656138480757803 20240811-23:36:50 test_perplexity 303 model 6 1.167485220392292 20240811-23:36:53 test_perplexity 303 model 9 1.1677509269424706 20240811-23:42:48 test_accuracy 303 model 6 val 1530 / 1608 20240811-23:42:49 test_accuracy 303 model 9 val 1521 / 1608 20240811-23:42:51 wrote gpt_009.pth 20240811-23:42:52 wrote gpt_006.pth 20240811-23:43:24 wrote non_validated_0303_09.png 20240811-23:43:58 wrote non_validated_0303_06.png 20240811-23:43:58 wrote state.pth 20240811-23:43:58 --- epoch 304 ---------------------------------------- 20240811-23:43:58 current_test_accuracies 0.9507 0.9540 0.9529 0.9544 0.9561 0.9486 0.9515 0.9498 0.9485 0.9459 20240811-23:43:58 training model 9 20240811-23:43:58 training model 8 20240811-23:47:40 train_perplexity 304 model 8 1.1656703518927336 20240811-23:47:43 train_perplexity 304 model 9 1.1651060131629056 20240811-23:47:47 test_perplexity 304 model 8 1.1666150429687325 20240811-23:47:49 test_perplexity 304 model 9 1.1665008323017232 20240811-23:53:46 test_accuracy 304 model 9 val 1515 / 1598 20240811-23:53:48 test_accuracy 304 model 8 val 1506 / 1589 20240811-23:53:50 wrote gpt_009.pth 20240811-23:53:51 wrote gpt_008.pth 20240811-23:54:24 wrote non_validated_0304_09.png 20240811-23:54:57 wrote non_validated_0304_08.png 20240811-23:54:57 wrote state.pth 20240811-23:54:57 --- epoch 305 ---------------------------------------- 20240811-23:54:57 current_test_accuracies 0.9507 0.9540 0.9529 0.9544 0.9561 0.9486 0.9515 0.9498 0.9478 0.9481 20240811-23:54:57 training model 8 20240811-23:54:57 training model 9 20240811-23:58:40 train_perplexity 305 model 9 1.1645895502032095 20240811-23:58:40 train_perplexity 305 model 8 1.1650307324902418 20240811-23:58:47 test_perplexity 305 model 9 1.165641915924991 20240811-23:58:47 test_perplexity 305 model 8 1.1662813731269515 20240812-00:04:44 test_accuracy 305 model 9 val 1517 / 1590 20240812-00:04:46 test_accuracy 305 model 8 val 1498 / 1594 20240812-00:04:48 wrote gpt_008.pth 20240812-00:04:48 wrote gpt_009.pth 20240812-00:05:23 wrote non_validated_0305_08.png 20240812-00:05:57 wrote non_validated_0305_09.png 20240812-00:05:57 wrote state.pth 20240812-00:05:57 --- epoch 306 ---------------------------------------- 20240812-00:05:57 current_test_accuracies 0.9507 0.9540 0.9529 0.9544 0.9561 0.9486 0.9515 0.9498 0.9398 0.9541 20240812-00:05:57 training model 8 20240812-00:05:57 training model 5 20240812-00:09:39 train_perplexity 306 model 5 1.1648296105469726 20240812-00:09:41 train_perplexity 306 model 8 1.1658703827549053 20240812-00:09:46 test_perplexity 306 model 5 1.1661064373875913 20240812-00:09:47 test_perplexity 306 model 8 1.1666525215612917 20240812-00:15:44 test_accuracy 306 model 8 val 1511 / 1601 20240812-00:15:47 test_accuracy 306 model 5 val 1486 / 1580 20240812-00:15:49 wrote gpt_008.pth 20240812-00:15:50 wrote gpt_005.pth 20240812-00:16:22 wrote non_validated_0306_08.png 20240812-00:16:55 wrote non_validated_0306_05.png 20240812-00:16:55 wrote state.pth 20240812-00:16:55 --- epoch 307 ---------------------------------------- 20240812-00:16:55 current_test_accuracies 0.9507 0.9540 0.9529 0.9544 0.9561 0.9405 0.9515 0.9498 0.9438 0.9541 20240812-00:16:55 training model 5 20240812-00:16:55 training model 8 20240812-00:20:39 train_perplexity 307 model 5 1.164518603022212 20240812-00:20:39 train_perplexity 307 model 8 1.1652288570416398 20240812-00:20:46 test_perplexity 307 model 5 1.1675526387493458 20240812-00:20:46 test_perplexity 307 model 8 1.1663320188669655 20240812-00:26:38 test_accuracy 307 model 8 val 1504 / 1601 20240812-00:26:42 test_accuracy 307 model 5 val 1496 / 1589 20240812-00:26:44 wrote gpt_005.pth 20240812-00:26:44 wrote gpt_008.pth 20240812-00:27:17 wrote non_validated_0307_05.png 20240812-00:27:49 wrote non_validated_0307_08.png 20240812-00:27:49 wrote state.pth 20240812-00:27:49 --- epoch 308 ---------------------------------------- 20240812-00:27:49 current_test_accuracies 0.9507 0.9540 0.9529 0.9544 0.9561 0.9415 0.9515 0.9498 0.9394 0.9541 20240812-00:27:49 training model 8 20240812-00:27:49 training model 5 20240812-00:31:31 train_perplexity 308 model 5 1.1639566696329224 20240812-00:31:37 train_perplexity 308 model 8 1.165008063074523 20240812-00:31:37 test_perplexity 308 model 5 1.1664710052333593 20240812-00:31:41 test_perplexity 308 model 8 1.1657958773237025 20240812-00:37:39 test_accuracy 308 model 5 val 1522 / 1599 20240812-00:37:42 test_accuracy 308 model 8 val 1486 / 1570 20240812-00:37:44 wrote gpt_008.pth 20240812-00:37:45 wrote gpt_005.pth 20240812-00:38:18 wrote non_validated_0308_08.png 20240812-00:38:53 wrote non_validated_0308_05.png 20240812-00:38:54 wrote state.pth 20240812-00:38:54 --- epoch 309 ---------------------------------------- 20240812-00:38:54 current_test_accuracies 0.9507 0.9540 0.9529 0.9544 0.9561 0.9518 0.9515 0.9498 0.9465 0.9541 20240812-00:38:54 training model 8 20240812-00:38:54 training model 7 20240812-00:42:35 train_perplexity 309 model 7 1.164865844264386 20240812-00:42:41 test_perplexity 309 model 7 1.1667131315080101 20240812-00:42:42 train_perplexity 309 model 8 1.1649636461085993 20240812-00:42:46 test_perplexity 309 model 8 1.1672604531775732 20240812-00:48:35 test_accuracy 309 model 7 val 1525 / 1626 20240812-00:48:36 test_accuracy 309 model 8 val 1542 / 1629 20240812-00:48:38 wrote gpt_008.pth 20240812-00:48:38 wrote gpt_007.pth 20240812-00:49:11 wrote non_validated_0309_08.png 20240812-00:49:45 wrote non_validated_0309_07.png 20240812-00:49:45 wrote state.pth 20240812-00:49:45 --- epoch 310 ---------------------------------------- 20240812-00:49:45 current_test_accuracies 0.9507 0.9540 0.9529 0.9544 0.9561 0.9518 0.9515 0.9379 0.9466 0.9541 20240812-00:49:45 training model 7 20240812-00:49:45 training model 8 20240812-00:53:28 train_perplexity 310 model 8 1.1648802035722823 20240812-00:53:30 train_perplexity 310 model 7 1.1647947490340904 20240812-00:53:34 test_perplexity 310 model 8 1.1679566541517288 20240812-00:53:36 test_perplexity 310 model 7 1.1665165085593991 20240812-00:59:30 test_accuracy 310 model 8 val 1536 / 1608 20240812-00:59:31 test_accuracy 310 model 7 val 1522 / 1609 20240812-00:59:33 wrote gpt_007.pth 20240812-00:59:33 wrote gpt_008.pth 20240812-01:00:06 wrote non_validated_0310_07.png 20240812-01:00:40 wrote non_validated_0310_08.png 20240812-01:00:41 wrote state.pth 20240812-01:00:41 --- epoch 311 ---------------------------------------- 20240812-01:00:41 current_test_accuracies 0.9507 0.9540 0.9529 0.9544 0.9561 0.9518 0.9515 0.9459 0.9552 0.9541 20240812-01:00:41 training model 7 20240812-01:00:41 training model 0 20240812-01:04:22 train_perplexity 311 model 0 1.1651367652176625 20240812-01:04:28 train_perplexity 311 model 7 1.1644484221817721 20240812-01:04:28 test_perplexity 311 model 0 1.1667382977695653 20240812-01:04:32 test_perplexity 311 model 7 1.168708324985973 20240812-01:10:20 test_accuracy 311 model 0 val 1551 / 1626 20240812-01:10:24 test_accuracy 311 model 7 val 1540 / 1618 20240812-01:10:26 wrote gpt_007.pth 20240812-01:10:26 wrote gpt_000.pth 20240812-01:11:01 wrote non_validated_0311_07.png 20240812-01:11:34 wrote non_validated_0311_00.png 20240812-01:11:34 wrote state.pth 20240812-01:11:34 --- epoch 312 ---------------------------------------- 20240812-01:11:34 current_test_accuracies 0.9539 0.9540 0.9529 0.9544 0.9561 0.9518 0.9515 0.9518 0.9552 0.9541 20240812-01:15:04 keep c_quizzes model 4 validated 24 / 420 (5.71%) nb_accumulated 24 / 420 (finishes Mon 02:12 -- 412/h) 20240812-01:18:10 keep c_quizzes model 9 validated 19 / 420 (4.52%) nb_accumulated 43 / 420 (finishes Mon 02:15 -- 391/h) 20240812-01:21:14 keep c_quizzes model 6 validated 29 / 420 (6.90%) nb_accumulated 72 / 420 (finishes Mon 02:07 -- 447/h) 20240812-01:24:21 keep c_quizzes model 9 validated 30 / 420 (7.14%) nb_accumulated 102 / 420 (finishes Mon 02:04 -- 478/h) 20240812-01:27:22 keep c_quizzes model 5 validated 24 / 420 (5.71%) nb_accumulated 126 / 420 (finishes Mon 02:04 -- 478/h) 20240812-01:30:23 keep c_quizzes model 0 validated 32 / 420 (7.62%) nb_accumulated 158 / 420 (finishes Mon 02:01 -- 503/h) 20240812-01:33:26 keep c_quizzes model 8 validated 23 / 420 (5.48%) nb_accumulated 181 / 420 (finishes Mon 02:02 -- 496/h) 20240812-01:36:27 keep c_quizzes model 5 validated 25 / 420 (5.95%) nb_accumulated 206 / 420 (finishes Mon 02:02 -- 496/h) 20240812-01:39:26 keep c_quizzes model 7 validated 26 / 420 (6.19%) nb_accumulated 232 / 420 (finishes Mon 02:02 -- 499/h) 20240812-01:42:34 keep c_quizzes model 2 validated 24 / 420 (5.71%) nb_accumulated 256 / 420 (finishes Mon 02:02 -- 495/h) 20240812-01:45:37 keep c_quizzes model 9 validated 20 / 420 (4.76%) nb_accumulated 276 / 420 (finishes Mon 02:03 -- 486/h) 20240812-01:48:44 keep c_quizzes model 4 validated 19 / 420 (4.52%) nb_accumulated 295 / 420 (finishes Mon 02:04 -- 476/h) 20240812-01:51:45 keep c_quizzes model 5 validated 28 / 420 (6.67%) nb_accumulated 323 / 420 (finishes Mon 02:03 -- 482/h) 20240812-01:54:48 keep c_quizzes model 7 validated 22 / 420 (5.24%) nb_accumulated 345 / 420 (finishes Mon 02:04 -- 478/h) 20240812-01:57:51 keep c_quizzes model 9 validated 29 / 420 (6.90%) nb_accumulated 374 / 420 (finishes Mon 02:03 -- 484/h) 20240812-02:00:56 keep c_quizzes model 4 validated 22 / 420 (5.24%) nb_accumulated 396 / 420 (finishes Mon 02:03 -- 481/h) 20240812-02:03:59 keep c_quizzes model 9 validated 32 / 420 (7.62%) nb_accumulated 428 / 420 (finishes now! -- 489/h) 20240812-02:04:13 wrote c_quizzes.pth 20240812-02:04:13 training model 0 20240812-02:04:13 training model 1 20240812-02:07:55 train_perplexity 312 model 0 1.1656243305748537 20240812-02:07:55 train_perplexity 312 model 1 1.1668002456104791 20240812-02:08:02 test_perplexity 312 model 0 1.168077628470942 20240812-02:08:02 test_perplexity 312 model 1 1.1660951767048295 20240812-02:13:57 test_accuracy 312 model 0 val 1537 / 1615 20240812-02:14:01 test_accuracy 312 model 1 val 1487 / 1595 20240812-02:14:03 wrote gpt_000.pth 20240812-02:14:03 wrote gpt_001.pth 20240812-02:14:36 wrote non_validated_0312_00.png 20240812-02:15:11 wrote non_validated_0312_01.png 20240812-02:15:11 wrote state.pth 20240812-02:15:11 --- epoch 313 ---------------------------------------- 20240812-02:15:11 current_test_accuracies 0.9517 0.9323 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 20240812-02:15:11 training model 2 20240812-02:15:11 training model 3 20240812-02:18:54 train_perplexity 313 model 3 1.1658443089155188 20240812-02:18:55 train_perplexity 313 model 2 1.1662371904031155 20240812-02:19:01 test_perplexity 313 model 3 1.1668600228793655 20240812-02:19:02 test_perplexity 313 model 2 1.1685754500175547 20240812-02:25:05 test_accuracy 313 model 2 val 1475 / 1580 20240812-02:25:07 test_accuracy 313 model 3 val 1483 / 1571 20240812-02:25:09 wrote gpt_002.pth 20240812-02:25:10 wrote gpt_003.pth 20240812-02:25:44 wrote non_validated_0313_02.png 20240812-02:26:17 wrote non_validated_0313_03.png 20240812-02:26:17 wrote state.pth 20240812-02:26:17 --- epoch 314 ---------------------------------------- 20240812-02:26:17 current_test_accuracies 0.9517 0.9323 0.9335 0.9440 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 20240812-02:26:17 training model 4 20240812-02:26:17 training model 5 20240812-02:29:59 train_perplexity 314 model 5 1.1651101836197528 20240812-02:30:05 test_perplexity 314 model 5 1.1686806899259048 20240812-02:30:05 train_perplexity 314 model 4 1.1665107141121787 20240812-02:30:09 test_perplexity 314 model 4 1.1678072030655466 20240812-02:36:03 test_accuracy 314 model 4 val 1515 / 1611 20240812-02:36:05 test_accuracy 314 model 5 val 1533 / 1608 20240812-02:36:07 wrote gpt_004.pth 20240812-02:36:08 wrote gpt_005.pth 20240812-02:36:41 wrote non_validated_0314_04.png 20240812-02:37:14 wrote non_validated_0314_05.png 20240812-02:37:14 wrote state.pth 20240812-02:37:14 --- epoch 315 ---------------------------------------- 20240812-02:37:14 current_test_accuracies 0.9517 0.9323 0.9335 0.9440 0.9404 0.9534 0.0000 0.0000 0.0000 0.0000 20240812-02:37:14 training model 6 20240812-02:37:14 training model 7 20240812-02:40:56 train_perplexity 315 model 7 1.164832736267082 20240812-02:41:01 train_perplexity 315 model 6 1.165201690221847 20240812-02:41:02 test_perplexity 315 model 7 1.167073586983601 20240812-02:41:05 test_perplexity 315 model 6 1.1686573302993473 20240812-02:46:55 test_accuracy 315 model 7 val 1516 / 1604 20240812-02:46:58 test_accuracy 315 model 6 val 1514 / 1609 20240812-02:46:59 wrote gpt_006.pth 20240812-02:47:00 wrote gpt_007.pth 20240812-02:47:33 wrote non_validated_0315_06.png 20240812-02:48:06 wrote non_validated_0315_07.png 20240812-02:48:06 wrote state.pth 20240812-02:48:06 --- epoch 316 ---------------------------------------- 20240812-02:48:06 current_test_accuracies 0.9517 0.9323 0.9335 0.9440 0.9404 0.9534 0.9410 0.9451 0.0000 0.0000 20240812-02:48:06 training model 8 20240812-02:48:06 training model 9 20240812-02:51:48 train_perplexity 316 model 9 1.165506944656647 20240812-02:51:52 train_perplexity 316 model 8 1.1659030200023779 20240812-02:51:55 test_perplexity 316 model 9 1.1685683657242851 20240812-02:51:57 test_perplexity 316 model 8 1.1701491584597448 20240812-02:57:53 test_accuracy 316 model 8 val 1531 / 1612 20240812-02:57:57 test_accuracy 316 model 9 val 1510 / 1596 20240812-02:57:59 wrote gpt_008.pth 20240812-02:58:00 wrote gpt_009.pth 20240812-02:58:33 wrote non_validated_0316_08.png 20240812-02:59:05 wrote non_validated_0316_09.png 20240812-02:59:06 wrote state.pth 20240812-02:59:06 --- epoch 317 ---------------------------------------- 20240812-02:59:06 current_test_accuracies 0.9517 0.9323 0.9335 0.9440 0.9404 0.9534 0.9410 0.9451 0.9498 0.9461 20240812-02:59:06 training model 1 20240812-02:59:06 training model 2 20240812-03:02:48 train_perplexity 317 model 2 1.1660681160741513 20240812-03:02:50 train_perplexity 317 model 1 1.1664164295804862 20240812-03:02:55 test_perplexity 317 model 2 1.1695404922104486 20240812-03:02:56 test_perplexity 317 model 1 1.1693569032141369 20240812-03:08:46 test_accuracy 317 model 2 val 1537 / 1634 20240812-03:08:56 test_accuracy 317 model 1 val 1483 / 1570 20240812-03:08:58 wrote gpt_001.pth 20240812-03:08:58 wrote gpt_002.pth 20240812-03:09:31 wrote non_validated_0317_01.png 20240812-03:10:04 wrote non_validated_0317_02.png 20240812-03:10:04 wrote state.pth 20240812-03:10:04 --- epoch 318 ---------------------------------------- 20240812-03:10:04 current_test_accuracies 0.9517 0.9446 0.9406 0.9440 0.9404 0.9534 0.9410 0.9451 0.9498 0.9461 20240812-03:10:04 training model 4 20240812-03:10:04 training model 2 20240812-03:13:46 train_perplexity 318 model 2 1.1659892759000967 20240812-03:13:51 train_perplexity 318 model 4 1.1658940538350382 20240812-03:13:52 test_perplexity 318 model 2 1.1683973701359736 20240812-03:13:55 test_perplexity 318 model 4 1.1670206911678391 20240812-03:19:57 test_accuracy 318 model 2 val 1506 / 1605 20240812-03:19:59 test_accuracy 318 model 4 val 1488 / 1583 20240812-03:20:01 wrote gpt_004.pth 20240812-03:20:02 wrote gpt_002.pth 20240812-03:20:35 wrote non_validated_0318_04.png 20240812-03:21:08 wrote non_validated_0318_02.png 20240812-03:21:08 wrote state.pth 20240812-03:21:08 --- epoch 319 ---------------------------------------- 20240812-03:21:08 current_test_accuracies 0.9517 0.9446 0.9383 0.9440 0.9400 0.9534 0.9410 0.9451 0.9498 0.9461 20240812-03:21:08 training model 2 20240812-03:21:08 training model 4 20240812-03:24:49 train_perplexity 319 model 4 1.1661002817485144 20240812-03:24:55 test_perplexity 319 model 4 1.1673304109892637 20240812-03:24:56 train_perplexity 319 model 2 1.1661409213310556 20240812-03:25:00 test_perplexity 319 model 2 1.1669812318341732 20240812-03:31:01 test_accuracy 319 model 4 val 1500 / 1588 20240812-03:31:04 test_accuracy 319 model 2 val 1473 / 1562 20240812-03:31:06 wrote gpt_002.pth 20240812-03:31:07 wrote gpt_004.pth 20240812-03:31:40 wrote non_validated_0319_02.png 20240812-03:32:13 wrote non_validated_0319_04.png 20240812-03:32:13 wrote state.pth 20240812-03:32:13 --- epoch 320 ---------------------------------------- 20240812-03:32:13 current_test_accuracies 0.9517 0.9446 0.9430 0.9440 0.9446 0.9534 0.9410 0.9451 0.9498 0.9461 20240812-03:32:13 training model 6 20240812-03:32:13 training model 2 20240812-03:35:55 train_perplexity 320 model 2 1.1653559483094773 20240812-03:35:58 train_perplexity 320 model 6 1.1653342149873038 20240812-03:36:02 test_perplexity 320 model 2 1.1683805745866291 20240812-03:36:04 test_perplexity 320 model 6 1.1692566253161198 20240812-03:41:54 test_accuracy 320 model 2 val 1555 / 1645 20240812-03:42:01 test_accuracy 320 model 6 val 1501 / 1592 20240812-03:42:03 wrote gpt_006.pth 20240812-03:42:03 wrote gpt_002.pth 20240812-03:42:36 wrote non_validated_0320_06.png 20240812-03:43:09 wrote non_validated_0320_02.png 20240812-03:43:09 wrote state.pth 20240812-03:43:09 --- epoch 321 ---------------------------------------- 20240812-03:43:09 current_test_accuracies 0.9517 0.9446 0.9453 0.9440 0.9446 0.9534 0.9428 0.9451 0.9498 0.9461 20240812-03:43:09 training model 6 20240812-03:43:09 training model 3 20240812-03:46:52 train_perplexity 321 model 3 1.1663399988493448 20240812-03:46:53 train_perplexity 321 model 6 1.1648702398670674 20240812-03:46:59 test_perplexity 321 model 3 1.16719799096436 20240812-03:47:00 test_perplexity 321 model 6 1.1675654203967232 20240812-03:52:56 test_accuracy 321 model 6 val 1526 / 1617 20240812-03:52:57 test_accuracy 321 model 3 val 1533 / 1609 20240812-03:52:59 wrote gpt_006.pth 20240812-03:53:00 wrote gpt_003.pth 20240812-03:53:33 wrote non_validated_0321_06.png 20240812-03:54:07 wrote non_validated_0321_03.png 20240812-03:54:07 wrote state.pth 20240812-03:54:07 --- epoch 322 ---------------------------------------- 20240812-03:54:07 current_test_accuracies 0.9517 0.9446 0.9453 0.9528 0.9446 0.9534 0.9437 0.9451 0.9498 0.9461 20240812-03:54:07 training model 6 20240812-03:54:07 training model 4 20240812-03:57:49 train_perplexity 322 model 4 1.165581778824438 20240812-03:57:55 test_perplexity 322 model 4 1.1669180598993814 20240812-03:57:55 train_perplexity 322 model 6 1.1649191323908765 20240812-03:57:59 test_perplexity 322 model 6 1.1686681642528416 20240812-04:03:54 test_accuracy 322 model 6 val 1521 / 1593 20240812-04:03:57 test_accuracy 322 model 4 val 1489 / 1574 20240812-04:03:59 wrote gpt_006.pth 20240812-04:03:59 wrote gpt_004.pth 20240812-04:04:32 wrote non_validated_0322_06.png 20240812-04:05:05 wrote non_validated_0322_04.png 20240812-04:05:05 wrote state.pth 20240812-04:05:05 --- epoch 323 ---------------------------------------- 20240812-04:05:05 current_test_accuracies 0.9517 0.9446 0.9453 0.9528 0.9460 0.9534 0.9548 0.9451 0.9498 0.9461 20240812-04:05:05 training model 1 20240812-04:05:05 training model 7 20240812-04:08:47 train_perplexity 323 model 7 1.165426860965476 20240812-04:08:51 train_perplexity 323 model 1 1.1660216245908235 20240812-04:08:54 test_perplexity 323 model 7 1.1666561264555313 20240812-04:08:56 test_perplexity 323 model 1 1.1684448812483337 20240812-04:14:50 test_accuracy 323 model 7 val 1499 / 1600 20240812-04:14:51 test_accuracy 323 model 1 val 1506 / 1606 20240812-04:14:53 wrote gpt_001.pth 20240812-04:14:53 wrote gpt_007.pth 20240812-04:15:28 wrote non_validated_0323_01.png 20240812-04:16:01 wrote non_validated_0323_07.png 20240812-04:16:01 wrote state.pth 20240812-04:16:01 --- epoch 324 ---------------------------------------- 20240812-04:16:01 current_test_accuracies 0.9517 0.9377 0.9453 0.9528 0.9460 0.9534 0.9548 0.9369 0.9498 0.9461 20240812-04:16:01 training model 7 20240812-04:16:01 training model 1 20240812-04:19:44 train_perplexity 324 model 1 1.1657024711811377 20240812-04:19:47 train_perplexity 324 model 7 1.1651723357281227 20240812-04:19:51 test_perplexity 324 model 1 1.1656532823104846 20240812-04:19:53 test_perplexity 324 model 7 1.1679197477849634 20240812-04:25:49 test_accuracy 324 model 1 val 1512 / 1594 20240812-04:25:51 test_accuracy 324 model 7 val 1485 / 1597 20240812-04:25:53 wrote gpt_007.pth 20240812-04:25:53 wrote gpt_001.pth 20240812-04:26:26 wrote non_validated_0324_07.png 20240812-04:27:02 wrote non_validated_0324_01.png 20240812-04:27:02 wrote state.pth 20240812-04:27:02 --- epoch 325 ---------------------------------------- 20240812-04:27:02 current_test_accuracies 0.9517 0.9486 0.9453 0.9528 0.9460 0.9534 0.9548 0.9299 0.9498 0.9461 20240812-04:27:02 training model 7 20240812-04:27:02 training model 2 20240812-04:30:43 train_perplexity 325 model 2 1.165466847054136 20240812-04:30:49 train_perplexity 325 model 7 1.1651395914819018 20240812-04:30:50 test_perplexity 325 model 2 1.1672450739142093 20240812-04:30:53 test_perplexity 325 model 7 1.1651425502140222 20240812-04:36:47 test_accuracy 325 model 2 val 1508 / 1600 20240812-04:36:53 test_accuracy 325 model 7 val 1500 / 1586 20240812-04:36:55 wrote gpt_007.pth 20240812-04:36:55 wrote gpt_002.pth 20240812-04:37:29 wrote non_validated_0325_07.png 20240812-04:38:02 wrote non_validated_0325_02.png 20240812-04:38:02 wrote state.pth 20240812-04:38:02 --- epoch 326 ---------------------------------------- 20240812-04:38:02 current_test_accuracies 0.9517 0.9486 0.9425 0.9528 0.9460 0.9534 0.9548 0.9458 0.9498 0.9461 20240812-04:38:02 training model 2 20240812-04:38:02 training model 7 20240812-04:41:45 train_perplexity 326 model 7 1.164493828073638 20240812-04:41:45 train_perplexity 326 model 2 1.1658058876964552 20240812-04:41:52 test_perplexity 326 model 7 1.1688327116346828 20240812-04:41:52 test_perplexity 326 model 2 1.1663837048544736 20240812-04:47:44 test_accuracy 326 model 7 val 1518 / 1604 20240812-04:47:47 test_accuracy 326 model 2 val 1504 / 1585 20240812-04:47:49 wrote gpt_002.pth 20240812-04:47:50 wrote gpt_007.pth 20240812-04:48:23 wrote non_validated_0326_02.png 20240812-04:48:56 wrote non_validated_0326_07.png 20240812-04:48:56 wrote state.pth 20240812-04:48:56 --- epoch 327 ---------------------------------------- 20240812-04:48:56 current_test_accuracies 0.9517 0.9486 0.9489 0.9528 0.9460 0.9534 0.9548 0.9464 0.9498 0.9461 20240812-04:48:56 training model 4 20240812-04:48:56 training model 9 20240812-04:52:39 train_perplexity 327 model 9 1.164982298794278 20240812-04:52:41 train_perplexity 327 model 4 1.165628852779098 20240812-04:52:46 test_perplexity 327 model 9 1.171933298244247 20240812-04:52:47 test_perplexity 327 model 4 1.1674650227829848 20240812-04:58:37 test_accuracy 327 model 9 val 1507 / 1620 20240812-04:58:43 test_accuracy 327 model 4 val 1503 / 1582 20240812-04:58:45 wrote gpt_004.pth 20240812-04:58:45 wrote gpt_009.pth 20240812-04:59:21 wrote non_validated_0327_04.png 20240812-04:59:54 wrote non_validated_0327_09.png 20240812-04:59:54 wrote state.pth 20240812-04:59:54 --- epoch 328 ---------------------------------------- 20240812-04:59:54 current_test_accuracies 0.9517 0.9486 0.9489 0.9528 0.9501 0.9534 0.9548 0.9464 0.9498 0.9302 20240812-04:59:54 training model 9 20240812-04:59:54 training model 7 20240812-05:03:37 train_perplexity 328 model 7 1.164587440912665 20240812-05:03:38 train_perplexity 328 model 9 1.1654896796716954 20240812-05:03:43 test_perplexity 328 model 7 1.1674391389093155 20240812-05:03:45 test_perplexity 328 model 9 1.1693909956114932 20240812-05:09:40 test_accuracy 328 model 9 val 1516 / 1597 20240812-05:09:41 test_accuracy 328 model 7 val 1509 / 1595 20240812-05:09:43 wrote gpt_009.pth 20240812-05:09:44 wrote gpt_007.pth 20240812-05:10:17 wrote non_validated_0328_09.png 20240812-05:10:51 wrote non_validated_0328_07.png 20240812-05:10:51 wrote state.pth 20240812-05:10:51 --- epoch 329 ---------------------------------------- 20240812-05:10:51 current_test_accuracies 0.9517 0.9486 0.9489 0.9528 0.9501 0.9534 0.9548 0.9461 0.9498 0.9493 20240812-05:10:51 training model 7 20240812-05:10:51 training model 1 20240812-05:14:32 train_perplexity 329 model 1 1.166374198785486 20240812-05:14:38 test_perplexity 329 model 1 1.1672441007590884 20240812-05:14:39 train_perplexity 329 model 7 1.164055159191613 20240812-05:14:43 test_perplexity 329 model 7 1.1674384508898559 20240812-05:20:35 test_accuracy 329 model 1 val 1535 / 1614 20240812-05:20:44 test_accuracy 329 model 7 val 1469 / 1563 20240812-05:20:46 wrote gpt_007.pth 20240812-05:20:46 wrote gpt_001.pth 20240812-05:21:19 wrote non_validated_0329_07.png 20240812-05:21:54 wrote non_validated_0329_01.png 20240812-05:21:55 wrote state.pth 20240812-05:21:55 --- epoch 330 ---------------------------------------- 20240812-05:21:55 current_test_accuracies 0.9517 0.9511 0.9489 0.9528 0.9501 0.9534 0.9548 0.9399 0.9498 0.9493 20240812-05:21:55 training model 7 20240812-05:21:55 training model 2 20240812-05:25:36 train_perplexity 330 model 2 1.1653310764444846 20240812-05:25:42 test_perplexity 330 model 2 1.1677881864432675 20240812-05:25:42 train_perplexity 330 model 7 1.1646194996018717 20240812-05:25:46 test_perplexity 330 model 7 1.1695716684832664 20240812-05:31:37 test_accuracy 330 model 7 val 1526 / 1622 20240812-05:31:42 test_accuracy 330 model 2 val 1501 / 1586 20240812-05:31:44 wrote gpt_007.pth 20240812-05:31:44 wrote gpt_002.pth 20240812-05:32:18 wrote non_validated_0330_07.png 20240812-05:32:51 wrote non_validated_0330_02.png 20240812-05:32:51 wrote state.pth 20240812-05:32:51 --- epoch 331 ---------------------------------------- 20240812-05:32:51 current_test_accuracies 0.9517 0.9511 0.9464 0.9528 0.9501 0.9534 0.9548 0.9408 0.9498 0.9493 20240812-05:32:51 training model 7 20240812-05:32:51 training model 2 20240812-05:36:32 train_perplexity 331 model 2 1.1648727438472437 20240812-05:36:38 test_perplexity 331 model 2 1.1688584729155522 20240812-05:36:39 train_perplexity 331 model 7 1.1642739415825425 20240812-05:36:43 test_perplexity 331 model 7 1.1682940396037853 20240812-05:42:39 test_accuracy 331 model 2 val 1498 / 1586 20240812-05:42:41 test_accuracy 331 model 7 val 1512 / 1587 20240812-05:42:43 wrote gpt_007.pth 20240812-05:42:44 wrote gpt_002.pth 20240812-05:43:18 wrote non_validated_0331_07.png 20240812-05:43:51 wrote non_validated_0331_02.png 20240812-05:43:51 wrote state.pth 20240812-05:43:51 --- epoch 332 ---------------------------------------- 20240812-05:43:51 current_test_accuracies 0.9517 0.9511 0.9445 0.9528 0.9501 0.9534 0.9548 0.9527 0.9498 0.9493 20240812-05:43:51 training model 2 20240812-05:43:51 training model 9 20240812-05:47:34 train_perplexity 332 model 9 1.165066142767382 20240812-05:47:36 train_perplexity 332 model 2 1.1649991153339818 20240812-05:47:41 test_perplexity 332 model 9 1.1688703605340822 20240812-05:47:42 test_perplexity 332 model 2 1.1686742865584985 20240812-05:53:32 test_accuracy 332 model 9 val 1549 / 1628 20240812-05:53:40 test_accuracy 332 model 2 val 1495 / 1589 20240812-05:53:41 wrote gpt_002.pth 20240812-05:53:42 wrote gpt_009.pth 20240812-05:54:17 wrote non_validated_0332_02.png 20240812-05:54:52 wrote non_validated_0332_09.png 20240812-05:54:52 wrote state.pth 20240812-05:54:52 --- epoch 333 ---------------------------------------- 20240812-05:54:52 current_test_accuracies 0.9517 0.9511 0.9408 0.9528 0.9501 0.9534 0.9548 0.9527 0.9498 0.9515 20240812-05:54:52 training model 2 20240812-05:54:52 training model 8 20240812-05:58:34 train_perplexity 333 model 8 1.165268247354795 20240812-05:58:38 train_perplexity 333 model 2 1.1647079262950875 20240812-05:58:41 test_perplexity 333 model 8 1.1693465945444577 20240812-05:58:43 test_perplexity 333 model 2 1.168371640997911 20240812-06:04:37 test_accuracy 333 model 8 val 1489 / 1603 20240812-06:04:39 test_accuracy 333 model 2 val 1520 / 1610 20240812-06:04:41 wrote gpt_002.pth 20240812-06:04:42 wrote gpt_008.pth 20240812-06:05:14 wrote non_validated_0333_02.png 20240812-06:05:47 wrote non_validated_0333_08.png 20240812-06:05:47 wrote state.pth 20240812-06:05:47 --- epoch 334 ---------------------------------------- 20240812-06:05:47 current_test_accuracies 0.9517 0.9511 0.9441 0.9528 0.9501 0.9534 0.9548 0.9527 0.9289 0.9515 20240812-06:05:47 training model 8 20240812-06:05:47 training model 2 20240812-06:09:28 train_perplexity 334 model 2 1.1643209460134836 20240812-06:09:35 train_perplexity 334 model 8 1.1652303257358727 20240812-06:09:35 test_perplexity 334 model 2 1.1648857047149312 20240812-06:09:39 test_perplexity 334 model 8 1.167413581563611 20240812-06:15:34 test_accuracy 334 model 8 val 1494 / 1592 20240812-06:15:36 test_accuracy 334 model 2 val 1485 / 1587 20240812-06:15:38 wrote gpt_008.pth 20240812-06:15:39 wrote gpt_002.pth 20240812-06:16:13 wrote non_validated_0334_08.png 20240812-06:16:46 wrote non_validated_0334_02.png 20240812-06:16:46 wrote state.pth 20240812-06:16:46 --- epoch 335 ---------------------------------------- 20240812-06:16:46 current_test_accuracies 0.9517 0.9511 0.9357 0.9528 0.9501 0.9534 0.9548 0.9527 0.9384 0.9515 20240812-06:16:46 training model 2 20240812-06:16:46 training model 8 20240812-06:20:28 train_perplexity 335 model 8 1.1654175724552613 20240812-06:20:34 train_perplexity 335 model 2 1.1644584915735163 20240812-06:20:35 test_perplexity 335 model 8 1.1666175605974902 20240812-06:20:38 test_perplexity 335 model 2 1.1668381295268828 20240812-06:26:39 test_accuracy 335 model 2 val 1503 / 1570 20240812-06:26:41 test_accuracy 335 model 8 val 1505 / 1585 20240812-06:26:43 wrote gpt_002.pth 20240812-06:26:44 wrote gpt_008.pth 20240812-06:27:19 wrote non_validated_0335_02.png 20240812-06:27:53 wrote non_validated_0335_08.png 20240812-06:27:53 wrote state.pth 20240812-06:27:53 --- epoch 336 ---------------------------------------- 20240812-06:27:53 current_test_accuracies 0.9517 0.9511 0.9573 0.9528 0.9501 0.9534 0.9548 0.9527 0.9495 0.9515 20240812-06:27:53 training model 8 20240812-06:27:53 training model 4 20240812-06:31:35 train_perplexity 336 model 4 1.1655566414328313 20240812-06:31:38 train_perplexity 336 model 8 1.165235681583637 20240812-06:31:42 test_perplexity 336 model 4 1.1669175128159013 20240812-06:31:44 test_perplexity 336 model 8 1.167095790184812 20240812-06:37:30 test_accuracy 336 model 8 val 1554 / 1636 20240812-06:37:38 test_accuracy 336 model 4 val 1516 / 1583 20240812-06:37:39 wrote gpt_008.pth 20240812-06:37:40 wrote gpt_004.pth 20240812-06:38:15 wrote non_validated_0336_08.png 20240812-06:38:48 wrote non_validated_0336_04.png 20240812-06:38:48 wrote state.pth 20240812-06:38:48 --- epoch 337 ---------------------------------------- 20240812-06:38:48 current_test_accuracies 0.9517 0.9511 0.9573 0.9528 0.9577 0.9534 0.9548 0.9527 0.9499 0.9515 20240812-06:38:48 training model 8 20240812-06:38:48 training model 1 20240812-06:42:30 train_perplexity 337 model 1 1.166283978071049 20240812-06:42:33 train_perplexity 337 model 8 1.1651149840178083 20240812-06:42:37 test_perplexity 337 model 1 1.1672783125622972 20240812-06:42:39 test_perplexity 337 model 8 1.1672090838033937 20240812-06:48:36 test_accuracy 337 model 8 val 1548 / 1619 20240812-06:48:44 test_accuracy 337 model 1 val 1490 / 1579 20240812-06:48:45 wrote gpt_008.pth 20240812-06:48:46 wrote gpt_001.pth 20240812-06:49:21 wrote non_validated_0337_08.png 20240812-06:49:55 wrote non_validated_0337_01.png 20240812-06:49:55 wrote state.pth 20240812-06:49:55 --- epoch 338 ---------------------------------------- 20240812-06:49:55 current_test_accuracies 0.9517 0.9436 0.9573 0.9528 0.9577 0.9534 0.9548 0.9527 0.9561 0.9515 20240812-06:49:55 training model 1 20240812-06:49:55 training model 9 20240812-06:53:37 train_perplexity 338 model 9 1.1651484815470503 20240812-06:53:42 test_perplexity 338 model 9 1.1689156049566438 20240812-06:53:43 train_perplexity 338 model 1 1.1661414186567187 20240812-06:53:47 test_perplexity 338 model 1 1.1675589191870799 20240812-06:59:50 test_accuracy 338 model 9 val 1499 / 1598 20240812-06:59:52 test_accuracy 338 model 1 val 1498 / 1589 20240812-06:59:54 wrote gpt_001.pth 20240812-06:59:55 wrote gpt_009.pth 20240812-07:00:28 wrote non_validated_0338_01.png 20240812-07:01:03 wrote non_validated_0338_09.png 20240812-07:01:03 wrote state.pth 20240812-07:01:03 --- epoch 339 ---------------------------------------- 20240812-07:01:03 current_test_accuracies 0.9517 0.9427 0.9573 0.9528 0.9577 0.9534 0.9548 0.9527 0.9561 0.9380 20240812-07:01:03 training model 9 20240812-07:01:03 training model 1 20240812-07:04:46 train_perplexity 339 model 1 1.1659223400202436 20240812-07:04:48 train_perplexity 339 model 9 1.1651191106578045 20240812-07:04:53 test_perplexity 339 model 1 1.165839433145822 20240812-07:04:54 test_perplexity 339 model 9 1.1693093913900312 20240812-07:10:56 test_accuracy 339 model 9 val 1523 / 1602 20240812-07:10:58 test_accuracy 339 model 1 val 1505 / 1581 20240812-07:11:00 wrote gpt_009.pth 20240812-07:11:01 wrote gpt_001.pth 20240812-07:11:33 wrote non_validated_0339_09.png 20240812-07:12:06 wrote non_validated_0339_01.png 20240812-07:12:06 wrote state.pth 20240812-07:12:06 --- epoch 340 ---------------------------------------- 20240812-07:12:06 current_test_accuracies 0.9517 0.9519 0.9573 0.9528 0.9577 0.9534 0.9548 0.9527 0.9561 0.9507 20240812-07:15:31 keep c_quizzes model 2 validated 23 / 420 (5.48%) nb_accumulated 23 / 420 (finishes Mon 08:14 -- 403/h) 20240812-07:18:32 keep c_quizzes model 0 validated 28 / 420 (6.67%) nb_accumulated 51 / 420 (finishes Mon 08:05 -- 475/h) 20240812-07:21:34 keep c_quizzes model 0 validated 30 / 420 (7.14%) nb_accumulated 81 / 420 (finishes Mon 08:01 -- 513/h) 20240812-07:24:35 keep c_quizzes model 8 validated 22 / 420 (5.24%) nb_accumulated 103 / 420 (finishes Mon 08:03 -- 494/h) 20240812-07:27:36 keep c_quizzes model 3 validated 27 / 420 (6.43%) nb_accumulated 130 / 420 (finishes Mon 08:02 -- 502/h) 20240812-07:30:37 keep c_quizzes model 1 validated 16 / 420 (3.81%) nb_accumulated 146 / 420 (finishes Mon 08:05 -- 472/h) 20240812-07:33:39 keep c_quizzes model 8 validated 31 / 420 (7.38%) nb_accumulated 177 / 420 (finishes Mon 08:03 -- 492/h) 20240812-07:36:43 keep c_quizzes model 9 validated 25 / 420 (5.95%) nb_accumulated 202 / 420 (finishes Mon 08:03 -- 492/h) 20240812-07:39:45 keep c_quizzes model 3 validated 19 / 420 (4.52%) nb_accumulated 221 / 420 (finishes Mon 08:04 -- 479/h) 20240812-07:42:46 keep c_quizzes model 5 validated 21 / 420 (5.00%) nb_accumulated 242 / 420 (finishes Mon 08:05 -- 473/h) 20240812-07:45:46 keep c_quizzes model 9 validated 23 / 420 (5.48%) nb_accumulated 265 / 420 (finishes Mon 08:05 -- 472/h) 20240812-07:48:47 keep c_quizzes model 1 validated 19 / 420 (4.52%) nb_accumulated 284 / 420 (finishes Mon 08:06 -- 464/h) 20240812-07:51:47 keep c_quizzes model 4 validated 19 / 420 (4.52%) nb_accumulated 303 / 420 (finishes Mon 08:07 -- 458/h) 20240812-07:54:47 keep c_quizzes model 3 validated 19 / 420 (4.52%) nb_accumulated 322 / 420 (finishes Mon 08:07 -- 452/h) 20240812-07:57:47 keep c_quizzes model 6 validated 29 / 420 (6.90%) nb_accumulated 351 / 420 (finishes Mon 08:06 -- 461/h) 20240812-08:00:47 keep c_quizzes model 4 validated 22 / 420 (5.24%) nb_accumulated 373 / 420 (finishes Mon 08:06 -- 459/h) 20240812-08:03:47 keep c_quizzes model 0 validated 33 / 420 (7.86%) nb_accumulated 406 / 420 (finishes Mon 08:05 -- 471/h) 20240812-08:06:47 keep c_quizzes model 9 validated 21 / 420 (5.00%) nb_accumulated 427 / 420 (finishes now! -- 468/h) 20240812-08:07:01 wrote c_quizzes.pth 20240812-08:07:01 training model 0 20240812-08:07:01 training model 1 20240812-08:10:43 train_perplexity 340 model 0 1.1669241949028313 20240812-08:10:43 train_perplexity 340 model 1 1.166350674735264 20240812-08:10:50 test_perplexity 340 model 0 1.168341399185248 20240812-08:10:50 test_perplexity 340 model 1 1.169918731882507 20240812-08:16:44 test_accuracy 340 model 0 val 1526 / 1619 20240812-08:16:44 test_accuracy 340 model 1 val 1528 / 1615 20240812-08:16:47 wrote gpt_000.pth 20240812-08:16:47 wrote gpt_001.pth 20240812-08:17:20 wrote non_validated_0340_00.png 20240812-08:17:53 wrote non_validated_0340_01.png 20240812-08:17:53 wrote state.pth 20240812-08:17:53 --- epoch 341 ---------------------------------------- 20240812-08:17:53 current_test_accuracies 0.9426 0.9461 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 20240812-08:17:53 training model 2 20240812-08:17:53 training model 3 20240812-08:21:36 train_perplexity 341 model 3 1.1667231535165965 20240812-08:21:36 train_perplexity 341 model 2 1.1652882855781823 20240812-08:21:43 test_perplexity 341 model 3 1.1694171149876869 20240812-08:21:43 test_perplexity 341 model 2 1.1686558215644638 20240812-08:27:38 test_accuracy 341 model 2 val 1547 / 1623 20240812-08:27:40 test_accuracy 341 model 3 val 1530 / 1616 20240812-08:27:42 wrote gpt_002.pth 20240812-08:27:43 wrote gpt_003.pth 20240812-08:28:15 wrote non_validated_0341_02.png 20240812-08:28:48 wrote non_validated_0341_03.png 20240812-08:28:48 wrote state.pth 20240812-08:28:48 --- epoch 342 ---------------------------------------- 20240812-08:28:48 current_test_accuracies 0.9426 0.9461 0.9532 0.9468 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 20240812-08:28:48 training model 4 20240812-08:28:48 training model 5 20240812-08:32:30 train_perplexity 342 model 5 1.1663015478907264 20240812-08:32:31 train_perplexity 342 model 4 1.1670485784159341 20240812-08:32:38 test_perplexity 342 model 5 1.1695638396448178 20240812-08:32:38 test_perplexity 342 model 4 1.1678578511107265 20240812-08:38:29 test_accuracy 342 model 4 val 1525 / 1619 20240812-08:38:34 test_accuracy 342 model 5 val 1504 / 1588 20240812-08:38:36 wrote gpt_004.pth 20240812-08:38:36 wrote gpt_005.pth 20240812-08:39:09 wrote non_validated_0342_04.png 20240812-08:39:42 wrote non_validated_0342_05.png 20240812-08:39:42 wrote state.pth 20240812-08:39:42 --- epoch 343 ---------------------------------------- 20240812-08:39:42 current_test_accuracies 0.9426 0.9461 0.9532 0.9468 0.9419 0.9471 0.0000 0.0000 0.0000 0.0000 20240812-08:39:42 training model 6 20240812-08:39:42 training model 7 20240812-08:43:24 train_perplexity 343 model 7 1.1651929310617637 20240812-08:43:26 train_perplexity 343 model 6 1.1656240814306205 20240812-08:43:31 test_perplexity 343 model 7 1.1700677182923458 20240812-08:43:32 test_perplexity 343 model 6 1.1696694838206854 20240812-08:49:21 test_accuracy 343 model 6 val 1507 / 1603 20240812-08:49:23 test_accuracy 343 model 7 val 1501 / 1593 20240812-08:49:25 wrote gpt_006.pth 20240812-08:49:26 wrote gpt_007.pth 20240812-08:49:59 wrote non_validated_0343_06.png 20240812-08:50:31 wrote non_validated_0343_07.png 20240812-08:50:31 wrote state.pth 20240812-08:50:31 --- epoch 344 ---------------------------------------- 20240812-08:50:31 current_test_accuracies 0.9426 0.9461 0.9532 0.9468 0.9419 0.9471 0.9401 0.9422 0.0000 0.0000 20240812-08:50:31 training model 8 20240812-08:50:31 training model 9 20240812-08:54:14 train_perplexity 344 model 9 1.1659828824633358 20240812-08:54:15 train_perplexity 344 model 8 1.16622959622444 20240812-08:54:21 test_perplexity 344 model 9 1.1699755204315827 20240812-08:54:22 test_perplexity 344 model 8 1.1675342290996327 20240812-09:00:14 test_accuracy 344 model 9 val 1528 / 1610 20240812-09:00:16 test_accuracy 344 model 8 val 1512 / 1603 20240812-09:00:17 wrote gpt_008.pth 20240812-09:00:18 wrote gpt_009.pth 20240812-09:00:51 wrote non_validated_0344_08.png 20240812-09:01:23 wrote non_validated_0344_09.png 20240812-09:01:23 wrote state.pth 20240812-09:01:23 --- epoch 345 ---------------------------------------- 20240812-09:01:23 current_test_accuracies 0.9426 0.9461 0.9532 0.9468 0.9419 0.9471 0.9401 0.9422 0.9432 0.9491 20240812-09:01:23 training model 6 20240812-09:01:23 training model 4 20240812-09:05:05 train_perplexity 345 model 4 1.1662927879248515 20240812-09:05:09 train_perplexity 345 model 6 1.165636079174631 20240812-09:05:12 test_perplexity 345 model 4 1.169137860758735 20240812-09:05:14 test_perplexity 345 model 6 1.1694385472370437 20240812-09:11:07 test_accuracy 345 model 4 val 1516 / 1607 20240812-09:11:10 test_accuracy 345 model 6 val 1504 / 1596 20240812-09:11:12 wrote gpt_006.pth 20240812-09:11:13 wrote gpt_004.pth 20240812-09:11:45 wrote non_validated_0345_06.png 20240812-09:12:18 wrote non_validated_0345_04.png 20240812-09:12:18 wrote state.pth 20240812-09:12:18 --- epoch 346 ---------------------------------------- 20240812-09:12:18 current_test_accuracies 0.9426 0.9461 0.9532 0.9468 0.9434 0.9471 0.9424 0.9422 0.9432 0.9491 20240812-09:12:18 training model 7 20240812-09:12:18 training model 6 20240812-09:16:01 train_perplexity 346 model 6 1.1657469658183701 20240812-09:16:04 train_perplexity 346 model 7 1.1646363824590633 20240812-09:16:07 test_perplexity 346 model 6 1.1695684079313589 20240812-09:16:09 test_perplexity 346 model 7 1.1704865942704759 20240812-09:21:56 test_accuracy 346 model 6 val 1515 / 1613 20240812-09:22:00 test_accuracy 346 model 7 val 1522 / 1598 20240812-09:22:02 wrote gpt_007.pth 20240812-09:22:02 wrote gpt_006.pth 20240812-09:22:35 wrote non_validated_0346_07.png 20240812-09:23:07 wrote non_validated_0346_06.png 20240812-09:23:08 wrote state.pth 20240812-09:23:08 --- epoch 347 ---------------------------------------- 20240812-09:23:08 current_test_accuracies 0.9426 0.9461 0.9532 0.9468 0.9434 0.9471 0.9392 0.9524 0.9432 0.9491 20240812-09:23:08 training model 6 20240812-09:23:08 training model 0 20240812-09:26:50 train_perplexity 347 model 0 1.1666537157777044 20240812-09:26:51 train_perplexity 347 model 6 1.1652220880257664 20240812-09:26:57 test_perplexity 347 model 0 1.1694757229151218 20240812-09:26:58 test_perplexity 347 model 6 1.170074265073282 20240812-09:32:51 test_accuracy 347 model 6 val 1515 / 1590 20240812-09:32:52 test_accuracy 347 model 0 val 1509 / 1598 20240812-09:32:54 wrote gpt_006.pth 20240812-09:32:54 wrote gpt_000.pth 20240812-09:33:27 wrote non_validated_0347_06.png 20240812-09:34:00 wrote non_validated_0347_00.png 20240812-09:34:00 wrote state.pth 20240812-09:34:00 --- epoch 348 ---------------------------------------- 20240812-09:34:00 current_test_accuracies 0.9443 0.9461 0.9532 0.9468 0.9434 0.9471 0.9528 0.9524 0.9432 0.9491 20240812-09:34:00 training model 8 20240812-09:34:00 training model 4 20240812-09:37:42 train_perplexity 348 model 4 1.166083605382731 20240812-09:37:46 train_perplexity 348 model 8 1.1657512420520657 20240812-09:37:48 test_perplexity 348 model 4 1.1704189761940653 20240812-09:37:51 test_perplexity 348 model 8 1.168238553196369 20240812-09:43:43 test_accuracy 348 model 4 val 1524 / 1609 20240812-09:43:49 test_accuracy 348 model 8 val 1479 / 1569 20240812-09:43:51 wrote gpt_008.pth 20240812-09:43:52 wrote gpt_004.pth 20240812-09:44:25 wrote non_validated_0348_08.png 20240812-09:44:57 wrote non_validated_0348_04.png 20240812-09:44:57 wrote state.pth 20240812-09:44:57 --- epoch 349 ---------------------------------------- 20240812-09:44:57 current_test_accuracies 0.9443 0.9461 0.9532 0.9468 0.9472 0.9471 0.9528 0.9524 0.9426 0.9491 20240812-09:44:57 training model 8 20240812-09:44:57 training model 0 20240812-09:48:38 train_perplexity 349 model 0 1.1662456210757306 20240812-09:48:44 test_perplexity 349 model 0 1.1675061755638112 20240812-09:48:45 train_perplexity 349 model 8 1.1653801810028723 20240812-09:48:49 test_perplexity 349 model 8 1.1700845468123036 20240812-09:54:36 test_accuracy 349 model 8 val 1520 / 1611 20240812-09:54:43 test_accuracy 349 model 0 val 1479 / 1568 20240812-09:54:44 wrote gpt_008.pth 20240812-09:54:45 wrote gpt_000.pth 20240812-09:55:18 wrote non_validated_0349_08.png 20240812-09:55:50 wrote non_validated_0349_00.png 20240812-09:55:50 wrote state.pth 20240812-09:55:50 --- epoch 350 ---------------------------------------- 20240812-09:55:50 current_test_accuracies 0.9432 0.9461 0.9532 0.9468 0.9472 0.9471 0.9528 0.9524 0.9435 0.9491 20240812-09:55:50 training model 0 20240812-09:55:50 training model 8 20240812-09:59:32 train_perplexity 350 model 8 1.1654179299793874 20240812-09:59:37 train_perplexity 350 model 0 1.166030986199998 20240812-09:59:38 test_perplexity 350 model 8 1.1692528562308346 20240812-09:59:41 test_perplexity 350 model 0 1.1688113076873865 20240812-10:05:24 test_accuracy 350 model 0 val 1541 / 1627 20240812-10:05:30 test_accuracy 350 model 8 val 1502 / 1582 20240812-10:05:32 wrote gpt_000.pth 20240812-10:05:33 wrote gpt_008.pth 20240812-10:06:05 wrote non_validated_0350_00.png 20240812-10:06:38 wrote non_validated_0350_08.png 20240812-10:06:38 wrote state.pth 20240812-10:06:38 --- epoch 351 ---------------------------------------- 20240812-10:06:38 current_test_accuracies 0.9471 0.9461 0.9532 0.9468 0.9472 0.9471 0.9528 0.9524 0.9494 0.9491 20240812-10:06:38 training model 1 20240812-10:06:38 training model 3 20240812-10:10:19 train_perplexity 351 model 3 1.166584780261983 20240812-10:10:25 test_perplexity 351 model 3 1.1707622396881967 20240812-10:10:26 train_perplexity 351 model 1 1.166066634888527 20240812-10:10:30 test_perplexity 351 model 1 1.1674761868770047 20240812-10:16:22 test_accuracy 351 model 3 val 1511 / 1603 20240812-10:16:24 test_accuracy 351 model 1 val 1525 / 1598 20240812-10:16:26 wrote gpt_001.pth 20240812-10:16:27 wrote gpt_003.pth 20240812-10:17:00 wrote non_validated_0351_01.png 20240812-10:17:33 wrote non_validated_0351_03.png 20240812-10:17:33 wrote state.pth 20240812-10:17:33 --- epoch 352 ---------------------------------------- 20240812-10:17:33 current_test_accuracies 0.9471 0.9543 0.9532 0.9426 0.9472 0.9471 0.9528 0.9524 0.9494 0.9491 20240812-10:17:33 training model 3 20240812-10:17:33 training model 5 20240812-10:21:15 train_perplexity 352 model 3 1.1666395049196208 20240812-10:21:15 train_perplexity 352 model 5 1.1656339432156624 20240812-10:21:23 test_perplexity 352 model 3 1.169293291662288 20240812-10:21:23 test_perplexity 352 model 5 1.1689993873230735 20240812-10:27:18 test_accuracy 352 model 5 val 1505 / 1596 20240812-10:27:20 test_accuracy 352 model 3 val 1490 / 1581 20240812-10:27:22 wrote gpt_003.pth 20240812-10:27:23 wrote gpt_005.pth 20240812-10:27:55 wrote non_validated_0352_03.png 20240812-10:28:28 wrote non_validated_0352_05.png 20240812-10:28:28 wrote state.pth 20240812-10:28:28 --- epoch 353 ---------------------------------------- 20240812-10:28:28 current_test_accuracies 0.9471 0.9543 0.9532 0.9424 0.9472 0.9430 0.9528 0.9524 0.9494 0.9491 20240812-10:28:28 training model 3 20240812-10:28:28 training model 5 20240812-10:32:11 train_perplexity 353 model 3 1.1661937176003205 20240812-10:32:11 train_perplexity 353 model 5 1.1651678418593951 20240812-10:32:18 test_perplexity 353 model 3 1.1693998098977598 20240812-10:32:18 test_perplexity 353 model 5 1.1697310217974215 20240812-10:38:14 test_accuracy 353 model 3 val 1491 / 1594 20240812-10:38:16 test_accuracy 353 model 5 val 1508 / 1581 20240812-10:38:18 wrote gpt_003.pth 20240812-10:38:19 wrote gpt_005.pth 20240812-10:38:51 wrote non_validated_0353_03.png 20240812-10:39:24 wrote non_validated_0353_05.png 20240812-10:39:24 wrote state.pth 20240812-10:39:24 --- epoch 354 ---------------------------------------- 20240812-10:39:24 current_test_accuracies 0.9471 0.9543 0.9532 0.9354 0.9472 0.9538 0.9528 0.9524 0.9494 0.9491 20240812-10:39:24 training model 3 20240812-10:39:24 training model 0 20240812-10:43:06 train_perplexity 354 model 0 1.1661012909723105 20240812-10:43:10 train_perplexity 354 model 3 1.1661533684921952 20240812-10:43:13 test_perplexity 354 model 0 1.1670427346762995 20240812-10:43:15 test_perplexity 354 model 3 1.168306225923806 20240812-10:49:07 test_accuracy 354 model 3 val 1521 / 1603 20240812-10:49:09 test_accuracy 354 model 0 val 1515 / 1595 20240812-10:49:11 wrote gpt_003.pth 20240812-10:49:12 wrote gpt_000.pth 20240812-10:49:44 wrote non_validated_0354_03.png 20240812-10:50:17 wrote non_validated_0354_00.png 20240812-10:50:17 wrote state.pth 20240812-10:50:17 --- epoch 355 ---------------------------------------- 20240812-10:50:17 current_test_accuracies 0.9498 0.9543 0.9532 0.9488 0.9472 0.9538 0.9528 0.9524 0.9494 0.9491 20240812-10:50:17 training model 4 20240812-10:50:17 training model 3 20240812-10:53:59 train_perplexity 355 model 3 1.1662633466713523 20240812-10:54:02 train_perplexity 355 model 4 1.1657111257311474 20240812-10:54:05 test_perplexity 355 model 3 1.1687764150471018 20240812-10:54:08 test_perplexity 355 model 4 1.1680118534806716 20240812-11:00:00 test_accuracy 355 model 4 val 1505 / 1605 20240812-11:00:03 test_accuracy 355 model 3 val 1496 / 1580 20240812-11:00:05 wrote gpt_004.pth 20240812-11:00:06 wrote gpt_003.pth 20240812-11:00:39 wrote non_validated_0355_04.png 20240812-11:01:11 wrote non_validated_0355_03.png 20240812-11:01:11 wrote state.pth 20240812-11:01:11 --- epoch 356 ---------------------------------------- 20240812-11:01:11 current_test_accuracies 0.9498 0.9543 0.9532 0.9468 0.9377 0.9538 0.9528 0.9524 0.9494 0.9491 20240812-11:01:11 training model 4 20240812-11:01:11 training model 3 20240812-11:04:53 train_perplexity 356 model 3 1.1658583825178035 20240812-11:04:55 train_perplexity 356 model 4 1.1658532722681416 20240812-11:05:00 test_perplexity 356 model 3 1.168465553949417 20240812-11:05:02 test_perplexity 356 model 4 1.1692325492025255 20240812-11:10:52 test_accuracy 356 model 4 val 1519 / 1619 20240812-11:10:54 test_accuracy 356 model 3 val 1524 / 1606 20240812-11:10:56 wrote gpt_004.pth 20240812-11:10:57 wrote gpt_003.pth 20240812-11:11:29 wrote non_validated_0356_04.png 20240812-11:12:02 wrote non_validated_0356_03.png 20240812-11:12:02 wrote state.pth 20240812-11:12:02 --- epoch 357 ---------------------------------------- 20240812-11:12:02 current_test_accuracies 0.9498 0.9543 0.9532 0.9489 0.9382 0.9538 0.9528 0.9524 0.9494 0.9491 20240812-11:12:02 training model 4 20240812-11:12:02 training model 3 20240812-11:15:44 train_perplexity 357 model 3 1.1654811812725552 20240812-11:15:49 train_perplexity 357 model 4 1.165306876223476 20240812-11:15:50 test_perplexity 357 model 3 1.1681030156319887 20240812-11:15:53 test_perplexity 357 model 4 1.1687670310585125 20240812-11:21:48 test_accuracy 357 model 4 val 1527 / 1597 20240812-11:21:54 test_accuracy 357 model 3 val 1486 / 1558 20240812-11:21:56 wrote gpt_004.pth 20240812-11:21:56 wrote gpt_003.pth 20240812-11:22:29 wrote non_validated_0357_04.png 20240812-11:23:02 wrote non_validated_0357_03.png 20240812-11:23:02 wrote state.pth 20240812-11:23:02 --- epoch 358 ---------------------------------------- 20240812-11:23:02 current_test_accuracies 0.9498 0.9543 0.9532 0.9538 0.9562 0.9538 0.9528 0.9524 0.9494 0.9491 20240812-11:23:02 training model 9 20240812-11:23:02 training model 8 20240812-11:26:44 train_perplexity 358 model 8 1.1652540969448042 20240812-11:26:49 train_perplexity 358 model 9 1.1652075750790736 20240812-11:26:50 test_perplexity 358 model 8 1.1707524699094511 20240812-11:26:53 test_perplexity 358 model 9 1.1673357857089985 20240812-11:32:39 test_accuracy 358 model 8 val 1536 / 1617 20240812-11:32:40 test_accuracy 358 model 9 val 1530 / 1610 20240812-11:32:42 wrote gpt_009.pth 20240812-11:32:43 wrote gpt_008.pth 20240812-11:33:16 wrote non_validated_0358_09.png 20240812-11:33:48 wrote non_validated_0358_08.png 20240812-11:33:48 wrote state.pth 20240812-11:33:48 --- epoch 359 ---------------------------------------- 20240812-11:33:48 current_test_accuracies 0.9498 0.9543 0.9532 0.9538 0.9562 0.9538 0.9528 0.9524 0.9499 0.9503 20240812-11:33:48 training model 0 20240812-11:33:48 training model 8 20240812-11:37:30 train_perplexity 359 model 8 1.1646751570110998 20240812-11:37:34 train_perplexity 359 model 0 1.165773560304828 20240812-11:37:37 test_perplexity 359 model 8 1.1699361400994794 20240812-11:37:39 test_perplexity 359 model 0 1.1704454683320447 20240812-11:43:23 test_accuracy 359 model 0 val 1536 / 1625 20240812-11:43:27 test_accuracy 359 model 8 val 1519 / 1599 20240812-11:43:29 wrote gpt_000.pth 20240812-11:43:29 wrote gpt_008.pth 20240812-11:44:03 wrote non_validated_0359_00.png 20240812-11:44:36 wrote non_validated_0359_08.png 20240812-11:44:36 wrote state.pth 20240812-11:44:36 --- epoch 360 ---------------------------------------- 20240812-11:44:36 current_test_accuracies 0.9452 0.9543 0.9532 0.9538 0.9562 0.9538 0.9528 0.9524 0.9500 0.9503 20240812-11:44:36 training model 0 20240812-11:44:36 training model 8 20240812-11:48:18 train_perplexity 360 model 8 1.1645609468568008 20240812-11:48:20 train_perplexity 360 model 0 1.1654866945664293 20240812-11:48:25 test_perplexity 360 model 8 1.1674828820357668 20240812-11:48:26 test_perplexity 360 model 0 1.1670670278626656 20240812-11:54:19 test_accuracy 360 model 0 val 1509 / 1608 20240812-11:54:20 test_accuracy 360 model 8 val 1497 / 1580 20240812-11:54:22 wrote gpt_000.pth 20240812-11:54:23 wrote gpt_008.pth 20240812-11:54:56 wrote non_validated_0360_00.png 20240812-11:55:28 wrote non_validated_0360_08.png 20240812-11:55:28 wrote state.pth 20240812-11:55:28 --- epoch 361 ---------------------------------------- 20240812-11:55:28 current_test_accuracies 0.9384 0.9543 0.9532 0.9538 0.9562 0.9538 0.9528 0.9524 0.9475 0.9503 20240812-11:55:28 training model 0 20240812-11:55:28 training model 8 20240812-11:59:10 train_perplexity 361 model 8 1.164387604916824 20240812-11:59:14 train_perplexity 361 model 0 1.165240035623683 20240812-11:59:17 test_perplexity 361 model 8 1.1705201977575295 20240812-11:59:19 test_perplexity 361 model 0 1.1692696068218178 20240812-12:05:05 test_accuracy 361 model 0 val 1518 / 1611 20240812-12:05:07 test_accuracy 361 model 8 val 1507 / 1601 20240812-12:05:09 wrote gpt_000.pth 20240812-12:05:09 wrote gpt_008.pth 20240812-12:05:42 wrote non_validated_0361_00.png 20240812-12:06:16 wrote non_validated_0361_08.png 20240812-12:06:16 wrote state.pth 20240812-12:06:16 --- epoch 362 ---------------------------------------- 20240812-12:06:16 current_test_accuracies 0.9423 0.9543 0.9532 0.9538 0.9562 0.9538 0.9528 0.9524 0.9413 0.9503 20240812-12:06:16 training model 8 20240812-12:06:16 training model 0 20240812-12:09:59 train_perplexity 362 model 0 1.1656140049287151 20240812-12:09:59 train_perplexity 362 model 8 1.164619803473654 20240812-12:10:06 test_perplexity 362 model 8 1.1680240314182107 20240812-12:10:06 test_perplexity 362 model 0 1.1694999732948268 20240812-12:15:51 test_accuracy 362 model 0 val 1538 / 1621 20240812-12:15:52 test_accuracy 362 model 8 val 1545 / 1625 20240812-12:15:54 wrote gpt_008.pth 20240812-12:15:54 wrote gpt_000.pth 20240812-12:16:27 wrote non_validated_0362_08.png 20240812-12:16:59 wrote non_validated_0362_00.png 20240812-12:17:00 wrote state.pth 20240812-12:17:00 --- epoch 363 ---------------------------------------- 20240812-12:17:00 current_test_accuracies 0.9488 0.9543 0.9532 0.9538 0.9562 0.9538 0.9528 0.9524 0.9508 0.9503 20240812-12:17:00 training model 0 20240812-12:17:00 training model 9 20240812-12:20:41 train_perplexity 363 model 9 1.165600586767862 20240812-12:20:44 train_perplexity 363 model 0 1.1654607792912655 20240812-12:20:48 test_perplexity 363 model 9 1.1688500788722915 20240812-12:20:50 test_perplexity 363 model 0 1.169039521549929 20240812-12:26:44 test_accuracy 363 model 0 val 1535 / 1617 20240812-12:26:49 test_accuracy 363 model 9 val 1502 / 1575 20240812-12:26:51 wrote gpt_000.pth 20240812-12:26:52 wrote gpt_009.pth 20240812-12:27:24 wrote non_validated_0363_00.png 20240812-12:27:57 wrote non_validated_0363_09.png 20240812-12:27:57 wrote state.pth 20240812-12:27:57 --- epoch 364 ---------------------------------------- 20240812-12:27:57 current_test_accuracies 0.9493 0.9543 0.9532 0.9538 0.9562 0.9538 0.9528 0.9524 0.9508 0.9537 20240812-12:27:57 training model 0 20240812-12:27:57 training model 8 20240812-12:31:40 train_perplexity 364 model 8 1.1636848073096107 20240812-12:31:41 train_perplexity 364 model 0 1.16472749262772 20240812-12:31:48 test_perplexity 364 model 8 1.168858022459463 20240812-12:31:48 test_perplexity 364 model 0 1.1671616240278286 20240812-12:37:41 test_accuracy 364 model 8 val 1511 / 1596 20240812-12:37:44 test_accuracy 364 model 0 val 1484 / 1579 20240812-12:37:46 wrote gpt_000.pth 20240812-12:37:47 wrote gpt_008.pth 20240812-12:38:20 wrote non_validated_0364_00.png 20240812-12:38:52 wrote non_validated_0364_08.png 20240812-12:38:52 wrote state.pth 20240812-12:38:52 --- epoch 365 ---------------------------------------- 20240812-12:38:52 current_test_accuracies 0.9398 0.9543 0.9532 0.9538 0.9562 0.9538 0.9528 0.9524 0.9467 0.9537 20240812-12:38:52 training model 0 20240812-12:38:52 training model 8 20240812-12:42:34 train_perplexity 365 model 8 1.163852109682451 20240812-12:42:36 train_perplexity 365 model 0 1.1647636960981533 20240812-12:42:41 test_perplexity 365 model 8 1.1692478566470275 20240812-12:42:43 test_perplexity 365 model 0 1.1705185259324657 20240812-12:48:36 test_accuracy 365 model 8 val 1494 / 1582 20240812-12:48:38 test_accuracy 365 model 0 val 1483 / 1575 20240812-12:48:40 wrote gpt_000.pth 20240812-12:48:40 wrote gpt_008.pth 20240812-12:49:13 wrote non_validated_0365_00.png 20240812-12:49:46 wrote non_validated_0365_08.png 20240812-12:49:46 wrote state.pth 20240812-12:49:46 --- epoch 366 ---------------------------------------- 20240812-12:49:46 current_test_accuracies 0.9416 0.9543 0.9532 0.9538 0.9562 0.9538 0.9528 0.9524 0.9444 0.9537 20240812-12:49:46 training model 0 20240812-12:49:46 training model 8 20240812-12:53:28 train_perplexity 366 model 8 1.1640621596633232 20240812-12:53:32 train_perplexity 366 model 0 1.1648315177810216 20240812-12:53:34 test_perplexity 366 model 8 1.1705603292524986 20240812-12:53:37 test_perplexity 366 model 0 1.1691980603823713 20240812-12:59:30 test_accuracy 366 model 8 val 1506 / 1595 20240812-12:59:33 test_accuracy 366 model 0 val 1493 / 1576 20240812-12:59:35 wrote gpt_000.pth 20240812-12:59:36 wrote gpt_008.pth 20240812-13:00:09 wrote non_validated_0366_00.png 20240812-13:00:41 wrote non_validated_0366_08.png 20240812-13:00:41 wrote state.pth 20240812-13:00:41 --- epoch 367 ---------------------------------------- 20240812-13:00:41 current_test_accuracies 0.9473 0.9543 0.9532 0.9538 0.9562 0.9538 0.9528 0.9524 0.9442 0.9537 20240812-13:00:41 training model 8 20240812-13:00:41 training model 0 20240812-13:04:23 train_perplexity 367 model 0 1.1642848392392928 20240812-13:04:27 train_perplexity 367 model 8 1.163333933644888 20240812-13:04:29 test_perplexity 367 model 0 1.167676154643697 20240812-13:04:32 test_perplexity 367 model 8 1.171300176086551 20240812-13:10:28 test_accuracy 367 model 8 val 1500 / 1580 20240812-13:10:29 test_accuracy 367 model 0 val 1511 / 1589 20240812-13:10:31 wrote gpt_008.pth 20240812-13:10:32 wrote gpt_000.pth 20240812-13:11:05 wrote non_validated_0367_08.png 20240812-13:11:38 wrote non_validated_0367_00.png 20240812-13:11:38 wrote state.pth 20240812-13:11:38 --- epoch 368 ---------------------------------------- 20240812-13:11:38 current_test_accuracies 0.9509 0.9543 0.9532 0.9538 0.9562 0.9538 0.9528 0.9524 0.9494 0.9537 20240812-13:11:38 training model 8 20240812-13:11:38 training model 0 20240812-13:15:20 train_perplexity 368 model 0 1.164643935480524 20240812-13:15:24 train_perplexity 368 model 8 1.163473957903388 20240812-13:15:26 test_perplexity 368 model 0 1.1698530035712948 20240812-13:15:29 test_perplexity 368 model 8 1.1707885851977802 20240812-13:21:21 test_accuracy 368 model 0 val 1523 / 1607 20240812-13:21:23 test_accuracy 368 model 8 val 1512 / 1598 20240812-13:21:25 wrote gpt_008.pth 20240812-13:21:26 wrote gpt_000.pth 20240812-13:21:58 wrote non_validated_0368_08.png 20240812-13:22:31 wrote non_validated_0368_00.png 20240812-13:22:31 wrote state.pth 20240812-13:22:31 --- epoch 369 ---------------------------------------- 20240812-13:22:31 current_test_accuracies 0.9477 0.9543 0.9532 0.9538 0.9562 0.9538 0.9528 0.9524 0.9462 0.9537 20240812-13:22:31 training model 8 20240812-13:22:31 training model 0 20240812-13:26:13 train_perplexity 369 model 0 1.1640706424627234 20240812-13:26:17 train_perplexity 369 model 8 1.163317367837322 20240812-13:26:20 test_perplexity 369 model 0 1.169559606643302 20240812-13:26:22 test_perplexity 369 model 8 1.1706624993593293 20240812-13:32:12 test_accuracy 369 model 0 val 1507 / 1601 20240812-13:32:16 test_accuracy 369 model 8 val 1504 / 1587 20240812-13:32:18 wrote gpt_008.pth 20240812-13:32:19 wrote gpt_000.pth 20240812-13:32:51 wrote non_validated_0369_08.png 20240812-13:33:24 wrote non_validated_0369_00.png 20240812-13:33:24 wrote state.pth 20240812-13:33:24 --- epoch 370 ---------------------------------------- 20240812-13:33:24 current_test_accuracies 0.9413 0.9543 0.9532 0.9538 0.9562 0.9538 0.9528 0.9524 0.9477 0.9537 20240812-13:33:24 training model 0 20240812-13:33:24 training model 8 20240812-13:37:05 train_perplexity 370 model 8 1.1634588140554185 20240812-13:37:12 test_perplexity 370 model 8 1.1696396569935332 20240812-13:37:12 train_perplexity 370 model 0 1.1636866966619668 20240812-13:37:16 test_perplexity 370 model 0 1.1696235267932382 20240812-13:43:03 test_accuracy 370 model 0 val 1532 / 1612 20240812-13:43:06 test_accuracy 370 model 8 val 1516 / 1589 20240812-13:43:08 wrote gpt_000.pth 20240812-13:43:09 wrote gpt_008.pth 20240812-13:43:42 wrote non_validated_0370_00.png 20240812-13:44:14 wrote non_validated_0370_08.png 20240812-13:44:14 wrote state.pth 20240812-13:44:14 --- epoch 371 ---------------------------------------- 20240812-13:44:14 current_test_accuracies 0.9504 0.9543 0.9532 0.9538 0.9562 0.9538 0.9528 0.9524 0.9541 0.9537 20240812-13:47:38 keep c_quizzes model 1 validated 30 / 420 (7.14%) nb_accumulated 30 / 420 (finishes Mon 14:31 -- 530/h) 20240812-13:50:40 keep c_quizzes model 8 validated 27 / 420 (6.43%) nb_accumulated 57 / 420 (finishes Mon 14:31 -- 531/h) 20240812-13:53:42 keep c_quizzes model 8 validated 25 / 420 (5.95%) nb_accumulated 82 / 420 (finishes Mon 14:32 -- 520/h) 20240812-13:56:43 keep c_quizzes model 9 validated 28 / 420 (6.67%) nb_accumulated 110 / 420 (finishes Mon 14:31 -- 528/h) 20240812-13:59:45 keep c_quizzes model 6 validated 40 / 420 (9.52%) nb_accumulated 150 / 420 (finishes Mon 14:27 -- 580/h) 20240812-14:02:46 keep c_quizzes model 3 validated 38 / 420 (9.05%) nb_accumulated 188 / 420 (finishes Mon 14:25 -- 608/h) 20240812-14:05:47 keep c_quizzes model 2 validated 42 / 420 (10.00%) nb_accumulated 230 / 420 (finishes Mon 14:23 -- 640/h) 20240812-14:08:48 keep c_quizzes model 2 validated 36 / 420 (8.57%) nb_accumulated 266 / 420 (finishes Mon 14:23 -- 649/h) 20240812-14:11:49 keep c_quizzes model 6 validated 38 / 420 (9.05%) nb_accumulated 304 / 420 (finishes Mon 14:22 -- 661/h) 20240812-14:14:51 keep c_quizzes model 7 validated 43 / 420 (10.24%) nb_accumulated 347 / 420 (finishes Mon 14:21 -- 679/h) 20240812-14:17:53 keep c_quizzes model 5 validated 27 / 420 (6.43%) nb_accumulated 374 / 420 (finishes Mon 14:22 -- 666/h) 20240812-14:20:55 keep c_quizzes model 4 validated 35 / 420 (8.33%) nb_accumulated 409 / 420 (finishes Mon 14:21 -- 669/h) 20240812-14:23:57 keep c_quizzes model 6 validated 24 / 420 (5.71%) nb_accumulated 433 / 420 (finishes now! -- 654/h) 20240812-14:24:11 wrote c_quizzes.pth 20240812-14:24:11 training model 0 20240812-14:24:11 training model 1 20240812-14:27:53 train_perplexity 371 model 0 1.1648901508724165 20240812-14:27:53 train_perplexity 371 model 1 1.1668519794284014 20240812-14:28:00 test_perplexity 371 model 0 1.1713692802890814 20240812-14:28:00 test_perplexity 371 model 1 1.1694519990913403 20240812-14:33:56 test_accuracy 371 model 1 val 1527 / 1607 20240812-14:34:00 test_accuracy 371 model 0 val 1494 / 1580 20240812-14:34:02 wrote gpt_000.pth 20240812-14:34:03 wrote gpt_001.pth 20240812-14:34:35 wrote non_validated_0371_00.png 20240812-14:35:07 wrote non_validated_0371_01.png 20240812-14:35:07 wrote state.pth 20240812-14:35:07 --- epoch 372 ---------------------------------------- 20240812-14:35:07 current_test_accuracies 0.9456 0.9502 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 20240812-14:35:07 training model 2 20240812-14:35:07 training model 3 20240812-14:38:49 train_perplexity 372 model 3 1.166604965011283 20240812-14:38:52 train_perplexity 372 model 2 1.1661669929851601 20240812-14:38:56 test_perplexity 372 model 3 1.1720270571540374 20240812-14:38:58 test_perplexity 372 model 2 1.1711026106051607 20240812-14:44:44 test_accuracy 372 model 2 val 1522 / 1619 20240812-14:44:46 test_accuracy 372 model 3 val 1523 / 1607 20240812-14:44:48 wrote gpt_002.pth 20240812-14:44:49 wrote gpt_003.pth 20240812-14:45:21 wrote non_validated_0372_02.png 20240812-14:45:53 wrote non_validated_0372_03.png 20240812-14:45:54 wrote state.pth 20240812-14:45:54 --- epoch 373 ---------------------------------------- 20240812-14:45:54 current_test_accuracies 0.9456 0.9502 0.9401 0.9477 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 20240812-14:45:54 training model 4 20240812-14:45:54 training model 5 20240812-14:49:35 train_perplexity 373 model 5 1.1663545432100846 20240812-14:49:41 test_perplexity 373 model 5 1.1683289498331164 20240812-14:49:41 train_perplexity 373 model 4 1.165666677856724 20240812-14:49:45 test_perplexity 373 model 4 1.170984154752563 20240812-14:55:36 test_accuracy 373 model 4 val 1536 / 1608 20240812-14:55:39 test_accuracy 373 model 5 val 1504 / 1589 20240812-14:55:41 wrote gpt_004.pth 20240812-14:55:42 wrote gpt_005.pth 20240812-14:56:14 wrote non_validated_0373_04.png 20240812-14:56:47 wrote non_validated_0373_05.png 20240812-14:56:47 wrote state.pth 20240812-14:56:47 --- epoch 374 ---------------------------------------- 20240812-14:56:47 current_test_accuracies 0.9456 0.9502 0.9401 0.9477 0.9552 0.9465 0.0000 0.0000 0.0000 0.0000 20240812-14:56:47 training model 6 20240812-14:56:47 training model 7 20240812-15:00:29 train_perplexity 374 model 7 1.1657282930401194 20240812-15:00:33 train_perplexity 374 model 6 1.1658858764621496 20240812-15:00:36 test_perplexity 374 model 7 1.1695977530079082 20240812-15:00:38 test_perplexity 374 model 6 1.1704637257096713 20240812-15:06:27 test_accuracy 374 model 6 val 1520 / 1607 20240812-15:06:29 test_accuracy 374 model 7 val 1496 / 1584 20240812-15:06:31 wrote gpt_006.pth 20240812-15:06:32 wrote gpt_007.pth 20240812-15:07:05 wrote non_validated_0374_06.png 20240812-15:07:37 wrote non_validated_0374_07.png 20240812-15:07:37 wrote state.pth 20240812-15:07:37 --- epoch 375 ---------------------------------------- 20240812-15:07:37 current_test_accuracies 0.9456 0.9502 0.9401 0.9477 0.9552 0.9465 0.9459 0.9444 0.0000 0.0000 20240812-15:07:37 training model 8 20240812-15:07:37 training model 9 20240812-15:11:19 train_perplexity 375 model 9 1.1663081874881511 20240812-15:11:21 train_perplexity 375 model 8 1.1637954100929218 20240812-15:11:27 test_perplexity 375 model 9 1.1700341112320236 20240812-15:11:27 test_perplexity 375 model 8 1.1709800932812402 20240812-15:17:16 test_accuracy 375 model 9 val 1532 / 1623 20240812-15:17:20 test_accuracy 375 model 8 val 1517 / 1606 20240812-15:17:22 wrote gpt_008.pth 20240812-15:17:23 wrote gpt_009.pth 20240812-15:17:55 wrote non_validated_0375_08.png 20240812-15:18:28 wrote non_validated_0375_09.png 20240812-15:18:28 wrote state.pth 20240812-15:18:28 --- epoch 376 ---------------------------------------- 20240812-15:18:28 current_test_accuracies 0.9456 0.9502 0.9401 0.9477 0.9552 0.9465 0.9459 0.9444 0.9446 0.9439 20240812-15:18:28 training model 2 20240812-15:18:28 training model 9 20240812-15:22:10 train_perplexity 376 model 9 1.1659945840841635 20240812-15:22:14 train_perplexity 376 model 2 1.165980089782032 20240812-15:22:17 test_perplexity 376 model 9 1.1703121124771447 20240812-15:22:19 test_perplexity 376 model 2 1.1698790708364974 20240812-15:28:12 test_accuracy 376 model 9 val 1498 / 1593 20240812-15:28:13 test_accuracy 376 model 2 val 1511 / 1593 20240812-15:28:15 wrote gpt_002.pth 20240812-15:28:16 wrote gpt_009.pth 20240812-15:28:49 wrote non_validated_0376_02.png 20240812-15:29:22 wrote non_validated_0376_09.png 20240812-15:29:22 wrote state.pth 20240812-15:29:22 --- epoch 377 ---------------------------------------- 20240812-15:29:22 current_test_accuracies 0.9456 0.9502 0.9485 0.9477 0.9552 0.9465 0.9459 0.9444 0.9446 0.9404 20240812-15:29:22 training model 9 20240812-15:29:22 training model 7 20240812-15:33:03 train_perplexity 377 model 7 1.1656179663054844 20240812-15:33:09 test_perplexity 377 model 7 1.1692969386929364 20240812-15:33:10 train_perplexity 377 model 9 1.1656075851460768 20240812-15:33:14 test_perplexity 377 model 9 1.1710806034040822 20240812-15:38:55 test_accuracy 377 model 7 val 1538 / 1628 20240812-15:38:59 test_accuracy 377 model 9 val 1504 / 1604 20240812-15:39:01 wrote gpt_009.pth 20240812-15:39:02 wrote gpt_007.pth 20240812-15:39:35 wrote non_validated_0377_09.png 20240812-15:40:07 wrote non_validated_0377_07.png 20240812-15:40:07 wrote state.pth 20240812-15:40:07 --- epoch 378 ---------------------------------------- 20240812-15:40:07 current_test_accuracies 0.9456 0.9502 0.9485 0.9477 0.9552 0.9465 0.9459 0.9447 0.9446 0.9377 20240812-15:40:07 training model 9 20240812-15:40:07 training model 8 20240812-15:43:49 train_perplexity 378 model 8 1.1635461235445212 20240812-15:43:54 train_perplexity 378 model 9 1.164947789115127 20240812-15:43:55 test_perplexity 378 model 8 1.1697249852468576 20240812-15:43:58 test_perplexity 378 model 9 1.1683013082931542 20240812-15:49:46 test_accuracy 378 model 8 val 1508 / 1601 20240812-15:49:54 test_accuracy 378 model 9 val 1462 / 1554 20240812-15:49:56 wrote gpt_009.pth 20240812-15:49:56 wrote gpt_008.pth 20240812-15:50:29 wrote non_validated_0378_09.png 20240812-15:51:01 wrote non_validated_0378_08.png 20240812-15:51:01 wrote state.pth 20240812-15:51:01 --- epoch 379 ---------------------------------------- 20240812-15:51:02 current_test_accuracies 0.9456 0.9502 0.9485 0.9477 0.9552 0.9465 0.9459 0.9447 0.9419 0.9408 20240812-15:51:02 training model 9 20240812-15:51:02 training model 8 20240812-15:54:44 train_perplexity 379 model 8 1.163356610046958 20240812-15:54:46 train_perplexity 379 model 9 1.1658071437641457 20240812-15:54:51 test_perplexity 379 model 8 1.172738549709067 20240812-15:54:52 test_perplexity 379 model 9 1.1711815564898314 20240812-16:00:44 test_accuracy 379 model 8 val 1507 / 1597 20240812-16:00:46 test_accuracy 379 model 9 val 1501 / 1588 20240812-16:00:48 wrote gpt_009.pth 20240812-16:00:49 wrote gpt_008.pth 20240812-16:01:22 wrote non_validated_0379_09.png 20240812-16:01:54 wrote non_validated_0379_08.png 20240812-16:01:54 wrote state.pth 20240812-16:01:54 --- epoch 380 ---------------------------------------- 20240812-16:01:54 current_test_accuracies 0.9456 0.9502 0.9485 0.9477 0.9552 0.9465 0.9459 0.9447 0.9436 0.9452 20240812-16:01:54 training model 8 20240812-16:01:54 training model 7 20240812-16:05:36 train_perplexity 380 model 7 1.1649407459956067 20240812-16:05:40 train_perplexity 380 model 8 1.1632664261307695 20240812-16:05:42 test_perplexity 380 model 7 1.17065351234427 20240812-16:05:45 test_perplexity 380 model 8 1.1713133550379335 20240812-16:11:37 test_accuracy 380 model 8 val 1497 / 1595 20240812-16:11:40 test_accuracy 380 model 7 val 1477 / 1573 20240812-16:11:41 wrote gpt_008.pth 20240812-16:11:42 wrote gpt_007.pth 20240812-16:12:15 wrote non_validated_0380_08.png 20240812-16:12:47 wrote non_validated_0380_07.png 20240812-16:12:47 wrote state.pth 20240812-16:12:47 --- epoch 381 ---------------------------------------- 20240812-16:12:47 current_test_accuracies 0.9456 0.9502 0.9485 0.9477 0.9552 0.9465 0.9459 0.9390 0.9386 0.9452 20240812-16:12:47 training model 8 20240812-16:12:47 training model 7 20240812-16:16:30 train_perplexity 381 model 7 1.1651768649576972 20240812-16:16:31 train_perplexity 381 model 8 1.1628938350154452 20240812-16:16:37 test_perplexity 381 model 7 1.170658085541201 20240812-16:16:38 test_perplexity 381 model 8 1.1718713342355633 20240812-16:22:30 test_accuracy 381 model 8 val 1518 / 1606 20240812-16:22:32 test_accuracy 381 model 7 val 1481 / 1584 20240812-16:22:34 wrote gpt_008.pth 20240812-16:22:35 wrote gpt_007.pth 20240812-16:23:08 wrote non_validated_0381_08.png 20240812-16:23:40 wrote non_validated_0381_07.png 20240812-16:23:40 wrote state.pth 20240812-16:23:40 --- epoch 382 ---------------------------------------- 20240812-16:23:40 current_test_accuracies 0.9456 0.9502 0.9485 0.9477 0.9552 0.9465 0.9459 0.9350 0.9452 0.9452 20240812-16:23:40 training model 7 20240812-16:23:40 training model 8 20240812-16:27:22 train_perplexity 382 model 8 1.1627867131155476 20240812-16:27:26 train_perplexity 382 model 7 1.1644918025510704 20240812-16:27:28 test_perplexity 382 model 8 1.1728584581963892 20240812-16:27:31 test_perplexity 382 model 7 1.172522183207477 20240812-16:33:15 test_accuracy 382 model 7 val 1523 / 1621 20240812-16:33:20 test_accuracy 382 model 8 val 1514 / 1593 20240812-16:33:21 wrote gpt_007.pth 20240812-16:33:22 wrote gpt_008.pth 20240812-16:33:55 wrote non_validated_0382_07.png 20240812-16:34:27 wrote non_validated_0382_08.png 20240812-16:34:27 wrote state.pth 20240812-16:34:27 --- epoch 383 ---------------------------------------- 20240812-16:34:27 current_test_accuracies 0.9456 0.9502 0.9485 0.9477 0.9552 0.9465 0.9459 0.9395 0.9504 0.9452 20240812-16:34:27 training model 7 20240812-16:34:27 training model 9 20240812-16:38:09 train_perplexity 383 model 9 1.1651569252057734 20240812-16:38:13 train_perplexity 383 model 7 1.1641979290940125 20240812-16:38:16 test_perplexity 383 model 9 1.1700797491937796 20240812-16:38:18 test_perplexity 383 model 7 1.1721740618973338 20240812-16:44:07 test_accuracy 383 model 7 val 1528 / 1622 20240812-16:44:08 test_accuracy 383 model 9 val 1533 / 1613 20240812-16:44:10 wrote gpt_007.pth 20240812-16:44:11 wrote gpt_009.pth 20240812-16:44:43 wrote non_validated_0383_07.png 20240812-16:45:16 wrote non_validated_0383_09.png 20240812-16:45:16 wrote state.pth 20240812-16:45:16 --- epoch 384 ---------------------------------------- 20240812-16:45:16 current_test_accuracies 0.9456 0.9502 0.9485 0.9477 0.9552 0.9465 0.9459 0.9420 0.9504 0.9504 20240812-16:45:16 training model 7 20240812-16:45:16 training model 0 20240812-16:48:57 train_perplexity 384 model 0 1.1647556189775847 20240812-16:49:03 test_perplexity 384 model 0 1.1701070383224232 20240812-16:49:04 train_perplexity 384 model 7 1.1638003866715312 20240812-16:49:08 test_perplexity 384 model 7 1.1702690169005858 20240812-16:55:02 test_accuracy 384 model 0 val 1507 / 1587 20240812-16:55:04 test_accuracy 384 model 7 val 1497 / 1586 20240812-16:55:06 wrote gpt_007.pth 20240812-16:55:07 wrote gpt_000.pth 20240812-16:55:40 wrote non_validated_0384_07.png 20240812-16:56:13 wrote non_validated_0384_00.png 20240812-16:56:13 wrote state.pth 20240812-16:56:13 --- epoch 385 ---------------------------------------- 20240812-16:56:13 current_test_accuracies 0.9496 0.9502 0.9485 0.9477 0.9552 0.9465 0.9459 0.9439 0.9504 0.9504 20240812-16:56:13 training model 7 20240812-16:56:13 training model 6 20240812-16:59:55 train_perplexity 385 model 6 1.1654709109868715 20240812-16:59:57 train_perplexity 385 model 7 1.164187159490986 20240812-17:00:02 test_perplexity 385 model 6 1.171416288871288 20240812-17:00:03 test_perplexity 385 model 7 1.1705425007601995 20240812-17:05:49 test_accuracy 385 model 6 val 1515 / 1618 20240812-17:05:57 test_accuracy 385 model 7 val 1473 / 1560 20240812-17:05:59 wrote gpt_007.pth 20240812-17:06:00 wrote gpt_006.pth 20240812-17:06:33 wrote non_validated_0385_07.png 20240812-17:07:05 wrote non_validated_0385_06.png 20240812-17:07:05 wrote state.pth 20240812-17:07:05 --- epoch 386 ---------------------------------------- 20240812-17:07:05 current_test_accuracies 0.9496 0.9502 0.9485 0.9477 0.9552 0.9465 0.9363 0.9442 0.9504 0.9504 20240812-17:07:05 training model 6 20240812-17:07:05 training model 7 20240812-17:10:47 train_perplexity 386 model 7 1.1634662347923859 20240812-17:10:52 train_perplexity 386 model 6 1.1655283868389155 20240812-17:10:53 test_perplexity 386 model 7 1.1711663524364724 20240812-17:10:56 test_perplexity 386 model 6 1.1689815941452735 20240812-17:16:42 test_accuracy 386 model 7 val 1518 / 1613 20240812-17:16:47 test_accuracy 386 model 6 val 1507 / 1589 20240812-17:16:49 wrote gpt_006.pth 20240812-17:16:50 wrote gpt_007.pth 20240812-17:17:22 wrote non_validated_0386_06.png 20240812-17:17:55 wrote non_validated_0386_07.png 20240812-17:17:55 wrote state.pth 20240812-17:17:55 --- epoch 387 ---------------------------------------- 20240812-17:17:55 current_test_accuracies 0.9496 0.9502 0.9485 0.9477 0.9552 0.9465 0.9484 0.9411 0.9504 0.9504 20240812-17:17:55 training model 7 20240812-17:17:55 training model 5 20240812-17:21:37 train_perplexity 387 model 5 1.1659647186412836 20240812-17:21:38 train_perplexity 387 model 7 1.1636549711662698 20240812-17:21:45 test_perplexity 387 model 5 1.1708758824595649 20240812-17:21:45 test_perplexity 387 model 7 1.1736777026217466 20240812-17:27:36 test_accuracy 387 model 7 val 1519 / 1611 20240812-17:27:39 test_accuracy 387 model 5 val 1511 / 1606 20240812-17:27:41 wrote gpt_007.pth 20240812-17:27:42 wrote gpt_005.pth 20240812-17:28:14 wrote non_validated_0387_07.png 20240812-17:28:46 wrote non_validated_0387_05.png 20240812-17:28:47 wrote state.pth 20240812-17:28:47 --- epoch 388 ---------------------------------------- 20240812-17:28:47 current_test_accuracies 0.9496 0.9502 0.9485 0.9477 0.9552 0.9408 0.9484 0.9429 0.9504 0.9504 20240812-17:28:47 training model 5 20240812-17:28:47 training model 7 20240812-17:32:29 train_perplexity 388 model 7 1.1632403099269029 20240812-17:32:32 train_perplexity 388 model 5 1.1655990920911166 20240812-17:32:36 test_perplexity 388 model 7 1.1722814037309177 20240812-17:32:37 test_perplexity 388 model 5 1.1703858791959258 20240812-17:38:23 test_accuracy 388 model 7 val 1520 / 1613 20240812-17:38:25 test_accuracy 388 model 5 val 1502 / 1595 20240812-17:38:27 wrote gpt_005.pth 20240812-17:38:28 wrote gpt_007.pth 20240812-17:39:01 wrote non_validated_0388_05.png 20240812-17:39:33 wrote non_validated_0388_07.png 20240812-17:39:33 wrote state.pth 20240812-17:39:33 --- epoch 389 ---------------------------------------- 20240812-17:39:33 current_test_accuracies 0.9496 0.9502 0.9485 0.9477 0.9552 0.9417 0.9484 0.9423 0.9504 0.9504 20240812-17:39:33 training model 5 20240812-17:39:33 training model 7 20240812-17:43:14 train_perplexity 389 model 7 1.163040013147877 20240812-17:43:20 test_perplexity 389 model 7 1.171292823724714 20240812-17:43:22 train_perplexity 389 model 5 1.1656382289027338 20240812-17:43:26 test_perplexity 389 model 5 1.17045913670287 20240812-17:49:11 test_accuracy 389 model 7 val 1530 / 1614 20240812-17:49:15 test_accuracy 389 model 5 val 1516 / 1593 20240812-17:49:17 wrote gpt_005.pth 20240812-17:49:17 wrote gpt_007.pth 20240812-17:49:50 wrote non_validated_0389_05.png 20240812-17:50:22 wrote non_validated_0389_07.png 20240812-17:50:22 wrote state.pth 20240812-17:50:22 --- epoch 390 ---------------------------------------- 20240812-17:50:22 current_test_accuracies 0.9496 0.9502 0.9485 0.9477 0.9552 0.9517 0.9484 0.9480 0.9504 0.9504 20240812-17:50:22 training model 3 20240812-17:50:22 training model 7 20240812-17:54:03 train_perplexity 390 model 7 1.1628770890394526 20240812-17:54:09 test_perplexity 390 model 7 1.17161573582173 20240812-17:54:10 train_perplexity 390 model 3 1.166159292991588 20240812-17:54:14 test_perplexity 390 model 3 1.1685098435343897 20240812-18:00:03 test_accuracy 390 model 7 val 1504 / 1590 20240812-18:00:06 test_accuracy 390 model 3 val 1488 / 1574 20240812-18:00:08 wrote gpt_003.pth 20240812-18:00:09 wrote gpt_007.pth 20240812-18:00:42 wrote non_validated_0390_03.png 20240812-18:01:14 wrote non_validated_0390_07.png 20240812-18:01:14 wrote state.pth 20240812-18:01:14 --- epoch 391 ---------------------------------------- 20240812-18:01:14 current_test_accuracies 0.9496 0.9502 0.9485 0.9454 0.9552 0.9517 0.9484 0.9459 0.9504 0.9504 20240812-18:01:14 training model 3 20240812-18:01:14 training model 7 20240812-18:04:57 train_perplexity 391 model 7 1.1624008160220822 20240812-18:05:00 train_perplexity 391 model 3 1.1659289653190195 20240812-18:05:04 test_perplexity 391 model 7 1.1730412038057112 20240812-18:05:06 test_perplexity 391 model 3 1.170200231102746 20240812-18:10:47 test_accuracy 391 model 7 val 1539 / 1623 20240812-18:10:53 test_accuracy 391 model 3 val 1508 / 1597 20240812-18:10:54 wrote gpt_003.pth 20240812-18:10:55 wrote gpt_007.pth 20240812-18:11:28 wrote non_validated_0391_03.png 20240812-18:12:00 wrote non_validated_0391_07.png 20240812-18:12:00 wrote state.pth 20240812-18:12:00 --- epoch 392 ---------------------------------------- 20240812-18:12:00 current_test_accuracies 0.9496 0.9502 0.9485 0.9443 0.9552 0.9517 0.9484 0.9482 0.9504 0.9504 20240812-18:12:00 training model 3 20240812-18:12:00 training model 7 20240812-18:15:41 train_perplexity 392 model 7 1.1624671415889336 20240812-18:15:47 test_perplexity 392 model 7 1.173394847133156 20240812-18:15:47 train_perplexity 392 model 3 1.166085609010986 20240812-18:15:51 test_perplexity 392 model 3 1.1703706410001973 20240812-18:21:37 test_accuracy 392 model 7 val 1514 / 1600 20240812-18:21:39 test_accuracy 392 model 3 val 1522 / 1609 20240812-18:21:41 wrote gpt_003.pth 20240812-18:21:42 wrote gpt_007.pth 20240812-18:22:14 wrote non_validated_0392_03.png 20240812-18:22:47 wrote non_validated_0392_07.png 20240812-18:22:47 wrote state.pth 20240812-18:22:47 --- epoch 393 ---------------------------------------- 20240812-18:22:47 current_test_accuracies 0.9496 0.9502 0.9485 0.9459 0.9552 0.9517 0.9484 0.9463 0.9504 0.9504 20240812-18:22:47 training model 3 20240812-18:22:47 training model 7 20240812-18:26:28 train_perplexity 393 model 7 1.1622069967467437 20240812-18:26:34 test_perplexity 393 model 7 1.1732568422011584 20240812-18:26:35 train_perplexity 393 model 3 1.1655494980333938 20240812-18:26:39 test_perplexity 393 model 3 1.170807793303752 20240812-18:32:27 test_accuracy 393 model 3 val 1519 / 1608 20240812-18:32:30 test_accuracy 393 model 7 val 1506 / 1589 20240812-18:32:32 wrote gpt_003.pth 20240812-18:32:33 wrote gpt_007.pth 20240812-18:33:05 wrote non_validated_0393_03.png 20240812-18:33:38 wrote non_validated_0393_07.png 20240812-18:33:38 wrote state.pth 20240812-18:33:38 --- epoch 394 ---------------------------------------- 20240812-18:33:38 current_test_accuracies 0.9496 0.9502 0.9485 0.9447 0.9552 0.9517 0.9484 0.9478 0.9504 0.9504 20240812-18:33:38 training model 3 20240812-18:33:38 training model 7 20240812-18:37:20 train_perplexity 394 model 7 1.161699326370911 20240812-18:37:24 train_perplexity 394 model 3 1.1656817502720669 20240812-18:37:26 test_perplexity 394 model 7 1.1741499732517187 20240812-18:37:29 test_perplexity 394 model 3 1.1698318282545488 20240812-18:43:09 test_accuracy 394 model 3 val 1555 / 1638 20240812-18:43:14 test_accuracy 394 model 7 val 1531 / 1608 20240812-18:43:16 wrote gpt_003.pth 20240812-18:43:17 wrote gpt_007.pth 20240812-18:43:49 wrote non_validated_0394_03.png 20240812-18:44:22 wrote non_validated_0394_07.png 20240812-18:44:22 wrote state.pth 20240812-18:44:22 --- epoch 395 ---------------------------------------- 20240812-18:44:22 current_test_accuracies 0.9496 0.9502 0.9485 0.9493 0.9552 0.9517 0.9484 0.9521 0.9504 0.9504 20240812-18:44:22 training model 6 20240812-18:44:22 training model 2 20240812-18:48:04 train_perplexity 395 model 2 1.165240601297226 20240812-18:48:09 train_perplexity 395 model 6 1.1652564172827682 20240812-18:48:10 test_perplexity 395 model 2 1.1691364398175488 20240812-18:48:13 test_perplexity 395 model 6 1.1704533495089136 20240812-18:54:08 test_accuracy 395 model 2 val 1503 / 1586 20240812-18:54:10 test_accuracy 395 model 6 val 1507 / 1587 20240812-18:54:12 wrote gpt_006.pth 20240812-18:54:12 wrote gpt_002.pth 20240812-18:54:45 wrote non_validated_0395_06.png 20240812-18:55:18 wrote non_validated_0395_02.png 20240812-18:55:18 wrote state.pth 20240812-18:55:18 --- epoch 396 ---------------------------------------- 20240812-18:55:18 current_test_accuracies 0.9496 0.9502 0.9477 0.9493 0.9552 0.9517 0.9496 0.9521 0.9504 0.9504 20240812-18:55:18 training model 2 20240812-18:55:18 training model 3 20240812-18:59:00 train_perplexity 396 model 3 1.165620661140157 20240812-18:59:04 train_perplexity 396 model 2 1.1652200444810399 20240812-18:59:07 test_perplexity 396 model 3 1.1707105212583782 20240812-18:59:09 test_perplexity 396 model 2 1.1692011117034682 20240812-19:04:59 test_accuracy 396 model 2 val 1524 / 1619 20240812-19:05:02 test_accuracy 396 model 3 val 1514 / 1594 20240812-19:05:04 wrote gpt_002.pth 20240812-19:05:05 wrote gpt_003.pth 20240812-19:05:37 wrote non_validated_0396_02.png 20240812-19:06:10 wrote non_validated_0396_03.png 20240812-19:06:10 wrote state.pth 20240812-19:06:10 --- epoch 397 ---------------------------------------- 20240812-19:06:10 current_test_accuracies 0.9496 0.9502 0.9413 0.9498 0.9552 0.9517 0.9496 0.9521 0.9504 0.9504 20240812-19:06:10 training model 2 20240812-19:06:10 training model 0 20240812-19:09:52 train_perplexity 397 model 0 1.1638348565726988 20240812-19:09:54 train_perplexity 397 model 2 1.1652597591653155 20240812-19:09:59 test_perplexity 397 model 0 1.1703023952834415 20240812-19:10:00 test_perplexity 397 model 2 1.1693267840059849 20240812-19:15:54 test_accuracy 397 model 0 val 1528 / 1599 20240812-19:15:56 test_accuracy 397 model 2 val 1495 / 1576 20240812-19:15:58 wrote gpt_002.pth 20240812-19:15:59 wrote gpt_000.pth 20240812-19:16:31 wrote non_validated_0397_02.png 20240812-19:17:05 wrote non_validated_0397_00.png 20240812-19:17:05 wrote state.pth 20240812-19:17:05 --- epoch 398 ---------------------------------------- 20240812-19:17:05 current_test_accuracies 0.9556 0.9502 0.9486 0.9498 0.9552 0.9517 0.9496 0.9521 0.9504 0.9504 20240812-19:17:05 training model 2 20240812-19:17:05 training model 6 20240812-19:20:47 train_perplexity 398 model 6 1.1657578381070457 20240812-19:20:50 train_perplexity 398 model 2 1.1647144407221326 20240812-19:20:54 test_perplexity 398 model 6 1.1707442164387747 20240812-19:20:56 test_perplexity 398 model 2 1.1710692807273022 20240812-19:26:52 test_accuracy 398 model 6 val 1495 / 1579 20240812-19:26:54 test_accuracy 398 model 2 val 1480 / 1561 20240812-19:26:56 wrote gpt_002.pth 20240812-19:26:57 wrote gpt_006.pth 20240812-19:27:30 wrote non_validated_0398_02.png 20240812-19:28:02 wrote non_validated_0398_06.png 20240812-19:28:02 wrote state.pth 20240812-19:28:02 --- epoch 399 ---------------------------------------- 20240812-19:28:02 current_test_accuracies 0.9556 0.9502 0.9481 0.9498 0.9552 0.9517 0.9468 0.9521 0.9504 0.9504 20240812-19:28:02 training model 6 20240812-19:28:02 training model 2 20240812-19:31:44 train_perplexity 399 model 2 1.1650737235648883 20240812-19:31:49 train_perplexity 399 model 6 1.1649135617917188 20240812-19:31:50 test_perplexity 399 model 2 1.1713806477582924 20240812-19:31:54 test_perplexity 399 model 6 1.1712442820593179 20240812-19:37:48 test_accuracy 399 model 2 val 1515 / 1599 20240812-19:37:51 test_accuracy 399 model 6 val 1489 / 1584 20240812-19:37:53 wrote gpt_006.pth 20240812-19:37:54 wrote gpt_002.pth 20240812-19:38:26 wrote non_validated_0399_06.png 20240812-19:38:59 wrote non_validated_0399_02.png 20240812-19:38:59 wrote state.pth 20240812-19:38:59 --- epoch 400 ---------------------------------------- 20240812-19:38:59 current_test_accuracies 0.9556 0.9502 0.9475 0.9498 0.9552 0.9517 0.9400 0.9521 0.9504 0.9504 20240812-19:38:59 training model 6 20240812-19:38:59 training model 2 20240812-19:42:40 train_perplexity 400 model 2 1.1645236707802313 20240812-19:42:46 test_perplexity 400 model 2 1.1716343368406599 20240812-19:42:47 train_perplexity 400 model 6 1.1646886403391756 20240812-19:42:50 test_perplexity 400 model 6 1.1697673500444248 20240812-19:48:39 test_accuracy 400 model 2 val 1524 / 1614 20240812-19:48:43 test_accuracy 400 model 6 val 1503 / 1593 20240812-19:48:45 wrote gpt_006.pth 20240812-19:48:45 wrote gpt_002.pth 20240812-19:49:18 wrote non_validated_0400_06.png 20240812-19:49:50 wrote non_validated_0400_02.png 20240812-19:49:50 wrote state.pth 20240812-19:49:50 --- epoch 401 ---------------------------------------- 20240812-19:49:50 current_test_accuracies 0.9556 0.9502 0.9442 0.9498 0.9552 0.9517 0.9435 0.9521 0.9504 0.9504 20240812-19:49:50 training model 6 20240812-19:49:50 training model 2 20240812-19:53:33 train_perplexity 401 model 2 1.1645668471708133 20240812-19:53:36 train_perplexity 401 model 6 1.164844610660558 20240812-19:53:40 test_perplexity 401 model 2 1.1704236276118156 20240812-19:53:42 test_perplexity 401 model 6 1.1720216732779047 20240812-19:59:34 test_accuracy 401 model 2 val 1518 / 1594 20240812-19:59:36 test_accuracy 401 model 6 val 1492 / 1592 20240812-19:59:38 wrote gpt_006.pth 20240812-19:59:39 wrote gpt_002.pth 20240812-20:00:11 wrote non_validated_0401_06.png 20240812-20:00:44 wrote non_validated_0401_02.png 20240812-20:00:44 wrote state.pth 20240812-20:00:44 --- epoch 402 ---------------------------------------- 20240812-20:00:44 current_test_accuracies 0.9556 0.9502 0.9523 0.9498 0.9552 0.9517 0.9372 0.9521 0.9504 0.9504 20240812-20:00:44 training model 6 20240812-20:00:44 training model 3 20240812-20:04:27 train_perplexity 402 model 3 1.165341995771671 20240812-20:04:27 train_perplexity 402 model 6 1.1645869541413363 20240812-20:04:34 test_perplexity 402 model 3 1.1724273655015394 20240812-20:04:35 test_perplexity 402 model 6 1.167921765271263 20240812-20:10:27 test_accuracy 402 model 3 val 1523 / 1617 20240812-20:10:33 test_accuracy 402 model 6 val 1493 / 1572 20240812-20:10:34 wrote gpt_006.pth 20240812-20:10:35 wrote gpt_003.pth 20240812-20:11:07 wrote non_validated_0402_06.png 20240812-20:11:41 wrote non_validated_0402_03.png 20240812-20:11:41 wrote state.pth 20240812-20:11:41 --- epoch 403 ---------------------------------------- 20240812-20:11:41 current_test_accuracies 0.9556 0.9502 0.9523 0.9419 0.9552 0.9517 0.9497 0.9521 0.9504 0.9504 20240812-20:11:41 training model 3 20240812-20:11:41 training model 6 20240812-20:15:22 train_perplexity 403 model 6 1.1640225752108995 20240812-20:15:28 test_perplexity 403 model 6 1.173835091134494 20240812-20:15:29 train_perplexity 403 model 3 1.1651469786984952 20240812-20:15:33 test_perplexity 403 model 3 1.1690646293323566 20240812-20:21:15 test_accuracy 403 model 6 val 1534 / 1620 20240812-20:21:19 test_accuracy 403 model 3 val 1516 / 1604 20240812-20:21:21 wrote gpt_003.pth 20240812-20:21:21 wrote gpt_006.pth 20240812-20:21:54 wrote non_validated_0403_03.png 20240812-20:22:27 wrote non_validated_0403_06.png 20240812-20:22:27 wrote state.pth 20240812-20:22:27 --- epoch 404 ---------------------------------------- 20240812-20:22:27 current_test_accuracies 0.9556 0.9502 0.9523 0.9451 0.9552 0.9517 0.9469 0.9521 0.9504 0.9504 20240812-20:22:27 training model 3 20240812-20:22:27 training model 6 20240812-20:26:08 train_perplexity 404 model 6 1.1639387907935284 20240812-20:26:14 test_perplexity 404 model 6 1.1702421924216022 20240812-20:26:14 train_perplexity 404 model 3 1.1653365372809832 20240812-20:26:18 test_perplexity 404 model 3 1.1693613700639212 20240812-20:32:12 test_accuracy 404 model 6 val 1508 / 1586 20240812-20:32:14 test_accuracy 404 model 3 val 1491 / 1574 20240812-20:32:16 wrote gpt_003.pth 20240812-20:32:17 wrote gpt_006.pth 20240812-20:32:50 wrote non_validated_0404_03.png 20240812-20:33:22 wrote non_validated_0404_06.png 20240812-20:33:22 wrote state.pth 20240812-20:33:22 --- epoch 405 ---------------------------------------- 20240812-20:33:22 current_test_accuracies 0.9556 0.9502 0.9523 0.9473 0.9552 0.9517 0.9508 0.9521 0.9504 0.9504 20240812-20:33:22 training model 3 20240812-20:33:22 training model 1 20240812-20:37:04 train_perplexity 405 model 1 1.1664244236867771 20240812-20:37:10 test_perplexity 405 model 1 1.1702314676847567 20240812-20:37:10 train_perplexity 405 model 3 1.164676513365755 20240812-20:37:14 test_perplexity 405 model 3 1.1715725032058053 20240812-20:43:03 test_accuracy 405 model 1 val 1524 / 1609 20240812-20:43:06 test_accuracy 405 model 3 val 1509 / 1588 20240812-20:43:08 wrote gpt_003.pth 20240812-20:43:09 wrote gpt_001.pth 20240812-20:43:41 wrote non_validated_0405_03.png 20240812-20:44:14 wrote non_validated_0405_01.png 20240812-20:44:14 wrote state.pth 20240812-20:44:14 --- epoch 406 ---------------------------------------- 20240812-20:44:14 current_test_accuracies 0.9556 0.9472 0.9523 0.9503 0.9552 0.9517 0.9508 0.9521 0.9504 0.9504 20240812-20:44:14 training model 1 20240812-20:44:14 training model 3 20240812-20:47:55 train_perplexity 406 model 3 1.1648703305784718 20240812-20:48:01 test_perplexity 406 model 3 1.1716098241923443 20240812-20:48:01 train_perplexity 406 model 1 1.1667762675733504 20240812-20:48:05 test_perplexity 406 model 1 1.1693660771767234 20240812-20:53:56 test_accuracy 406 model 1 val 1520 / 1607 20240812-20:53:57 test_accuracy 406 model 3 val 1506 / 1598 20240812-20:53:59 wrote gpt_001.pth 20240812-20:53:59 wrote gpt_003.pth 20240812-20:54:32 wrote non_validated_0406_01.png 20240812-20:55:04 wrote non_validated_0406_03.png 20240812-20:55:04 wrote state.pth 20240812-20:55:04 --- epoch 407 ---------------------------------------- 20240812-20:55:04 current_test_accuracies 0.9556 0.9459 0.9523 0.9424 0.9552 0.9517 0.9508 0.9521 0.9504 0.9504 20240812-20:55:04 training model 3 20240812-20:55:04 training model 1 20240812-20:58:46 train_perplexity 407 model 1 1.1662980856456533 20240812-20:58:50 train_perplexity 407 model 3 1.1644358378364927 20240812-20:58:52 test_perplexity 407 model 1 1.1695560956011755 20240812-20:58:55 test_perplexity 407 model 3 1.171762206842507 20240812-21:04:41 test_accuracy 407 model 3 val 1580 / 1641 20240812-21:04:50 test_accuracy 407 model 1 val 1470 / 1575 20240812-21:04:52 wrote gpt_003.pth 20240812-21:04:53 wrote gpt_001.pth 20240812-21:05:25 wrote non_validated_0407_03.png 20240812-21:05:57 wrote non_validated_0407_01.png 20240812-21:05:57 wrote state.pth 20240812-21:05:57 --- epoch 408 ---------------------------------------- 20240812-21:05:57 current_test_accuracies 0.9556 0.9333 0.9523 0.9628 0.9552 0.9517 0.9508 0.9521 0.9504 0.9504 20240812-21:05:57 training model 1 20240812-21:05:57 training model 9 20240812-21:09:40 train_perplexity 408 model 9 1.1650500563472825 20240812-21:09:42 train_perplexity 408 model 1 1.165894307700589 20240812-21:09:47 test_perplexity 408 model 9 1.1714290181186087 20240812-21:09:48 test_perplexity 408 model 1 1.1718453432837899 20240812-21:15:39 test_accuracy 408 model 1 val 1543 / 1615 20240812-21:15:43 test_accuracy 408 model 9 val 1512 / 1589 20240812-21:15:45 wrote gpt_001.pth 20240812-21:15:46 wrote gpt_009.pth 20240812-21:16:19 wrote non_validated_0408_01.png 20240812-21:16:51 wrote non_validated_0408_09.png 20240812-21:16:51 wrote state.pth 20240812-21:16:51 --- epoch 409 ---------------------------------------- 20240812-21:16:51 current_test_accuracies 0.9556 0.9554 0.9523 0.9628 0.9552 0.9517 0.9508 0.9521 0.9504 0.9515 20240812-21:20:15 keep c_quizzes model 7 validated 38 / 420 (9.05%) nb_accumulated 38 / 420 (finishes Mon 21:54 -- 672/h) 20240812-21:23:17 keep c_quizzes model 0 validated 36 / 420 (8.57%) nb_accumulated 74 / 420 (finishes Mon 21:53 -- 690/h) 20240812-21:26:19 keep c_quizzes model 9 validated 29 / 420 (6.90%) nb_accumulated 103 / 420 (finishes Mon 21:55 -- 652/h) 20240812-21:29:22 keep c_quizzes model 5 validated 36 / 420 (8.57%) nb_accumulated 139 / 420 (finishes Mon 21:54 -- 666/h) 20240812-21:32:24 keep c_quizzes model 5 validated 37 / 420 (8.81%) nb_accumulated 176 / 420 (finishes Mon 21:53 -- 679/h) 20240812-21:35:26 keep c_quizzes model 9 validated 27 / 420 (6.43%) nb_accumulated 203 / 420 (finishes Mon 21:55 -- 655/h) 20240812-21:38:27 keep c_quizzes model 4 validated 31 / 420 (7.38%) nb_accumulated 234 / 420 (finishes Mon 21:55 -- 650/h) 20240812-21:41:29 keep c_quizzes model 5 validated 19 / 420 (4.52%) nb_accumulated 253 / 420 (finishes Mon 21:57 -- 616/h) 20240812-21:44:31 keep c_quizzes model 9 validated 27 / 420 (6.43%) nb_accumulated 280 / 420 (finishes Mon 21:58 -- 607/h) 20240812-21:47:33 keep c_quizzes model 9 validated 31 / 420 (7.38%) nb_accumulated 311 / 420 (finishes Mon 21:58 -- 608/h) 20240812-21:50:35 keep c_quizzes model 4 validated 29 / 420 (6.90%) nb_accumulated 340 / 420 (finishes Mon 21:58 -- 605/h) 20240812-21:53:36 keep c_quizzes model 1 validated 30 / 420 (7.14%) nb_accumulated 370 / 420 (finishes Mon 21:58 -- 604/h) 20240812-21:56:38 keep c_quizzes model 4 validated 32 / 420 (7.62%) nb_accumulated 402 / 420 (finishes Mon 21:58 -- 606/h) 20240812-21:59:39 keep c_quizzes model 1 validated 24 / 420 (5.71%) nb_accumulated 426 / 420 (finishes now! -- 597/h) 20240812-21:59:52 wrote c_quizzes.pth 20240812-21:59:52 training model 0 20240812-21:59:52 training model 1 20240812-22:03:34 train_perplexity 409 model 0 1.1648855249713055 20240812-22:03:34 train_perplexity 409 model 1 1.1669531929768975 20240812-22:03:42 test_perplexity 409 model 0 1.173148110879871 20240812-22:03:42 test_perplexity 409 model 1 1.1698084783030729 20240812-22:09:23 test_accuracy 409 model 0 val 1541 / 1651 20240812-22:09:30 test_accuracy 409 model 1 val 1498 / 1600 20240812-22:09:32 wrote gpt_000.pth 20240812-22:09:33 wrote gpt_001.pth 20240812-22:10:05 wrote non_validated_0409_00.png 20240812-22:10:38 wrote non_validated_0409_01.png 20240812-22:10:38 wrote state.pth 20240812-22:10:38 --- epoch 410 ---------------------------------------- 20240812-22:10:38 current_test_accuracies 0.9334 0.9362 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 20240812-22:10:38 training model 2 20240812-22:10:38 training model 3 20240812-22:14:20 train_perplexity 410 model 3 1.1652953406476811 20240812-22:14:22 train_perplexity 410 model 2 1.1655999462826 20240812-22:14:27 test_perplexity 410 model 3 1.1719635088210678 20240812-22:14:28 test_perplexity 410 model 2 1.1705643626676743 20240812-22:20:26 test_accuracy 410 model 3 val 1523 / 1610 20240812-22:20:27 test_accuracy 410 model 2 val 1503 / 1601 20240812-22:20:29 wrote gpt_002.pth 20240812-22:20:29 wrote gpt_003.pth 20240812-22:21:03 wrote non_validated_0410_02.png 20240812-22:21:36 wrote non_validated_0410_03.png 20240812-22:21:36 wrote state.pth 20240812-22:21:36 --- epoch 411 ---------------------------------------- 20240812-22:21:36 current_test_accuracies 0.9334 0.9362 0.9388 0.9460 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 20240812-22:21:36 training model 4 20240812-22:21:36 training model 5 20240812-22:25:18 train_perplexity 411 model 5 1.1659587594048222 20240812-22:25:24 test_perplexity 411 model 5 1.172937906527525 20240812-22:25:24 train_perplexity 411 model 4 1.1667399683788682 20240812-22:25:28 test_perplexity 411 model 4 1.1707547683657225 20240812-22:31:20 test_accuracy 411 model 5 val 1508 / 1593 20240812-22:31:20 test_accuracy 411 model 4 val 1528 / 1608 20240812-22:31:23 wrote gpt_004.pth 20240812-22:31:23 wrote gpt_005.pth 20240812-22:31:56 wrote non_validated_0411_04.png 20240812-22:32:28 wrote non_validated_0411_05.png 20240812-22:32:29 wrote state.pth 20240812-22:32:29 --- epoch 412 ---------------------------------------- 20240812-22:32:29 current_test_accuracies 0.9334 0.9362 0.9388 0.9460 0.9502 0.9466 0.0000 0.0000 0.0000 0.0000 20240812-22:32:29 training model 6 20240812-22:32:29 training model 7 20240812-22:36:11 train_perplexity 412 model 7 1.16365491993255 20240812-22:36:14 train_perplexity 412 model 6 1.1652766630572435 20240812-22:36:18 test_perplexity 412 model 7 1.1735563236302315 20240812-22:36:20 test_perplexity 412 model 6 1.1698321895297508 20240812-22:42:16 test_accuracy 412 model 7 val 1478 / 1563 20240812-22:42:18 test_accuracy 412 model 6 val 1487 / 1565 20240812-22:42:19 wrote gpt_006.pth 20240812-22:42:20 wrote gpt_007.pth 20240812-22:42:53 wrote non_validated_0412_06.png 20240812-22:43:26 wrote non_validated_0412_07.png 20240812-22:43:26 wrote state.pth 20240812-22:43:26 --- epoch 413 ---------------------------------------- 20240812-22:43:26 current_test_accuracies 0.9334 0.9362 0.9388 0.9460 0.9502 0.9466 0.9502 0.9456 0.0000 0.0000 20240812-22:43:26 training model 8 20240812-22:43:26 training model 9 20240812-22:47:08 train_perplexity 413 model 9 1.1657560122382333 20240812-22:47:12 train_perplexity 413 model 8 1.1639246479100498 20240812-22:47:14 test_perplexity 413 model 9 1.174122784256101 20240812-22:47:17 test_perplexity 413 model 8 1.1722024730097418 20240812-22:53:08 test_accuracy 413 model 9 val 1513 / 1602 20240812-22:53:13 test_accuracy 413 model 8 val 1486 / 1577 20240812-22:53:15 wrote gpt_008.pth 20240812-22:53:16 wrote gpt_009.pth 20240812-22:53:49 wrote non_validated_0413_08.png 20240812-22:54:22 wrote non_validated_0413_09.png 20240812-22:54:22 wrote state.pth 20240812-22:54:22 --- epoch 414 ---------------------------------------- 20240812-22:54:22 current_test_accuracies 0.9334 0.9362 0.9388 0.9460 0.9502 0.9466 0.9502 0.9456 0.9423 0.9444 20240812-22:54:22 training model 0 20240812-22:54:22 training model 1 20240812-22:58:04 train_perplexity 414 model 1 1.1667319296237784 20240812-22:58:08 train_perplexity 414 model 0 1.1644072891581068 20240812-22:58:11 test_perplexity 414 model 1 1.1693675994616248 20240812-22:58:13 test_perplexity 414 model 0 1.172612548516208 20240812-23:04:02 test_accuracy 414 model 1 val 1530 / 1622 20240812-23:04:05 test_accuracy 414 model 0 val 1511 / 1600 20240812-23:04:07 wrote gpt_000.pth 20240812-23:04:08 wrote gpt_001.pth 20240812-23:04:40 wrote non_validated_0414_00.png 20240812-23:05:13 wrote non_validated_0414_01.png 20240812-23:05:13 wrote state.pth 20240812-23:05:13 --- epoch 415 ---------------------------------------- 20240812-23:05:13 current_test_accuracies 0.9444 0.9433 0.9388 0.9460 0.9502 0.9466 0.9502 0.9456 0.9423 0.9444 20240812-23:05:13 training model 2 20240812-23:05:13 training model 8 20240812-23:08:55 train_perplexity 415 model 8 1.1631038374047817 20240812-23:08:59 train_perplexity 415 model 2 1.165197308908147 20240812-23:09:01 test_perplexity 415 model 8 1.1754331287937656 20240812-23:09:04 test_perplexity 415 model 2 1.170940046632659 20240812-23:14:58 test_accuracy 415 model 8 val 1494 / 1585 20240812-23:15:02 test_accuracy 415 model 2 val 1470 / 1558 20240812-23:15:04 wrote gpt_002.pth 20240812-23:15:05 wrote gpt_008.pth 20240812-23:15:37 wrote non_validated_0415_02.png 20240812-23:16:10 wrote non_validated_0415_08.png 20240812-23:16:10 wrote state.pth 20240812-23:16:10 --- epoch 416 ---------------------------------------- 20240812-23:16:10 current_test_accuracies 0.9444 0.9433 0.9435 0.9460 0.9502 0.9466 0.9502 0.9456 0.9426 0.9444 20240812-23:16:10 training model 8 20240812-23:16:10 training model 1 20240812-23:19:52 train_perplexity 416 model 1 1.1665908038392783 20240812-23:19:56 train_perplexity 416 model 8 1.1627484730774025 20240812-23:19:59 test_perplexity 416 model 1 1.1731684935611648 20240812-23:20:01 test_perplexity 416 model 8 1.1742718591865369 20240812-23:25:50 test_accuracy 416 model 1 val 1553 / 1624 20240812-23:25:56 test_accuracy 416 model 8 val 1516 / 1599 20240812-23:25:57 wrote gpt_008.pth 20240812-23:25:58 wrote gpt_001.pth 20240812-23:26:31 wrote non_validated_0416_08.png 20240812-23:27:04 wrote non_validated_0416_01.png 20240812-23:27:04 wrote state.pth 20240812-23:27:04 --- epoch 417 ---------------------------------------- 20240812-23:27:04 current_test_accuracies 0.9444 0.9563 0.9435 0.9460 0.9502 0.9466 0.9502 0.9456 0.9481 0.9444 20240812-23:27:04 training model 2 20240812-23:27:04 training model 0 20240812-23:30:46 train_perplexity 417 model 0 1.164276018399265 20240812-23:30:49 train_perplexity 417 model 2 1.1644945180667687 20240812-23:30:53 test_perplexity 417 model 0 1.1732889587084205 20240812-23:30:55 test_perplexity 417 model 2 1.1733506941045984 20240812-23:36:40 test_accuracy 417 model 0 val 1534 / 1624 20240812-23:36:43 test_accuracy 417 model 2 val 1521 / 1601 20240812-23:36:45 wrote gpt_002.pth 20240812-23:36:46 wrote gpt_000.pth 20240812-23:37:18 wrote non_validated_0417_02.png 20240812-23:37:51 wrote non_validated_0417_00.png 20240812-23:37:51 wrote state.pth 20240812-23:37:51 --- epoch 418 ---------------------------------------- 20240812-23:37:51 current_test_accuracies 0.9446 0.9563 0.9500 0.9460 0.9502 0.9466 0.9502 0.9456 0.9481 0.9444 20240812-23:37:51 training model 9 20240812-23:37:51 training model 0 20240812-23:41:33 train_perplexity 418 model 0 1.164604612009918 20240812-23:41:37 train_perplexity 418 model 9 1.1654270228403052 20240812-23:41:40 test_perplexity 418 model 0 1.1741339956136985 20240812-23:41:42 test_perplexity 418 model 9 1.172862458459278 20240812-23:47:29 test_accuracy 418 model 0 val 1501 / 1600 20240812-23:47:33 test_accuracy 418 model 9 val 1504 / 1583 20240812-23:47:35 wrote gpt_009.pth 20240812-23:47:36 wrote gpt_000.pth 20240812-23:48:08 wrote non_validated_0418_09.png 20240812-23:48:40 wrote non_validated_0418_00.png 20240812-23:48:40 wrote state.pth 20240812-23:48:40 --- epoch 419 ---------------------------------------- 20240812-23:48:40 current_test_accuracies 0.9381 0.9563 0.9500 0.9460 0.9502 0.9466 0.9502 0.9456 0.9481 0.9501 20240812-23:48:40 training model 0 20240812-23:48:40 training model 7 20240812-23:52:22 train_perplexity 419 model 7 1.1621148549049338 20240812-23:52:28 train_perplexity 419 model 0 1.164180534288466 20240812-23:52:28 test_perplexity 419 model 7 1.1745894167094701 20240812-23:52:32 test_perplexity 419 model 0 1.1731790610010144 20240812-23:58:18 test_accuracy 419 model 7 val 1528 / 1616 20240812-23:58:19 test_accuracy 419 model 0 val 1522 / 1619 20240812-23:58:21 wrote gpt_000.pth 20240812-23:58:21 wrote gpt_007.pth 20240812-23:58:54 wrote non_validated_0419_00.png 20240812-23:59:27 wrote non_validated_0419_07.png 20240812-23:59:27 wrote state.pth 20240812-23:59:27 --- epoch 420 ---------------------------------------- 20240812-23:59:27 current_test_accuracies 0.9401 0.9563 0.9500 0.9460 0.9502 0.9466 0.9502 0.9455 0.9481 0.9501 20240812-23:59:27 training model 0 20240812-23:59:27 training model 7 20240813-00:03:08 train_perplexity 420 model 7 1.1625506216706625 20240813-00:03:14 train_perplexity 420 model 0 1.1636903361221276 20240813-00:03:14 test_perplexity 420 model 7 1.1728161653882019 20240813-00:03:18 test_perplexity 420 model 0 1.1718893984110117 20240813-00:09:11 test_accuracy 420 model 7 val 1503 / 1593 20240813-00:09:14 test_accuracy 420 model 0 val 1488 / 1573 20240813-00:09:16 wrote gpt_000.pth 20240813-00:09:17 wrote gpt_007.pth 20240813-00:09:50 wrote non_validated_0420_00.png 20240813-00:10:22 wrote non_validated_0420_07.png 20240813-00:10:22 wrote state.pth 20240813-00:10:22 --- epoch 421 ---------------------------------------- 20240813-00:10:22 current_test_accuracies 0.9460 0.9563 0.9500 0.9460 0.9502 0.9466 0.9502 0.9435 0.9481 0.9501 20240813-00:10:22 training model 7 20240813-00:10:22 training model 3 20240813-00:14:05 train_perplexity 421 model 3 1.1651256535195018 20240813-00:14:08 train_perplexity 421 model 7 1.1622308146170313 20240813-00:14:12 test_perplexity 421 model 3 1.1734197216992093 20240813-00:14:13 test_perplexity 421 model 7 1.1749999444608312 20240813-00:20:06 test_accuracy 421 model 3 val 1533 / 1608 20240813-00:20:07 test_accuracy 421 model 7 val 1505 / 1603 20240813-00:20:09 wrote gpt_007.pth 20240813-00:20:10 wrote gpt_003.pth 20240813-00:20:43 wrote non_validated_0421_07.png 20240813-00:21:15 wrote non_validated_0421_03.png 20240813-00:21:15 wrote state.pth 20240813-00:21:15 --- epoch 422 ---------------------------------------- 20240813-00:21:15 current_test_accuracies 0.9460 0.9563 0.9500 0.9534 0.9502 0.9466 0.9502 0.9389 0.9481 0.9501 20240813-00:21:15 training model 7 20240813-00:21:15 training model 0 20240813-00:24:57 train_perplexity 422 model 0 1.1637348795216529 20240813-00:25:03 test_perplexity 422 model 0 1.171843192858153 20240813-00:25:03 train_perplexity 422 model 7 1.1619032518082002 20240813-00:25:07 test_perplexity 422 model 7 1.1738154463699495 20240813-00:30:58 test_accuracy 422 model 7 val 1518 / 1613 20240813-00:31:01 test_accuracy 422 model 0 val 1499 / 1583 20240813-00:31:03 wrote gpt_007.pth 20240813-00:31:03 wrote gpt_000.pth 20240813-00:31:36 wrote non_validated_0422_07.png 20240813-00:32:09 wrote non_validated_0422_00.png 20240813-00:32:09 wrote state.pth 20240813-00:32:09 --- epoch 423 ---------------------------------------- 20240813-00:32:09 current_test_accuracies 0.9469 0.9563 0.9500 0.9534 0.9502 0.9466 0.9502 0.9411 0.9481 0.9501 20240813-00:32:09 training model 7 20240813-00:32:09 training model 5 20240813-00:35:52 train_perplexity 423 model 5 1.166147096092951 20240813-00:35:54 train_perplexity 423 model 7 1.1620238391603808 20240813-00:35:59 test_perplexity 423 model 5 1.1696382570108241 20240813-00:36:01 test_perplexity 423 model 7 1.1731478257167032 20240813-00:41:52 test_accuracy 423 model 5 val 1508 / 1604 20240813-00:41:54 test_accuracy 423 model 7 val 1504 / 1598 20240813-00:41:56 wrote gpt_007.pth 20240813-00:41:57 wrote gpt_005.pth 20240813-00:42:30 wrote non_validated_0423_07.png 20240813-00:43:02 wrote non_validated_0423_05.png 20240813-00:43:02 wrote state.pth 20240813-00:43:02 --- epoch 424 ---------------------------------------- 20240813-00:43:02 current_test_accuracies 0.9469 0.9563 0.9500 0.9534 0.9502 0.9401 0.9502 0.9412 0.9481 0.9501 20240813-00:43:02 training model 5 20240813-00:43:02 training model 7 20240813-00:46:44 train_perplexity 424 model 7 1.1615507280511446 20240813-00:46:50 train_perplexity 424 model 5 1.1656924388383272 20240813-00:46:50 test_perplexity 424 model 7 1.172842538547996 20240813-00:46:54 test_perplexity 424 model 5 1.1709366367906233 20240813-00:52:49 test_accuracy 424 model 7 val 1505 / 1586 20240813-00:52:51 test_accuracy 424 model 5 val 1496 / 1577 20240813-00:52:53 wrote gpt_005.pth 20240813-00:52:54 wrote gpt_007.pth 20240813-00:53:26 wrote non_validated_0424_05.png 20240813-00:53:59 wrote non_validated_0424_07.png 20240813-00:53:59 wrote state.pth 20240813-00:53:59 --- epoch 425 ---------------------------------------- 20240813-00:53:59 current_test_accuracies 0.9469 0.9563 0.9500 0.9534 0.9502 0.9486 0.9502 0.9489 0.9481 0.9501 20240813-00:53:59 training model 0 20240813-00:53:59 training model 8 20240813-00:57:41 train_perplexity 425 model 8 1.1625510436243391 20240813-00:57:46 train_perplexity 425 model 0 1.1637978389048425 20240813-00:57:47 test_perplexity 425 model 8 1.1728707015212145 20240813-00:57:50 test_perplexity 425 model 0 1.1735677986680892 20240813-01:03:44 test_accuracy 425 model 8 val 1494 / 1586 20240813-01:03:46 test_accuracy 425 model 0 val 1508 / 1579 20240813-01:03:48 wrote gpt_000.pth 20240813-01:03:48 wrote gpt_008.pth 20240813-01:04:22 wrote non_validated_0425_00.png 20240813-01:04:54 wrote non_validated_0425_08.png 20240813-01:04:54 wrote state.pth 20240813-01:04:54 --- epoch 426 ---------------------------------------- 20240813-01:04:54 current_test_accuracies 0.9550 0.9563 0.9500 0.9534 0.9502 0.9486 0.9502 0.9489 0.9420 0.9501 20240813-01:04:54 training model 8 20240813-01:04:54 training model 5 20240813-01:08:36 train_perplexity 426 model 5 1.1655296172669944 20240813-01:08:41 train_perplexity 426 model 8 1.1632439867859332 20240813-01:08:42 test_perplexity 426 model 5 1.170549683580054 20240813-01:08:45 test_perplexity 426 model 8 1.1742754436602287 20240813-01:14:36 test_accuracy 426 model 8 val 1528 / 1612 20240813-01:14:39 test_accuracy 426 model 5 val 1499 / 1597 20240813-01:14:41 wrote gpt_008.pth 20240813-01:14:42 wrote gpt_005.pth 20240813-01:15:14 wrote non_validated_0426_08.png 20240813-01:15:47 wrote non_validated_0426_05.png 20240813-01:15:47 wrote state.pth 20240813-01:15:47 --- epoch 427 ---------------------------------------- 20240813-01:15:47 current_test_accuracies 0.9550 0.9563 0.9500 0.9534 0.9502 0.9386 0.9502 0.9489 0.9479 0.9501 20240813-01:15:47 training model 5 20240813-01:15:47 training model 8 20240813-01:19:30 train_perplexity 427 model 8 1.1623765304768665 20240813-01:19:31 train_perplexity 427 model 5 1.1655562110502182 20240813-01:19:37 test_perplexity 427 model 8 1.1729729828323783 20240813-01:19:38 test_perplexity 427 model 5 1.1715329635046128 20240813-01:25:28 test_accuracy 427 model 5 val 1486 / 1600 20240813-01:25:30 test_accuracy 427 model 8 val 1496 / 1593 20240813-01:25:32 wrote gpt_005.pth 20240813-01:25:32 wrote gpt_008.pth 20240813-01:26:05 wrote non_validated_0427_05.png 20240813-01:26:38 wrote non_validated_0427_08.png 20240813-01:26:38 wrote state.pth 20240813-01:26:38 --- epoch 428 ---------------------------------------- 20240813-01:26:38 current_test_accuracies 0.9550 0.9563 0.9500 0.9534 0.9502 0.9287 0.9502 0.9489 0.9391 0.9501 20240813-01:26:38 training model 5 20240813-01:26:38 training model 8 20240813-01:30:21 train_perplexity 428 model 8 1.162308699693955 20240813-01:30:24 train_perplexity 428 model 5 1.164813896008241 20240813-01:30:27 test_perplexity 428 model 8 1.1743187961418462 20240813-01:30:29 test_perplexity 428 model 5 1.1708815234389836 20240813-01:36:21 test_accuracy 428 model 8 val 1510 / 1599 20240813-01:36:24 test_accuracy 428 model 5 val 1497 / 1588 20240813-01:36:26 wrote gpt_005.pth 20240813-01:36:27 wrote gpt_008.pth 20240813-01:37:00 wrote non_validated_0428_05.png 20240813-01:37:33 wrote non_validated_0428_08.png 20240813-01:37:33 wrote state.pth 20240813-01:37:33 --- epoch 429 ---------------------------------------- 20240813-01:37:33 current_test_accuracies 0.9550 0.9563 0.9500 0.9534 0.9502 0.9427 0.9502 0.9489 0.9443 0.9501 20240813-01:37:33 training model 5 20240813-01:37:33 training model 8 20240813-01:41:15 train_perplexity 429 model 8 1.1619340525556898 20240813-01:41:17 train_perplexity 429 model 5 1.1650818291500078 20240813-01:41:22 test_perplexity 429 model 8 1.1741583078139275 20240813-01:41:23 test_perplexity 429 model 5 1.1723456565002062 20240813-01:47:07 test_accuracy 429 model 8 val 1541 / 1634 20240813-01:47:12 test_accuracy 429 model 5 val 1500 / 1590 20240813-01:47:14 wrote gpt_005.pth 20240813-01:47:15 wrote gpt_008.pth 20240813-01:47:48 wrote non_validated_0429_05.png 20240813-01:48:20 wrote non_validated_0429_08.png 20240813-01:48:21 wrote state.pth 20240813-01:48:21 --- epoch 430 ---------------------------------------- 20240813-01:48:21 current_test_accuracies 0.9550 0.9563 0.9500 0.9534 0.9502 0.9434 0.9502 0.9489 0.9431 0.9501 20240813-01:48:21 training model 8 20240813-01:48:21 training model 5 20240813-01:52:02 train_perplexity 430 model 5 1.1647637587870552 20240813-01:52:06 train_perplexity 430 model 8 1.161453414768246 20240813-01:52:09 test_perplexity 430 model 5 1.1717678747909217 20240813-01:52:11 test_perplexity 430 model 8 1.1742018138812143 20240813-01:57:59 test_accuracy 430 model 8 val 1531 / 1613 20240813-01:58:01 test_accuracy 430 model 5 val 1535 / 1604 20240813-01:58:03 wrote gpt_008.pth 20240813-01:58:03 wrote gpt_005.pth 20240813-01:58:36 wrote non_validated_0430_08.png 20240813-01:59:08 wrote non_validated_0430_05.png 20240813-01:59:08 wrote state.pth 20240813-01:59:08 --- epoch 431 ---------------------------------------- 20240813-01:59:08 current_test_accuracies 0.9550 0.9563 0.9500 0.9534 0.9502 0.9570 0.9502 0.9489 0.9492 0.9501 20240813-01:59:08 training model 7 20240813-01:59:08 training model 8 20240813-02:02:51 train_perplexity 431 model 8 1.1612634744872314 20240813-02:02:55 train_perplexity 431 model 7 1.1610380079764442 20240813-02:02:57 test_perplexity 431 model 8 1.177103937050218 20240813-02:03:00 test_perplexity 431 model 7 1.17578639096412 20240813-02:08:39 test_accuracy 431 model 8 val 1539 / 1640 20240813-02:08:45 test_accuracy 431 model 7 val 1526 / 1609 20240813-02:08:47 wrote gpt_007.pth 20240813-02:08:48 wrote gpt_008.pth 20240813-02:09:21 wrote non_validated_0431_07.png 20240813-02:09:54 wrote non_validated_0431_08.png 20240813-02:09:54 wrote state.pth 20240813-02:09:54 --- epoch 432 ---------------------------------------- 20240813-02:09:54 current_test_accuracies 0.9550 0.9563 0.9500 0.9534 0.9502 0.9570 0.9502 0.9484 0.9384 0.9501 20240813-02:09:54 training model 8 20240813-02:09:54 training model 7 20240813-02:13:36 train_perplexity 432 model 7 1.1616677993696858 20240813-02:13:39 train_perplexity 432 model 8 1.1608784300837467 20240813-02:13:43 test_perplexity 432 model 7 1.1768131638708657 20240813-02:13:45 test_perplexity 432 model 8 1.1742842865607481 20240813-02:19:33 test_accuracy 432 model 8 val 1510 / 1602 20240813-02:19:36 test_accuracy 432 model 7 val 1489 / 1580 20240813-02:19:38 wrote gpt_008.pth 20240813-02:19:38 wrote gpt_007.pth 20240813-02:20:12 wrote non_validated_0432_08.png 20240813-02:20:45 wrote non_validated_0432_07.png 20240813-02:20:45 wrote state.pth 20240813-02:20:45 --- epoch 433 ---------------------------------------- 20240813-02:20:45 current_test_accuracies 0.9550 0.9563 0.9500 0.9534 0.9502 0.9570 0.9502 0.9424 0.9426 0.9501 20240813-02:20:45 training model 7 20240813-02:20:45 training model 8 20240813-02:24:26 train_perplexity 433 model 8 1.1611282744029934 20240813-02:24:32 train_perplexity 433 model 7 1.160886721972995 20240813-02:24:32 test_perplexity 433 model 8 1.1735460857046907 20240813-02:24:36 test_perplexity 433 model 7 1.1763256204905588 20240813-02:30:28 test_accuracy 433 model 8 val 1497 / 1596 20240813-02:30:29 test_accuracy 433 model 7 val 1501 / 1595 20240813-02:30:31 wrote gpt_007.pth 20240813-02:30:32 wrote gpt_008.pth 20240813-02:31:05 wrote non_validated_0433_07.png 20240813-02:31:37 wrote non_validated_0433_08.png 20240813-02:31:37 wrote state.pth 20240813-02:31:37 --- epoch 434 ---------------------------------------- 20240813-02:31:37 current_test_accuracies 0.9550 0.9563 0.9500 0.9534 0.9502 0.9570 0.9502 0.9411 0.9380 0.9501 20240813-02:31:37 training model 8 20240813-02:31:37 training model 7 20240813-02:35:20 train_perplexity 434 model 7 1.160713910679242 20240813-02:35:21 train_perplexity 434 model 8 1.1606784764652458 20240813-02:35:27 test_perplexity 434 model 7 1.1769164933347445 20240813-02:35:28 test_perplexity 434 model 8 1.1760310773394727 20240813-02:41:15 test_accuracy 434 model 7 val 1511 / 1605 20240813-02:41:19 test_accuracy 434 model 8 val 1502 / 1589 20240813-02:41:21 wrote gpt_008.pth 20240813-02:41:21 wrote gpt_007.pth 20240813-02:41:54 wrote non_validated_0434_08.png 20240813-02:42:26 wrote non_validated_0434_07.png 20240813-02:42:26 wrote state.pth 20240813-02:42:26 --- epoch 435 ---------------------------------------- 20240813-02:42:26 current_test_accuracies 0.9550 0.9563 0.9500 0.9534 0.9502 0.9570 0.9502 0.9414 0.9452 0.9501 20240813-02:42:26 training model 7 20240813-02:42:26 training model 8 20240813-02:46:08 train_perplexity 435 model 8 1.1604705252942171 20240813-02:46:13 train_perplexity 435 model 7 1.1610417915517992 20240813-02:46:15 test_perplexity 435 model 8 1.176937261114339 20240813-02:46:18 test_perplexity 435 model 7 1.177973859913448 20240813-02:52:03 test_accuracy 435 model 7 val 1559 / 1639 20240813-02:52:06 test_accuracy 435 model 8 val 1508 / 1600 20240813-02:52:08 wrote gpt_007.pth 20240813-02:52:09 wrote gpt_008.pth 20240813-02:52:42 wrote non_validated_0435_07.png 20240813-02:53:14 wrote non_validated_0435_08.png 20240813-02:53:14 wrote state.pth 20240813-02:53:14 --- epoch 436 ---------------------------------------- 20240813-02:53:14 current_test_accuracies 0.9550 0.9563 0.9500 0.9534 0.9502 0.9570 0.9502 0.9512 0.9425 0.9501 20240813-02:53:14 training model 8 20240813-02:53:14 training model 2 20240813-02:56:57 train_perplexity 436 model 2 1.164738354301881 20240813-02:56:59 train_perplexity 436 model 8 1.1603841622996929 20240813-02:57:04 test_perplexity 436 model 2 1.171590624532718 20240813-02:57:05 test_perplexity 436 model 8 1.1753337600535099 20240813-03:03:01 test_accuracy 436 model 2 val 1488 / 1583 20240813-03:03:04 test_accuracy 436 model 8 val 1503 / 1576 20240813-03:03:06 wrote gpt_008.pth 20240813-03:03:06 wrote gpt_002.pth 20240813-03:03:40 wrote non_validated_0436_08.png 20240813-03:04:13 wrote non_validated_0436_02.png 20240813-03:04:13 wrote state.pth 20240813-03:04:13 --- epoch 437 ---------------------------------------- 20240813-03:04:13 current_test_accuracies 0.9550 0.9563 0.9400 0.9534 0.9502 0.9570 0.9502 0.9512 0.9537 0.9501 20240813-03:04:13 training model 2 20240813-03:04:13 training model 9 20240813-03:07:55 train_perplexity 437 model 9 1.165276449165644 20240813-03:07:58 train_perplexity 437 model 2 1.165269580714746 20240813-03:08:02 test_perplexity 437 model 9 1.1709610210655286 20240813-03:08:04 test_perplexity 437 model 2 1.1725770704140799 20240813-03:13:55 test_accuracy 437 model 2 val 1529 / 1618 20240813-03:14:02 test_accuracy 437 model 9 val 1487 / 1567 20240813-03:14:04 wrote gpt_002.pth 20240813-03:14:05 wrote gpt_009.pth 20240813-03:14:38 wrote non_validated_0437_02.png 20240813-03:15:11 wrote non_validated_0437_09.png 20240813-03:15:11 wrote state.pth 20240813-03:15:11 --- epoch 438 ---------------------------------------- 20240813-03:15:11 current_test_accuracies 0.9550 0.9563 0.9450 0.9534 0.9502 0.9570 0.9502 0.9512 0.9537 0.9489 20240813-03:15:11 training model 2 20240813-03:15:11 training model 9 20240813-03:18:53 train_perplexity 438 model 9 1.1648926514400155 20240813-03:18:56 train_perplexity 438 model 2 1.1646435371178119 20240813-03:18:59 test_perplexity 438 model 9 1.1710244539661012 20240813-03:19:01 test_perplexity 438 model 2 1.171266380475085 20240813-03:24:44 test_accuracy 438 model 2 val 1560 / 1642 20240813-03:24:47 test_accuracy 438 model 9 val 1541 / 1626 20240813-03:24:49 wrote gpt_002.pth 20240813-03:24:49 wrote gpt_009.pth 20240813-03:25:23 wrote non_validated_0438_02.png 20240813-03:25:56 wrote non_validated_0438_09.png 20240813-03:25:56 wrote state.pth 20240813-03:25:56 --- epoch 439 ---------------------------------------- 20240813-03:25:56 current_test_accuracies 0.9550 0.9563 0.9501 0.9534 0.9502 0.9570 0.9502 0.9512 0.9537 0.9477 20240813-03:25:56 training model 9 20240813-03:25:56 training model 2 20240813-03:29:38 train_perplexity 439 model 2 1.1641564457540705 20240813-03:29:40 train_perplexity 439 model 9 1.1650809103861566 20240813-03:29:45 test_perplexity 439 model 2 1.1730247933451232 20240813-03:29:46 test_perplexity 439 model 9 1.1705396988531978 20240813-03:35:40 test_accuracy 439 model 2 val 1520 / 1609 20240813-03:35:42 test_accuracy 439 model 9 val 1519 / 1596 20240813-03:35:44 wrote gpt_009.pth 20240813-03:35:45 wrote gpt_002.pth 20240813-03:36:17 wrote non_validated_0439_09.png 20240813-03:36:50 wrote non_validated_0439_02.png 20240813-03:36:50 wrote state.pth 20240813-03:36:50 --- epoch 440 ---------------------------------------- 20240813-03:36:50 current_test_accuracies 0.9550 0.9563 0.9447 0.9534 0.9502 0.9570 0.9502 0.9512 0.9537 0.9518 20240813-03:36:50 training model 2 20240813-03:36:50 training model 6 20240813-03:40:32 train_perplexity 440 model 6 1.1652124420911776 20240813-03:40:35 train_perplexity 440 model 2 1.1636370784240286 20240813-03:40:39 test_perplexity 440 model 6 1.1729957977300092 20240813-03:40:40 test_perplexity 440 model 2 1.1738161501728508 20240813-03:46:30 test_accuracy 440 model 6 val 1541 / 1617 20240813-03:46:30 test_accuracy 440 model 2 val 1544 / 1612 20240813-03:46:32 wrote gpt_002.pth 20240813-03:46:33 wrote gpt_006.pth 20240813-03:47:06 wrote non_validated_0440_02.png 20240813-03:47:38 wrote non_validated_0440_06.png 20240813-03:47:38 wrote state.pth 20240813-03:47:38 --- epoch 441 ---------------------------------------- 20240813-03:47:38 current_test_accuracies 0.9550 0.9563 0.9578 0.9534 0.9502 0.9570 0.9530 0.9512 0.9537 0.9518 20240813-03:51:03 keep c_quizzes model 2 validated 30 / 420 (7.14%) nb_accumulated 30 / 420 (finishes Tue 04:35 -- 527/h) 20240813-03:54:05 keep c_quizzes model 2 validated 39 / 420 (9.29%) nb_accumulated 69 / 420 (finishes Tue 04:26 -- 642/h) 20240813-03:57:06 keep c_quizzes model 4 validated 35 / 420 (8.33%) nb_accumulated 104 / 420 (finishes Tue 04:25 -- 659/h) 20240813-04:00:08 keep c_quizzes model 4 validated 27 / 420 (6.43%) nb_accumulated 131 / 420 (finishes Tue 04:27 -- 629/h) 20240813-04:03:08 keep c_quizzes model 4 validated 45 / 420 (10.71%) nb_accumulated 176 / 420 (finishes Tue 04:24 -- 681/h) 20240813-04:06:09 keep c_quizzes model 3 validated 33 / 420 (7.86%) nb_accumulated 209 / 420 (finishes Tue 04:24 -- 677/h) 20240813-04:09:09 keep c_quizzes model 5 validated 27 / 420 (6.43%) nb_accumulated 236 / 420 (finishes Tue 04:25 -- 658/h) 20240813-04:12:10 keep c_quizzes model 9 validated 27 / 420 (6.43%) nb_accumulated 263 / 420 (finishes Tue 04:26 -- 643/h) 20240813-04:15:11 keep c_quizzes model 1 validated 29 / 420 (6.90%) nb_accumulated 292 / 420 (finishes Tue 04:27 -- 636/h) 20240813-04:18:11 keep c_quizzes model 1 validated 30 / 420 (7.14%) nb_accumulated 322 / 420 (finishes Tue 04:27 -- 632/h) 20240813-04:21:11 keep c_quizzes model 0 validated 40 / 420 (9.52%) nb_accumulated 362 / 420 (finishes Tue 04:26 -- 647/h) 20240813-04:24:11 keep c_quizzes model 1 validated 37 / 420 (8.81%) nb_accumulated 399 / 420 (finishes Tue 04:26 -- 654/h) 20240813-04:27:17 keep c_quizzes model 5 validated 30 / 420 (7.14%) nb_accumulated 429 / 420 (finishes now! -- 649/h) 20240813-04:27:30 wrote c_quizzes.pth 20240813-04:27:30 training model 0 20240813-04:27:30 training model 1 20240813-04:31:12 train_perplexity 441 model 0 1.1640157133267734 20240813-04:31:12 train_perplexity 441 model 1 1.167972696414182 20240813-04:31:19 test_perplexity 441 model 0 1.1731901466556143 20240813-04:31:19 test_perplexity 441 model 1 1.1723351823844919 20240813-04:37:09 test_accuracy 441 model 0 val 1538 / 1615 20240813-04:37:11 test_accuracy 441 model 1 val 1519 / 1606 20240813-04:37:13 wrote gpt_000.pth 20240813-04:37:14 wrote gpt_001.pth 20240813-04:37:47 wrote non_validated_0441_00.png 20240813-04:38:19 wrote non_validated_0441_01.png 20240813-04:38:19 wrote state.pth 20240813-04:38:19 --- epoch 442 ---------------------------------------- 20240813-04:38:19 current_test_accuracies 0.9523 0.9458 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 20240813-04:38:19 training model 2 20240813-04:38:19 training model 3 20240813-04:42:02 train_perplexity 442 model 3 1.166085958882825 20240813-04:42:03 train_perplexity 442 model 2 1.16493046226721 20240813-04:42:09 test_perplexity 442 model 3 1.1727096498537948 20240813-04:42:10 test_perplexity 442 model 2 1.1744913262266454 20240813-04:48:04 test_accuracy 442 model 3 val 1539 / 1607 20240813-04:48:05 test_accuracy 442 model 2 val 1520 / 1608 20240813-04:48:07 wrote gpt_002.pth 20240813-04:48:08 wrote gpt_003.pth 20240813-04:48:40 wrote non_validated_0442_02.png 20240813-04:49:13 wrote non_validated_0442_03.png 20240813-04:49:13 wrote state.pth 20240813-04:49:13 --- epoch 443 ---------------------------------------- 20240813-04:49:13 current_test_accuracies 0.9523 0.9458 0.9453 0.9577 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 20240813-04:49:13 training model 4 20240813-04:49:13 training model 5 20240813-04:52:55 train_perplexity 443 model 5 1.1655399288682837 20240813-04:52:58 train_perplexity 443 model 4 1.167702465083629 20240813-04:53:01 test_perplexity 443 model 5 1.173123775239046 20240813-04:53:04 test_perplexity 443 model 4 1.1708568322234605 20240813-04:58:56 test_accuracy 443 model 4 val 1524 / 1619 20240813-04:59:01 test_accuracy 443 model 5 val 1497 / 1574 20240813-04:59:03 wrote gpt_004.pth 20240813-04:59:04 wrote gpt_005.pth 20240813-04:59:37 wrote non_validated_0443_04.png 20240813-05:00:10 wrote non_validated_0443_05.png 20240813-05:00:10 wrote state.pth 20240813-05:00:10 --- epoch 444 ---------------------------------------- 20240813-05:00:10 current_test_accuracies 0.9523 0.9458 0.9453 0.9577 0.9413 0.9511 0.0000 0.0000 0.0000 0.0000 20240813-05:00:10 training model 6 20240813-05:00:10 training model 7 20240813-05:03:52 train_perplexity 444 model 7 1.1616130854069615 20240813-05:03:54 train_perplexity 444 model 6 1.1655523074802288 20240813-05:03:59 test_perplexity 444 model 7 1.1755623034833242 20240813-05:04:00 test_perplexity 444 model 6 1.172914943539203 20240813-05:09:45 test_accuracy 444 model 6 val 1535 / 1615 20240813-05:09:49 test_accuracy 444 model 7 val 1521 / 1596 20240813-05:09:51 wrote gpt_006.pth 20240813-05:09:52 wrote gpt_007.pth 20240813-05:10:24 wrote non_validated_0444_06.png 20240813-05:10:57 wrote non_validated_0444_07.png 20240813-05:10:57 wrote state.pth 20240813-05:10:57 --- epoch 445 ---------------------------------------- 20240813-05:10:57 current_test_accuracies 0.9523 0.9458 0.9453 0.9577 0.9413 0.9511 0.9505 0.9530 0.0000 0.0000 20240813-05:10:57 training model 8 20240813-05:10:57 training model 9 20240813-05:14:39 train_perplexity 445 model 9 1.1657132614352845 20240813-05:14:44 train_perplexity 445 model 8 1.1613449943606506 20240813-05:14:45 test_perplexity 445 model 9 1.1741996208567218 20240813-05:14:49 test_perplexity 445 model 8 1.174920190135717 20240813-05:20:43 test_accuracy 445 model 9 val 1505 / 1591 20240813-05:20:47 test_accuracy 445 model 8 val 1484 / 1574 20240813-05:20:49 wrote gpt_008.pth 20240813-05:20:50 wrote gpt_009.pth 20240813-05:21:23 wrote non_validated_0445_08.png 20240813-05:21:55 wrote non_validated_0445_09.png 20240813-05:21:55 wrote state.pth 20240813-05:21:55 --- epoch 446 ---------------------------------------- 20240813-05:21:55 current_test_accuracies 0.9523 0.9458 0.9453 0.9577 0.9413 0.9511 0.9505 0.9530 0.9428 0.9459 20240813-05:21:55 training model 4 20240813-05:21:55 training model 8 20240813-05:25:37 train_perplexity 446 model 8 1.160873778786719 20240813-05:25:43 test_perplexity 446 model 8 1.1762769524139178 20240813-05:25:43 train_perplexity 446 model 4 1.1671371671947943 20240813-05:25:47 test_perplexity 446 model 4 1.171359269807399 20240813-05:31:35 test_accuracy 446 model 4 val 1510 / 1603 20240813-05:31:36 test_accuracy 446 model 8 val 1505 / 1595 20240813-05:31:38 wrote gpt_004.pth 20240813-05:31:38 wrote gpt_008.pth 20240813-05:32:11 wrote non_validated_0446_04.png 20240813-05:32:44 wrote non_validated_0446_08.png 20240813-05:32:44 wrote state.pth 20240813-05:32:44 --- epoch 447 ---------------------------------------- 20240813-05:32:44 current_test_accuracies 0.9523 0.9458 0.9453 0.9577 0.9420 0.9511 0.9505 0.9530 0.9436 0.9459 20240813-05:32:44 training model 4 20240813-05:32:44 training model 8 20240813-05:36:26 train_perplexity 447 model 8 1.1607507859754764 20240813-05:36:27 train_perplexity 447 model 4 1.1675852264091051 20240813-05:36:34 test_perplexity 447 model 8 1.1758419664702977 20240813-05:36:34 test_perplexity 447 model 4 1.1702202972964084 20240813-05:42:23 test_accuracy 447 model 8 val 1516 / 1602 20240813-05:42:29 test_accuracy 447 model 4 val 1481 / 1561 20240813-05:42:30 wrote gpt_004.pth 20240813-05:42:31 wrote gpt_008.pth 20240813-05:43:03 wrote non_validated_0447_04.png 20240813-05:43:36 wrote non_validated_0447_08.png 20240813-05:43:36 wrote state.pth 20240813-05:43:36 --- epoch 448 ---------------------------------------- 20240813-05:43:36 current_test_accuracies 0.9523 0.9458 0.9453 0.9577 0.9488 0.9511 0.9505 0.9530 0.9463 0.9459 20240813-05:43:36 training model 2 20240813-05:43:36 training model 1 20240813-05:47:18 train_perplexity 448 model 1 1.1671357304782888 20240813-05:47:19 train_perplexity 448 model 2 1.1645105837910124 20240813-05:47:25 test_perplexity 448 model 1 1.1705392712964278 20240813-05:47:26 test_perplexity 448 model 2 1.172425994502565 20240813-05:53:11 test_accuracy 448 model 1 val 1543 / 1623 20240813-05:53:15 test_accuracy 448 model 2 val 1529 / 1607 20240813-05:53:17 wrote gpt_002.pth 20240813-05:53:18 wrote gpt_001.pth 20240813-05:53:50 wrote non_validated_0448_02.png 20240813-05:54:23 wrote non_validated_0448_01.png 20240813-05:54:23 wrote state.pth 20240813-05:54:23 --- epoch 449 ---------------------------------------- 20240813-05:54:23 current_test_accuracies 0.9523 0.9507 0.9515 0.9577 0.9488 0.9511 0.9505 0.9530 0.9463 0.9459 20240813-05:54:23 training model 9 20240813-05:54:23 training model 8 20240813-05:58:05 train_perplexity 449 model 8 1.1602336501833637 20240813-05:58:07 train_perplexity 449 model 9 1.1655801292448034 20240813-05:58:12 test_perplexity 449 model 8 1.178741332254797 20240813-05:58:13 test_perplexity 449 model 9 1.173549022464913 20240813-06:03:55 test_accuracy 449 model 9 val 1533 / 1630 20240813-06:04:00 test_accuracy 449 model 8 val 1513 / 1602 20240813-06:04:02 wrote gpt_009.pth 20240813-06:04:03 wrote gpt_008.pth 20240813-06:04:36 wrote non_validated_0449_09.png 20240813-06:05:09 wrote non_validated_0449_08.png 20240813-06:05:09 wrote state.pth 20240813-06:05:09 --- epoch 450 ---------------------------------------- 20240813-06:05:09 current_test_accuracies 0.9523 0.9507 0.9515 0.9577 0.9488 0.9511 0.9505 0.9530 0.9444 0.9405 20240813-06:05:09 training model 9 20240813-06:05:09 training model 8 20240813-06:08:51 train_perplexity 450 model 8 1.1602568156674995 20240813-06:08:53 train_perplexity 450 model 9 1.1651963479322138 20240813-06:08:58 test_perplexity 450 model 8 1.1765687760906924 20240813-06:08:59 test_perplexity 450 model 9 1.1736587681347166 20240813-06:14:49 test_accuracy 450 model 9 val 1517 / 1603 20240813-06:14:51 test_accuracy 450 model 8 val 1528 / 1606 20240813-06:14:53 wrote gpt_009.pth 20240813-06:14:54 wrote gpt_008.pth 20240813-06:15:26 wrote non_validated_0450_09.png 20240813-06:15:59 wrote non_validated_0450_08.png 20240813-06:15:59 wrote state.pth 20240813-06:15:59 --- epoch 451 ---------------------------------------- 20240813-06:15:59 current_test_accuracies 0.9523 0.9507 0.9515 0.9577 0.9488 0.9511 0.9505 0.9530 0.9514 0.9464 20240813-06:15:59 training model 9 20240813-06:15:59 training model 4 20240813-06:19:41 train_perplexity 451 model 4 1.1666021538093905 20240813-06:19:43 train_perplexity 451 model 9 1.1650229486411676 20240813-06:19:48 test_perplexity 451 model 4 1.1731854003305908 20240813-06:19:49 test_perplexity 451 model 9 1.1759540405059186 20240813-06:25:38 test_accuracy 451 model 9 val 1535 / 1626 20240813-06:25:40 test_accuracy 451 model 4 val 1490 / 1600 20240813-06:25:42 wrote gpt_009.pth 20240813-06:25:43 wrote gpt_004.pth 20240813-06:26:15 wrote non_validated_0451_09.png 20240813-06:26:48 wrote non_validated_0451_04.png 20240813-06:26:48 wrote state.pth 20240813-06:26:48 --- epoch 452 ---------------------------------------- 20240813-06:26:48 current_test_accuracies 0.9523 0.9507 0.9515 0.9577 0.9312 0.9511 0.9505 0.9530 0.9514 0.9440 20240813-06:26:48 training model 4 20240813-06:26:48 training model 9 20240813-06:30:30 train_perplexity 452 model 9 1.16494026199508 20240813-06:30:34 train_perplexity 452 model 4 1.1665069088798574 20240813-06:30:37 test_perplexity 452 model 9 1.172108769275328 20240813-06:30:39 test_perplexity 452 model 4 1.171016909863521 20240813-06:36:35 test_accuracy 452 model 9 val 1513 / 1597 20240813-06:36:37 test_accuracy 452 model 4 val 1501 / 1580 20240813-06:36:39 wrote gpt_004.pth 20240813-06:36:40 wrote gpt_009.pth 20240813-06:37:12 wrote non_validated_0452_04.png 20240813-06:37:45 wrote non_validated_0452_09.png 20240813-06:37:45 wrote state.pth 20240813-06:37:45 --- epoch 453 ---------------------------------------- 20240813-06:37:45 current_test_accuracies 0.9523 0.9507 0.9515 0.9577 0.9500 0.9511 0.9505 0.9530 0.9514 0.9474 20240813-06:37:45 training model 9 20240813-06:37:45 training model 4 20240813-06:41:27 train_perplexity 453 model 4 1.166409659401482 20240813-06:41:32 train_perplexity 453 model 9 1.1644561499309725 20240813-06:41:34 test_perplexity 453 model 4 1.173212078117015 20240813-06:41:36 test_perplexity 453 model 9 1.1733981410829988 20240813-06:47:29 test_accuracy 453 model 4 val 1499 / 1596 20240813-06:47:32 test_accuracy 453 model 9 val 1494 / 1588 20240813-06:47:34 wrote gpt_009.pth 20240813-06:47:35 wrote gpt_004.pth 20240813-06:48:07 wrote non_validated_0453_09.png 20240813-06:48:40 wrote non_validated_0453_04.png 20240813-06:48:40 wrote state.pth 20240813-06:48:40 --- epoch 454 ---------------------------------------- 20240813-06:48:40 current_test_accuracies 0.9523 0.9507 0.9515 0.9577 0.9392 0.9511 0.9505 0.9530 0.9514 0.9408 20240813-06:48:40 training model 4 20240813-06:48:40 training model 9 20240813-06:52:22 train_perplexity 454 model 9 1.164613973706911 20240813-06:52:23 train_perplexity 454 model 4 1.1662992621634907 20240813-06:52:30 test_perplexity 454 model 9 1.1722370275272391 20240813-06:52:30 test_perplexity 454 model 4 1.1726186705172672 20240813-06:58:17 test_accuracy 454 model 4 val 1537 / 1623 20240813-06:58:20 test_accuracy 454 model 9 val 1504 / 1596 20240813-06:58:22 wrote gpt_004.pth 20240813-06:58:23 wrote gpt_009.pth 20240813-06:58:56 wrote non_validated_0454_04.png 20240813-06:59:28 wrote non_validated_0454_09.png 20240813-06:59:29 wrote state.pth 20240813-06:59:29 --- epoch 455 ---------------------------------------- 20240813-06:59:29 current_test_accuracies 0.9523 0.9507 0.9515 0.9577 0.9470 0.9511 0.9505 0.9530 0.9514 0.9424 20240813-06:59:29 training model 9 20240813-06:59:29 training model 4 20240813-07:03:11 train_perplexity 455 model 4 1.1660663545797538 20240813-07:03:13 train_perplexity 455 model 9 1.1642540077406032 20240813-07:03:18 test_perplexity 455 model 4 1.1711110400208804 20240813-07:03:19 test_perplexity 455 model 9 1.1739704768563903 20240813-07:09:10 test_accuracy 455 model 9 val 1501 / 1605 20240813-07:09:14 test_accuracy 455 model 4 val 1500 / 1583 20240813-07:09:16 wrote gpt_009.pth 20240813-07:09:16 wrote gpt_004.pth 20240813-07:09:49 wrote non_validated_0455_09.png 20240813-07:10:22 wrote non_validated_0455_04.png 20240813-07:10:22 wrote state.pth 20240813-07:10:22 --- epoch 456 ---------------------------------------- 20240813-07:10:22 current_test_accuracies 0.9523 0.9507 0.9515 0.9577 0.9476 0.9511 0.9505 0.9530 0.9514 0.9352 20240813-07:10:22 training model 9 20240813-07:10:22 training model 4 20240813-07:14:05 train_perplexity 456 model 9 1.1642182228021087 20240813-07:14:05 train_perplexity 456 model 4 1.1658967905368203 20240813-07:14:12 test_perplexity 456 model 9 1.1744229388855312 20240813-07:14:12 test_perplexity 456 model 4 1.1711137246289989 20240813-07:20:05 test_accuracy 456 model 9 val 1519 / 1603 20240813-07:20:07 test_accuracy 456 model 4 val 1481 / 1598 20240813-07:20:09 wrote gpt_009.pth 20240813-07:20:10 wrote gpt_004.pth 20240813-07:20:42 wrote non_validated_0456_09.png 20240813-07:21:15 wrote non_validated_0456_04.png 20240813-07:21:15 wrote state.pth 20240813-07:21:15 --- epoch 457 ---------------------------------------- 20240813-07:21:15 current_test_accuracies 0.9523 0.9507 0.9515 0.9577 0.9268 0.9511 0.9505 0.9530 0.9514 0.9476 20240813-07:21:15 training model 4 20240813-07:21:15 training model 9 20240813-07:24:57 train_perplexity 457 model 9 1.1635027944631415 20240813-07:25:02 train_perplexity 457 model 4 1.1658245985574214 20240813-07:25:03 test_perplexity 457 model 9 1.174614074618397 20240813-07:25:06 test_perplexity 457 model 4 1.1713764367635524 20240813-07:31:02 test_accuracy 457 model 9 val 1499 / 1583 20240813-07:31:04 test_accuracy 457 model 4 val 1499 / 1578 20240813-07:31:06 wrote gpt_004.pth 20240813-07:31:07 wrote gpt_009.pth 20240813-07:31:41 wrote non_validated_0457_04.png 20240813-07:32:13 wrote non_validated_0457_09.png 20240813-07:32:13 wrote state.pth 20240813-07:32:13 --- epoch 458 ---------------------------------------- 20240813-07:32:13 current_test_accuracies 0.9523 0.9507 0.9515 0.9577 0.9499 0.9511 0.9505 0.9530 0.9514 0.9469 20240813-07:32:13 training model 9 20240813-07:32:13 training model 4 20240813-07:35:56 train_perplexity 458 model 4 1.1658076777324347 20240813-07:35:58 train_perplexity 458 model 9 1.1630829354473813 20240813-07:36:03 test_perplexity 458 model 4 1.1732739333797138 20240813-07:36:04 test_perplexity 458 model 9 1.176062209105619 20240813-07:41:53 test_accuracy 458 model 9 val 1541 / 1626 20240813-07:41:59 test_accuracy 458 model 4 val 1498 / 1584 20240813-07:42:01 wrote gpt_009.pth 20240813-07:42:01 wrote gpt_004.pth 20240813-07:42:34 wrote non_validated_0458_09.png 20240813-07:43:07 wrote non_validated_0458_04.png 20240813-07:43:07 wrote state.pth 20240813-07:43:07 --- epoch 459 ---------------------------------------- 20240813-07:43:07 current_test_accuracies 0.9523 0.9507 0.9515 0.9577 0.9457 0.9511 0.9505 0.9530 0.9514 0.9477 20240813-07:43:07 training model 4 20240813-07:43:07 training model 9 20240813-07:46:49 train_perplexity 459 model 9 1.1628002763724397 20240813-07:46:51 train_perplexity 459 model 4 1.1655049790911791 20240813-07:46:56 test_perplexity 459 model 9 1.1749243134192942 20240813-07:46:57 test_perplexity 459 model 4 1.1751525165352048 20240813-07:52:51 test_accuracy 459 model 4 val 1520 / 1606 20240813-07:52:52 test_accuracy 459 model 9 val 1515 / 1597 20240813-07:52:54 wrote gpt_004.pth 20240813-07:52:55 wrote gpt_009.pth 20240813-07:53:28 wrote non_validated_0459_04.png 20240813-07:54:01 wrote non_validated_0459_09.png 20240813-07:54:01 wrote state.pth 20240813-07:54:01 --- epoch 460 ---------------------------------------- 20240813-07:54:01 current_test_accuracies 0.9523 0.9507 0.9515 0.9577 0.9465 0.9511 0.9505 0.9530 0.9514 0.9487 20240813-07:54:01 training model 4 20240813-07:54:01 training model 9 20240813-07:57:43 train_perplexity 460 model 9 1.1626111787052353 20240813-07:57:47 train_perplexity 460 model 4 1.1647282592278863 20240813-07:57:49 test_perplexity 460 model 9 1.1743992076714846 20240813-07:57:52 test_perplexity 460 model 4 1.172274583668583 20240813-08:03:43 test_accuracy 460 model 9 val 1524 / 1604 20240813-08:03:44 test_accuracy 460 model 4 val 1511 / 1600 20240813-08:03:46 wrote gpt_004.pth 20240813-08:03:47 wrote gpt_009.pth 20240813-08:04:20 wrote non_validated_0460_04.png 20240813-08:04:52 wrote non_validated_0460_09.png 20240813-08:04:52 wrote state.pth 20240813-08:04:52 --- epoch 461 ---------------------------------------- 20240813-08:04:52 current_test_accuracies 0.9523 0.9507 0.9515 0.9577 0.9444 0.9511 0.9505 0.9530 0.9514 0.9501 20240813-08:04:52 training model 4 20240813-08:04:52 training model 9 20240813-08:08:34 train_perplexity 461 model 9 1.1626606553065126 20240813-08:08:39 train_perplexity 461 model 4 1.164561004393872 20240813-08:08:40 test_perplexity 461 model 9 1.176208429333336 20240813-08:08:43 test_perplexity 461 model 4 1.1727284536937115 20240813-08:14:31 test_accuracy 461 model 9 val 1528 / 1620 20240813-08:14:33 test_accuracy 461 model 4 val 1521 / 1619 20240813-08:14:34 wrote gpt_004.pth 20240813-08:14:35 wrote gpt_009.pth 20240813-08:15:08 wrote non_validated_0461_04.png 20240813-08:15:41 wrote non_validated_0461_09.png 20240813-08:15:41 wrote state.pth 20240813-08:15:41 --- epoch 462 ---------------------------------------- 20240813-08:15:41 current_test_accuracies 0.9523 0.9507 0.9515 0.9577 0.9395 0.9511 0.9505 0.9530 0.9514 0.9432 20240813-08:15:41 training model 4 20240813-08:15:41 training model 9 20240813-08:19:23 train_perplexity 462 model 9 1.1627687779647462 20240813-08:19:25 train_perplexity 462 model 4 1.1647886454682739 20240813-08:19:30 test_perplexity 462 model 9 1.1751646297078175 20240813-08:19:31 test_perplexity 462 model 4 1.1736754721000415 20240813-08:25:20 test_accuracy 462 model 9 val 1494 / 1612 20240813-08:25:24 test_accuracy 462 model 4 val 1501 / 1597 20240813-08:25:26 wrote gpt_004.pth 20240813-08:25:27 wrote gpt_009.pth 20240813-08:25:59 wrote non_validated_0462_04.png 20240813-08:26:32 wrote non_validated_0462_09.png 20240813-08:26:32 wrote state.pth 20240813-08:26:32 --- epoch 463 ---------------------------------------- 20240813-08:26:32 current_test_accuracies 0.9523 0.9507 0.9515 0.9577 0.9399 0.9511 0.9505 0.9530 0.9514 0.9268 20240813-08:26:32 training model 9 20240813-08:26:32 training model 4 20240813-08:30:14 train_perplexity 463 model 4 1.1644665101423324 20240813-08:30:14 train_perplexity 463 model 9 1.1622133413423095 20240813-08:30:22 test_perplexity 463 model 4 1.1719924902703718 20240813-08:30:22 test_perplexity 463 model 9 1.1757925718038715 20240813-08:36:14 test_accuracy 463 model 9 val 1508 / 1608 20240813-08:36:19 test_accuracy 463 model 4 val 1475 / 1561 20240813-08:36:21 wrote gpt_009.pth 20240813-08:36:22 wrote gpt_004.pth 20240813-08:36:55 wrote non_validated_0463_09.png 20240813-08:37:27 wrote non_validated_0463_04.png 20240813-08:37:27 wrote state.pth 20240813-08:37:27 --- epoch 464 ---------------------------------------- 20240813-08:37:27 current_test_accuracies 0.9523 0.9507 0.9515 0.9577 0.9449 0.9511 0.9505 0.9530 0.9514 0.9378 20240813-08:37:27 training model 9 20240813-08:37:27 training model 4 20240813-08:41:10 train_perplexity 464 model 4 1.1647088642409509 20240813-08:41:12 train_perplexity 464 model 9 1.1622672337226139 20240813-08:41:16 test_perplexity 464 model 4 1.1747106644901508 20240813-08:41:18 test_perplexity 464 model 9 1.1746649765261359 20240813-08:47:10 test_accuracy 464 model 9 val 1535 / 1607 20240813-08:47:11 test_accuracy 464 model 4 val 1509 / 1599 20240813-08:47:13 wrote gpt_009.pth 20240813-08:47:14 wrote gpt_004.pth 20240813-08:47:47 wrote non_validated_0464_09.png 20240813-08:48:20 wrote non_validated_0464_04.png 20240813-08:48:20 wrote state.pth 20240813-08:48:20 --- epoch 465 ---------------------------------------- 20240813-08:48:20 current_test_accuracies 0.9523 0.9507 0.9515 0.9577 0.9437 0.9511 0.9505 0.9530 0.9514 0.9552 20240813-08:48:20 training model 4 20240813-08:48:20 training model 6 20240813-08:52:02 train_perplexity 465 model 6 1.165595735248131 20240813-08:52:04 train_perplexity 465 model 4 1.1641885519841082 20240813-08:52:09 test_perplexity 465 model 6 1.1752078455886052 20240813-08:52:11 test_perplexity 465 model 4 1.175380130167353 20240813-08:58:01 test_accuracy 465 model 4 val 1514 / 1606 20240813-08:58:06 test_accuracy 465 model 6 val 1493 / 1576 20240813-08:58:08 wrote gpt_004.pth 20240813-08:58:08 wrote gpt_006.pth 20240813-08:58:41 wrote non_validated_0465_04.png 20240813-08:59:14 wrote non_validated_0465_06.png 20240813-08:59:14 wrote state.pth 20240813-08:59:14 --- epoch 466 ---------------------------------------- 20240813-08:59:14 current_test_accuracies 0.9523 0.9507 0.9515 0.9577 0.9427 0.9511 0.9473 0.9530 0.9514 0.9552 20240813-08:59:14 training model 4 20240813-08:59:14 training model 6 20240813-09:02:56 train_perplexity 466 model 6 1.1649351009916356 20240813-09:02:58 train_perplexity 466 model 4 1.1634426760932635 20240813-09:03:03 test_perplexity 466 model 6 1.1720647482541726 20240813-09:03:04 test_perplexity 466 model 4 1.1737759678352242 20240813-09:08:49 test_accuracy 466 model 6 val 1536 / 1610 20240813-09:08:52 test_accuracy 466 model 4 val 1506 / 1609 20240813-09:08:54 wrote gpt_004.pth 20240813-09:08:55 wrote gpt_006.pth 20240813-09:09:28 wrote non_validated_0466_04.png 20240813-09:10:00 wrote non_validated_0466_06.png 20240813-09:10:00 wrote state.pth 20240813-09:10:00 --- epoch 467 ---------------------------------------- 20240813-09:10:00 current_test_accuracies 0.9523 0.9507 0.9515 0.9577 0.9360 0.9511 0.9540 0.9530 0.9514 0.9552 20240813-09:10:00 training model 4 20240813-09:10:00 training model 1 20240813-09:13:43 train_perplexity 467 model 1 1.1667649242220248 20240813-09:13:43 train_perplexity 467 model 4 1.1634533475959874 20240813-09:13:51 test_perplexity 467 model 1 1.173807457722795 20240813-09:13:51 test_perplexity 467 model 4 1.1766298404982565 20240813-09:19:36 test_accuracy 467 model 4 val 1518 / 1611 20240813-09:19:46 test_accuracy 467 model 1 val 1484 / 1555 20240813-09:19:47 wrote gpt_004.pth 20240813-09:19:48 wrote gpt_001.pth 20240813-09:20:21 wrote non_validated_0467_04.png 20240813-09:20:54 wrote non_validated_0467_01.png 20240813-09:20:54 wrote state.pth 20240813-09:20:54 --- epoch 468 ---------------------------------------- 20240813-09:20:54 current_test_accuracies 0.9523 0.9543 0.9515 0.9577 0.9423 0.9511 0.9540 0.9530 0.9514 0.9552 20240813-09:20:54 training model 4 20240813-09:20:54 training model 5 20240813-09:24:36 train_perplexity 468 model 5 1.1653872494955804 20240813-09:24:39 train_perplexity 468 model 4 1.1630039604427953 20240813-09:24:43 test_perplexity 468 model 5 1.1749556439316349 20240813-09:24:45 test_perplexity 468 model 4 1.1738241023659866 20240813-09:30:29 test_accuracy 468 model 5 val 1548 / 1631 20240813-09:30:34 test_accuracy 468 model 4 val 1516 / 1604 20240813-09:30:36 wrote gpt_004.pth 20240813-09:30:37 wrote gpt_005.pth 20240813-09:31:09 wrote non_validated_0468_04.png 20240813-09:31:42 wrote non_validated_0468_05.png 20240813-09:31:42 wrote state.pth 20240813-09:31:42 --- epoch 469 ---------------------------------------- 20240813-09:31:42 current_test_accuracies 0.9523 0.9543 0.9515 0.9577 0.9451 0.9491 0.9540 0.9530 0.9514 0.9552 20240813-09:31:42 training model 4 20240813-09:31:42 training model 5 20240813-09:35:24 train_perplexity 469 model 5 1.164817877647751 20240813-09:35:29 train_perplexity 469 model 4 1.1629918465968858 20240813-09:35:30 test_perplexity 469 model 5 1.17296120467833 20240813-09:35:34 test_perplexity 469 model 4 1.1744978778507986 20240813-09:41:32 test_accuracy 469 model 5 val 1491 / 1589 20240813-09:41:38 test_accuracy 469 model 4 val 1450 / 1552 20240813-09:41:39 wrote gpt_004.pth 20240813-09:41:40 wrote gpt_005.pth 20240813-09:42:13 wrote non_validated_0469_04.png 20240813-09:42:46 wrote non_validated_0469_05.png 20240813-09:42:46 wrote state.pth 20240813-09:42:46 --- epoch 470 ---------------------------------------- 20240813-09:42:46 current_test_accuracies 0.9523 0.9543 0.9515 0.9577 0.9343 0.9383 0.9540 0.9530 0.9514 0.9552 20240813-09:42:46 training model 4 20240813-09:42:46 training model 5 20240813-09:46:27 train_perplexity 470 model 5 1.16493292782391 20240813-09:46:33 test_perplexity 470 model 5 1.1744058055943023 20240813-09:46:33 train_perplexity 470 model 4 1.1631110200093702 20240813-09:46:37 test_perplexity 470 model 4 1.1762278367790644 20240813-09:52:31 test_accuracy 470 model 4 val 1527 / 1605 20240813-09:52:35 test_accuracy 470 model 5 val 1512 / 1578 20240813-09:52:37 wrote gpt_004.pth 20240813-09:52:37 wrote gpt_005.pth 20240813-09:53:10 wrote non_validated_0470_04.png 20240813-09:53:44 wrote non_validated_0470_05.png 20240813-09:53:44 wrote state.pth 20240813-09:53:44 --- epoch 471 ---------------------------------------- 20240813-09:53:44 current_test_accuracies 0.9523 0.9543 0.9515 0.9577 0.9514 0.9582 0.9540 0.9530 0.9514 0.9552 20240813-09:57:11 keep c_quizzes model 0 validated 37 / 420 (8.81%) nb_accumulated 37 / 420 (finishes Tue 10:32 -- 642/h) 20240813-10:00:13 keep c_quizzes model 5 validated 31 / 420 (7.38%) nb_accumulated 68 / 420 (finishes Tue 10:33 -- 629/h) 20240813-10:03:15 keep c_quizzes model 1 validated 33 / 420 (7.86%) nb_accumulated 101 / 420 (finishes Tue 10:33 -- 636/h) 20240813-10:06:17 keep c_quizzes model 1 validated 33 / 420 (7.86%) nb_accumulated 134 / 420 (finishes Tue 10:33 -- 639/h) 20240813-10:09:18 keep c_quizzes model 1 validated 32 / 420 (7.62%) nb_accumulated 166 / 420 (finishes Tue 10:33 -- 639/h) 20240813-10:12:19 keep c_quizzes model 4 validated 28 / 420 (6.67%) nb_accumulated 194 / 420 (finishes Tue 10:33 -- 626/h) 20240813-10:15:18 keep c_quizzes model 0 validated 36 / 420 (8.57%) nb_accumulated 230 / 420 (finishes Tue 10:33 -- 639/h) 20240813-10:18:20 keep c_quizzes model 1 validated 35 / 420 (8.33%) nb_accumulated 265 / 420 (finishes Tue 10:32 -- 646/h) 20240813-10:21:21 keep c_quizzes model 4 validated 37 / 420 (8.81%) nb_accumulated 302 / 420 (finishes Tue 10:32 -- 656/h) 20240813-10:24:23 keep c_quizzes model 1 validated 20 / 420 (4.76%) nb_accumulated 322 / 420 (finishes Tue 10:33 -- 630/h) 20240813-10:27:23 keep c_quizzes model 9 validated 30 / 420 (7.14%) nb_accumulated 352 / 420 (finishes Tue 10:33 -- 627/h) 20240813-10:30:24 keep c_quizzes model 1 validated 34 / 420 (8.10%) nb_accumulated 386 / 420 (finishes Tue 10:33 -- 631/h) 20240813-10:33:24 keep c_quizzes model 8 validated 29 / 420 (6.90%) nb_accumulated 415 / 420 (finishes Tue 10:33 -- 627/h) 20240813-10:36:25 keep c_quizzes model 6 validated 19 / 420 (4.52%) nb_accumulated 434 / 420 (finishes now! -- 609/h) 20240813-10:36:39 wrote c_quizzes.pth 20240813-10:36:39 training model 0 20240813-10:36:39 training model 1 20240813-10:40:21 train_perplexity 471 model 0 1.1652331641806266 20240813-10:40:21 train_perplexity 471 model 1 1.1678287546510875 20240813-10:40:28 test_perplexity 471 model 0 1.1743511602400203 20240813-10:40:28 test_perplexity 471 model 1 1.1718724254080215 20240813-10:46:12 test_accuracy 471 model 0 val 1520 / 1620 20240813-10:46:22 test_accuracy 471 model 1 val 1471 / 1566 20240813-10:46:23 wrote gpt_000.pth 20240813-10:46:24 wrote gpt_001.pth 20240813-10:46:57 wrote non_validated_0471_00.png 20240813-10:47:30 wrote non_validated_0471_01.png 20240813-10:47:30 wrote state.pth 20240813-10:47:30 --- epoch 472 ---------------------------------------- 20240813-10:47:30 current_test_accuracies 0.9383 0.9393 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 20240813-10:47:30 training model 2 20240813-10:47:30 training model 3 20240813-10:51:12 train_perplexity 472 model 3 1.1665004025916719 20240813-10:51:13 train_perplexity 472 model 2 1.1650292028851168 20240813-10:51:20 test_perplexity 472 model 3 1.173498742690396 20240813-10:51:20 test_perplexity 472 model 2 1.1756452569073799 20240813-10:57:13 test_accuracy 472 model 3 val 1534 / 1611 20240813-10:57:18 test_accuracy 472 model 2 val 1504 / 1588 20240813-10:57:20 wrote gpt_002.pth 20240813-10:57:21 wrote gpt_003.pth 20240813-10:57:54 wrote non_validated_0472_02.png 20240813-10:58:26 wrote non_validated_0472_03.png 20240813-10:58:26 wrote state.pth 20240813-10:58:26 --- epoch 473 ---------------------------------------- 20240813-10:58:26 current_test_accuracies 0.9383 0.9393 0.9471 0.9522 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 20240813-10:58:26 training model 4 20240813-10:58:26 training model 5 20240813-11:02:08 train_perplexity 473 model 5 1.1654703057353308 20240813-11:02:11 train_perplexity 473 model 4 1.1645530878184978 20240813-11:02:15 test_perplexity 473 model 5 1.1750966895107529 20240813-11:02:17 test_perplexity 473 model 4 1.1755850568509438 20240813-11:08:09 test_accuracy 473 model 5 val 1532 / 1607 20240813-11:08:11 test_accuracy 473 model 4 val 1515 / 1598 20240813-11:08:13 wrote gpt_004.pth 20240813-11:08:13 wrote gpt_005.pth 20240813-11:08:46 wrote non_validated_0473_04.png 20240813-11:09:18 wrote non_validated_0473_05.png 20240813-11:09:18 wrote state.pth 20240813-11:09:18 --- epoch 474 ---------------------------------------- 20240813-11:09:18 current_test_accuracies 0.9383 0.9393 0.9471 0.9522 0.9481 0.9533 0.0000 0.0000 0.0000 0.0000 20240813-11:09:18 training model 6 20240813-11:09:18 training model 7 20240813-11:13:01 train_perplexity 474 model 7 1.162864802487392 20240813-11:13:02 train_perplexity 474 model 6 1.1657858733015252 20240813-11:13:08 test_perplexity 474 model 7 1.1783430920472444 20240813-11:13:09 test_perplexity 474 model 6 1.1738268596606751 20240813-11:19:00 test_accuracy 474 model 6 val 1505 / 1597 20240813-11:19:01 test_accuracy 474 model 7 val 1497 / 1585 20240813-11:19:03 wrote gpt_006.pth 20240813-11:19:03 wrote gpt_007.pth 20240813-11:19:36 wrote non_validated_0474_06.png 20240813-11:20:08 wrote non_validated_0474_07.png 20240813-11:20:09 wrote state.pth 20240813-11:20:09 --- epoch 475 ---------------------------------------- 20240813-11:20:09 current_test_accuracies 0.9383 0.9393 0.9471 0.9522 0.9481 0.9533 0.9424 0.9445 0.0000 0.0000 20240813-11:20:09 training model 8 20240813-11:20:09 training model 9 20240813-11:23:51 train_perplexity 475 model 9 1.163155265573551 20240813-11:23:51 train_perplexity 475 model 8 1.1613755595300324 20240813-11:23:59 test_perplexity 475 model 9 1.1764394480764409 20240813-11:23:59 test_perplexity 475 model 8 1.1786988334599109 20240813-11:29:47 test_accuracy 475 model 8 val 1539 / 1620 20240813-11:29:53 test_accuracy 475 model 9 val 1501 / 1574 20240813-11:29:55 wrote gpt_008.pth 20240813-11:29:56 wrote gpt_009.pth 20240813-11:30:30 wrote non_validated_0475_08.png 20240813-11:31:02 wrote non_validated_0475_09.png 20240813-11:31:02 wrote state.pth 20240813-11:31:02 --- epoch 476 ---------------------------------------- 20240813-11:31:02 current_test_accuracies 0.9383 0.9393 0.9471 0.9522 0.9481 0.9533 0.9424 0.9445 0.9500 0.9536 20240813-11:31:02 training model 0 20240813-11:31:02 training model 1 20240813-11:34:45 train_perplexity 476 model 1 1.167809901438945 20240813-11:34:48 train_perplexity 476 model 0 1.1646278427907617 20240813-11:34:51 test_perplexity 476 model 1 1.1715976581882812 20240813-11:34:53 test_perplexity 476 model 0 1.1753830533458418 20240813-11:40:45 test_accuracy 476 model 0 val 1527 / 1617 20240813-11:40:49 test_accuracy 476 model 1 val 1482 / 1587 20240813-11:40:51 wrote gpt_000.pth 20240813-11:40:52 wrote gpt_001.pth 20240813-11:41:24 wrote non_validated_0476_00.png 20240813-11:41:57 wrote non_validated_0476_01.png 20240813-11:41:57 wrote state.pth 20240813-11:41:57 --- epoch 477 ---------------------------------------- 20240813-11:41:57 current_test_accuracies 0.9443 0.9338 0.9471 0.9522 0.9481 0.9533 0.9424 0.9445 0.9500 0.9536 20240813-11:41:57 training model 1 20240813-11:41:57 training model 6 20240813-11:45:40 train_perplexity 477 model 6 1.1650879405346963 20240813-11:45:41 train_perplexity 477 model 1 1.167592621894827 20240813-11:45:47 test_perplexity 477 model 6 1.1746462667861832 20240813-11:45:48 test_perplexity 477 model 1 1.1745043109389786 20240813-11:51:39 test_accuracy 477 model 6 val 1531 / 1602 20240813-11:51:39 test_accuracy 477 model 1 val 1502 / 1591 20240813-11:51:42 wrote gpt_001.pth 20240813-11:51:43 wrote gpt_006.pth 20240813-11:52:16 wrote non_validated_0477_01.png 20240813-11:52:48 wrote non_validated_0477_06.png 20240813-11:52:48 wrote state.pth 20240813-11:52:48 --- epoch 478 ---------------------------------------- 20240813-11:52:48 current_test_accuracies 0.9443 0.9441 0.9471 0.9522 0.9481 0.9533 0.9557 0.9445 0.9500 0.9536 20240813-11:52:48 training model 1 20240813-11:52:48 training model 0 20240813-11:56:31 train_perplexity 478 model 0 1.1645382660894947 20240813-11:56:31 train_perplexity 478 model 1 1.1672195155664475 20240813-11:56:38 test_perplexity 478 model 0 1.1738721352302157 20240813-11:56:38 test_perplexity 478 model 1 1.1725259837978321 20240813-12:02:34 test_accuracy 478 model 0 val 1494 / 1592 20240813-12:02:35 test_accuracy 478 model 1 val 1513 / 1599 20240813-12:02:37 wrote gpt_001.pth 20240813-12:02:38 wrote gpt_000.pth 20240813-12:03:10 wrote non_validated_0478_01.png 20240813-12:03:43 wrote non_validated_0478_00.png 20240813-12:03:43 wrote state.pth 20240813-12:03:43 --- epoch 479 ---------------------------------------- 20240813-12:03:43 current_test_accuracies 0.9384 0.9462 0.9471 0.9522 0.9481 0.9533 0.9557 0.9445 0.9500 0.9536 20240813-12:03:43 training model 0 20240813-12:03:43 training model 7 20240813-12:07:24 train_perplexity 479 model 7 1.1620360653507324 20240813-12:07:30 test_perplexity 479 model 7 1.1791330761664907 20240813-12:07:32 train_perplexity 479 model 0 1.1635862676804216 20240813-12:07:35 test_perplexity 479 model 0 1.175406939962316 20240813-12:13:25 test_accuracy 479 model 0 val 1490 / 1592 20240813-12:13:27 test_accuracy 479 model 7 val 1502 / 1587 20240813-12:13:29 wrote gpt_000.pth 20240813-12:13:29 wrote gpt_007.pth 20240813-12:14:02 wrote non_validated_0479_00.png 20240813-12:14:35 wrote non_validated_0479_07.png 20240813-12:14:35 wrote state.pth 20240813-12:14:35 --- epoch 480 ---------------------------------------- 20240813-12:14:35 current_test_accuracies 0.9359 0.9462 0.9471 0.9522 0.9481 0.9533 0.9557 0.9464 0.9500 0.9536 20240813-12:14:35 training model 0 20240813-12:14:35 training model 1 20240813-12:18:16 train_perplexity 480 model 1 1.1665511779919249 20240813-12:18:21 train_perplexity 480 model 0 1.1638106194802051 20240813-12:18:23 test_perplexity 480 model 1 1.1734378525824445 20240813-12:18:26 test_perplexity 480 model 0 1.1744610973031218 20240813-12:24:20 test_accuracy 480 model 0 val 1512 / 1587 20240813-12:24:21 test_accuracy 480 model 1 val 1470 / 1571 20240813-12:24:23 wrote gpt_000.pth 20240813-12:24:24 wrote gpt_001.pth 20240813-12:24:56 wrote non_validated_0480_00.png 20240813-12:25:29 wrote non_validated_0480_01.png 20240813-12:25:29 wrote state.pth 20240813-12:25:29 --- epoch 481 ---------------------------------------- 20240813-12:25:29 current_test_accuracies 0.9527 0.9357 0.9471 0.9522 0.9481 0.9533 0.9557 0.9464 0.9500 0.9536 20240813-12:25:29 training model 1 20240813-12:25:29 training model 7 20240813-12:29:13 train_perplexity 481 model 7 1.1616730956968369 20240813-12:29:13 train_perplexity 481 model 1 1.166206383525346 20240813-12:29:20 test_perplexity 481 model 7 1.177142911396532 20240813-12:29:20 test_perplexity 481 model 1 1.1736727615070388