20240813-17:55:37 argv ./main.py --result_dir=results_diverse 20240813-17:55:37 args.log_filename train.log 20240813-17:55:37 args.result_dir results_diverse 20240813-17:55:37 args.seed 0 20240813-17:55:37 args.resume False 20240813-17:55:37 args.max_percents_of_test_in_train -1 20240813-17:55:37 args.log_command None 20240813-17:55:37 args.nb_epochs 10000 20240813-17:55:37 args.batch_size 25 20240813-17:55:37 args.physical_batch_size None 20240813-17:55:37 args.inference_batch_size 25 20240813-17:55:37 args.nb_train_samples 40000 20240813-17:55:37 args.nb_test_samples 1000 20240813-17:55:37 args.nb_new_c_quizzes_for_train None 20240813-17:55:37 args.nb_new_c_quizzes_for_test None 20240813-17:55:37 args.learning_rate 0.0005 20240813-17:55:37 args.schedule_free False 20240813-17:55:37 args.model 37M 20240813-17:55:37 args.dim_model 512 20240813-17:55:37 args.dim_keys 64 20240813-17:55:37 args.dim_hidden 2048 20240813-17:55:37 args.nb_heads 8 20240813-17:55:37 args.nb_blocks 12 20240813-17:55:37 args.dropout 0.5 20240813-17:55:37 args.deterministic_synthesis False 20240813-17:55:37 args.problem grids 20240813-17:55:37 args.nb_threads 1 20240813-17:55:37 args.gpus all 20240813-17:55:37 args.nb_gpts 5 20240813-17:55:37 args.min_succeed_to_validate 2 20240813-17:55:37 args.max_fail_to_validate 3 20240813-17:55:37 args.accuracy_to_make_c_quizzes 0.95 20240813-17:55:37 args.proba_understands 0.95 20240813-17:55:37 args.proba_not_understands 0.5 20240813-17:55:37 args.temperature_hot 1.5 20240813-17:55:37 args.temperature_cold 1 20240813-17:55:37 args.prompt_noise 0.05 20240813-17:55:37 args.dirty_debug False 20240813-17:55:37 args.test None 20240813-17:55:37 args.grids_world_tasks replace_color,translate,grow,frame 20240813-17:55:37 args.sky_height 6 20240813-17:55:37 args.sky_width 8 20240813-17:55:37 args.sky_nb_birds 3 20240813-17:55:37 args.sky_nb_iterations 2 20240813-17:55:37 args.sky_speed 3 20240813-17:55:44 main_device cuda:0 gpus ['cuda:0', 'cuda:1'] 20240813-17:55:44 vocabulary_size 15 20240813-17:55:44 creating model 0 20240813-17:55:44 creating model 1 20240813-17:55:44 creating model 2 20240813-17:55:44 creating model 3 20240813-17:55:45 creating model 4 20240813-17:55:45 nb_parameters 37819407 (37M) 20240813-17:55:45 nb_new_c_quizzes_for_train 1000 nb_new_c_quizzes_for_test 25 20240813-17:55:45 wrote state.pth 20240813-17:55:45 --- epoch 0 ---------------------------------------- 20240813-17:55:45 current_test_accuracies 0.0000 0.0000 0.0000 0.0000 0.0000 20240813-17:55:45 training model 0 20240813-17:55:45 training model 1 20240813-17:59:45 train_perplexity 0 model 0 2.456348939715879 20240813-17:59:46 train_perplexity 0 model 1 2.264498831015902 20240813-17:59:47 test_perplexity 0 model 0 1.3213118632578524 20240813-17:59:49 test_perplexity 0 model 1 1.3210078963541778 20240813-18:01:14 test_accuracy 0 model 1 val 0 / 782 20240813-18:01:16 test_accuracy 0 model 0 val 0 / 771 20240813-18:01:18 wrote gpt_000.pth 20240813-18:01:18 wrote gpt_001.pth 20240813-18:01:36 wrote non_validated_0000_00.png 20240813-18:01:53 wrote non_validated_0000_01.png 20240813-18:01:53 wrote state.pth 20240813-18:01:53 --- epoch 1 ---------------------------------------- 20240813-18:01:53 current_test_accuracies 0.0000 0.0000 0.0000 0.0000 0.0000 20240813-18:01:53 training model 0 20240813-18:01:53 training model 1 20240813-18:05:34 train_perplexity 1 model 0 1.3205567277450176 20240813-18:05:34 train_perplexity 1 model 1 1.3170859191206985 20240813-18:05:38 test_perplexity 1 model 0 1.2519945498262497 20240813-18:05:38 test_perplexity 1 model 1 1.253774722714923 20240813-18:07:03 test_accuracy 1 model 0 val 49 / 793 20240813-18:07:04 test_accuracy 1 model 1 val 45 / 795 20240813-18:07:06 wrote gpt_000.pth 20240813-18:07:07 wrote gpt_001.pth 20240813-18:07:24 wrote non_validated_0001_00.png 20240813-18:07:41 wrote non_validated_0001_01.png 20240813-18:07:41 wrote state.pth 20240813-18:07:41 --- epoch 2 ---------------------------------------- 20240813-18:07:41 current_test_accuracies 0.0618 0.0566 0.0000 0.0000 0.0000 20240813-18:07:41 training model 2 20240813-18:07:41 training model 3 20240813-18:11:17 train_perplexity 2 model 3 2.4774138961068966 20240813-18:11:20 test_perplexity 2 model 3 1.320441588897324 20240813-18:11:20 train_perplexity 2 model 2 2.3928871121453863 20240813-18:11:23 test_perplexity 2 model 2 1.3163321436074558 20240813-18:12:41 test_accuracy 2 model 3 val 0 / 801 20240813-18:12:45 test_accuracy 2 model 2 val 0 / 784 20240813-18:12:47 wrote gpt_002.pth 20240813-18:12:47 wrote gpt_003.pth 20240813-18:13:04 wrote non_validated_0002_02.png 20240813-18:13:21 wrote non_validated_0002_03.png 20240813-18:13:21 wrote state.pth 20240813-18:13:21 --- epoch 3 ---------------------------------------- 20240813-18:13:21 current_test_accuracies 0.0618 0.0566 0.0000 0.0000 0.0000 20240813-18:13:21 training model 2 20240813-18:13:21 training model 3 20240813-18:17:02 train_perplexity 3 model 2 1.323899673836305 20240813-18:17:02 train_perplexity 3 model 3 1.3312904561943413 20240813-18:17:05 test_perplexity 3 model 2 1.253863217376971 20240813-18:17:05 test_perplexity 3 model 3 1.2678301536686418 20240813-18:18:28 test_accuracy 3 model 3 val 12 / 807 20240813-18:18:32 test_accuracy 3 model 2 val 24 / 773 20240813-18:18:34 wrote gpt_002.pth 20240813-18:18:35 wrote gpt_003.pth 20240813-18:18:52 wrote non_validated_0003_02.png 20240813-18:19:09 wrote non_validated_0003_03.png 20240813-18:19:09 wrote state.pth 20240813-18:19:09 --- epoch 4 ---------------------------------------- 20240813-18:19:09 current_test_accuracies 0.0618 0.0566 0.0310 0.0149 0.0000 20240813-18:19:09 training model 4 20240813-18:19:09 training model 3 20240813-18:22:46 train_perplexity 4 model 4 2.3638204645396 20240813-18:22:48 test_perplexity 4 model 4 1.3118238296494482 20240813-18:22:48 train_perplexity 4 model 3 1.2690044704681331 20240813-18:22:51 test_perplexity 4 model 3 1.2191567873078153 20240813-18:24:12 test_accuracy 4 model 4 val 1 / 790 20240813-18:24:15 test_accuracy 4 model 3 val 105 / 799 20240813-18:24:17 wrote gpt_004.pth 20240813-18:24:17 wrote gpt_003.pth 20240813-18:24:34 wrote non_validated_0004_04.png 20240813-18:24:52 wrote non_validated_0004_03.png 20240813-18:24:52 wrote state.pth 20240813-18:24:52 --- epoch 5 ---------------------------------------- 20240813-18:24:52 current_test_accuracies 0.0618 0.0566 0.0310 0.1314 0.0013 20240813-18:24:52 training model 4 20240813-18:24:52 training model 2 20240813-18:28:32 train_perplexity 5 model 2 1.26102347230824 20240813-18:28:33 train_perplexity 5 model 4 1.3140573435975014 20240813-18:28:36 test_perplexity 5 model 2 1.2099402439603033 20240813-18:28:36 test_perplexity 5 model 4 1.2401739810966392 20240813-18:30:00 test_accuracy 5 model 4 val 32 / 777 20240813-18:30:02 test_accuracy 5 model 2 val 89 / 788 20240813-18:30:04 wrote gpt_004.pth 20240813-18:30:04 wrote gpt_002.pth 20240813-18:30:21 wrote non_validated_0005_04.png 20240813-18:30:39 wrote non_validated_0005_02.png 20240813-18:30:39 wrote state.pth 20240813-18:30:39 --- epoch 6 ---------------------------------------- 20240813-18:30:39 current_test_accuracies 0.0618 0.0566 0.1129 0.1314 0.0412 20240813-18:30:39 training model 4 20240813-18:30:39 training model 1 20240813-18:34:19 train_perplexity 6 model 4 1.2519760447654837 20240813-18:34:19 train_perplexity 6 model 1 1.2546889653161772 20240813-18:34:23 test_perplexity 6 model 4 1.2061683688751321 20240813-18:34:23 test_perplexity 6 model 1 1.2166845583513657 20240813-18:35:47 test_accuracy 6 model 4 val 112 / 789 20240813-18:35:48 test_accuracy 6 model 1 val 118 / 788 20240813-18:35:50 wrote gpt_004.pth 20240813-18:35:51 wrote gpt_001.pth 20240813-18:36:08 wrote non_validated_0006_04.png 20240813-18:36:25 wrote non_validated_0006_01.png 20240813-18:36:25 wrote state.pth 20240813-18:36:25 --- epoch 7 ---------------------------------------- 20240813-18:36:25 current_test_accuracies 0.0618 0.1497 0.1129 0.1314 0.1420 20240813-18:36:25 training model 0 20240813-18:36:25 training model 2 20240813-18:40:06 train_perplexity 7 model 0 1.2570557586608426 20240813-18:40:06 train_perplexity 7 model 2 1.2241447345186336 20240813-18:40:10 test_perplexity 7 model 0 1.2184731024767708 20240813-18:40:10 test_perplexity 7 model 2 1.1932770286003367 20240813-18:41:32 test_accuracy 7 model 2 val 183 / 808 20240813-18:41:34 test_accuracy 7 model 0 val 92 / 793 20240813-18:41:36 wrote gpt_000.pth 20240813-18:41:37 wrote gpt_002.pth 20240813-18:41:54 wrote non_validated_0007_00.png 20240813-18:42:11 wrote non_validated_0007_02.png 20240813-18:42:11 wrote state.pth 20240813-18:42:11 --- epoch 8 ---------------------------------------- 20240813-18:42:11 current_test_accuracies 0.1160 0.1497 0.2265 0.1314 0.1420 20240813-18:42:11 training model 0 20240813-18:42:11 training model 3 20240813-18:45:52 train_perplexity 8 model 3 1.2296494069598187 20240813-18:45:52 train_perplexity 8 model 0 1.2256668176629568 20240813-18:45:56 test_perplexity 8 model 3 1.1954072603358992 20240813-18:45:56 test_perplexity 8 model 0 1.193875051253358 20240813-18:47:19 test_accuracy 8 model 0 val 171 / 808 20240813-18:47:21 test_accuracy 8 model 3 val 149 / 782 20240813-18:47:23 wrote gpt_000.pth 20240813-18:47:23 wrote gpt_003.pth 20240813-18:47:40 wrote non_validated_0008_00.png 20240813-18:47:58 wrote non_validated_0008_03.png 20240813-18:47:58 wrote state.pth 20240813-18:47:58 --- epoch 9 ---------------------------------------- 20240813-18:47:58 current_test_accuracies 0.2116 0.1497 0.2265 0.1905 0.1420 20240813-18:47:58 training model 4 20240813-18:47:58 training model 1 20240813-18:51:39 train_perplexity 9 model 4 1.2212723200468298 20240813-18:51:39 train_perplexity 9 model 1 1.2205669189275543 20240813-18:51:42 test_perplexity 9 model 4 1.184744670790694 20240813-18:51:42 test_perplexity 9 model 1 1.186103712887118 20240813-18:53:05 test_accuracy 9 model 1 val 205 / 806 20240813-18:53:07 test_accuracy 9 model 4 val 181 / 776 20240813-18:53:09 wrote gpt_004.pth 20240813-18:53:10 wrote gpt_001.pth 20240813-18:53:27 wrote non_validated_0009_04.png 20240813-18:53:44 wrote non_validated_0009_01.png 20240813-18:53:44 wrote state.pth 20240813-18:53:44 --- epoch 10 ---------------------------------------- 20240813-18:53:44 current_test_accuracies 0.2116 0.2543 0.2265 0.1905 0.2332 20240813-18:53:44 training model 3 20240813-18:53:44 training model 0 20240813-18:57:25 train_perplexity 10 model 3 1.205837225235751 20240813-18:57:25 train_perplexity 10 model 0 1.2042290372316604 20240813-18:57:29 test_perplexity 10 model 3 1.177463211315497 20240813-18:57:29 test_perplexity 10 model 0 1.1810540600329962 20240813-18:58:52 test_accuracy 10 model 0 val 312 / 822 20240813-18:58:54 test_accuracy 10 model 3 val 331 / 790 20240813-18:58:56 wrote gpt_003.pth 20240813-18:58:56 wrote gpt_000.pth 20240813-18:59:14 wrote non_validated_0010_03.png 20240813-18:59:31 wrote non_validated_0010_00.png 20240813-18:59:31 wrote state.pth 20240813-18:59:31 --- epoch 11 ---------------------------------------- 20240813-18:59:31 current_test_accuracies 0.3796 0.2543 0.2265 0.4190 0.2332 20240813-18:59:31 training model 2 20240813-18:59:31 training model 4 20240813-19:03:13 train_perplexity 11 model 4 1.2007405050117193 20240813-19:03:13 train_perplexity 11 model 2 1.2013827130713344 20240813-19:03:17 test_perplexity 11 model 4 1.1761116356918 20240813-19:03:17 test_perplexity 11 model 2 1.1771100579435865 20240813-19:04:38 test_accuracy 11 model 4 val 406 / 800 20240813-19:04:40 test_accuracy 11 model 2 val 331 / 811 20240813-19:04:42 wrote gpt_002.pth 20240813-19:04:43 wrote gpt_004.pth 20240813-19:05:00 wrote non_validated_0011_02.png 20240813-19:05:17 wrote non_validated_0011_04.png 20240813-19:05:17 wrote state.pth 20240813-19:05:17 --- epoch 12 ---------------------------------------- 20240813-19:05:17 current_test_accuracies 0.3796 0.2543 0.4081 0.4190 0.5075 20240813-19:05:17 training model 1 20240813-19:05:17 training model 0 20240813-19:08:58 train_perplexity 12 model 1 1.198526760548065 20240813-19:08:58 train_perplexity 12 model 0 1.188686118701608 20240813-19:09:02 test_perplexity 12 model 1 1.1720395242994295 20240813-19:09:02 test_perplexity 12 model 0 1.1665733927119804 20240813-19:10:24 test_accuracy 12 model 1 val 367 / 816 20240813-19:10:24 test_accuracy 12 model 0 val 410 / 805 20240813-19:10:27 wrote gpt_001.pth 20240813-19:10:27 wrote gpt_000.pth 20240813-19:10:44 wrote non_validated_0012_01.png 20240813-19:11:02 wrote non_validated_0012_00.png 20240813-19:11:02 wrote state.pth 20240813-19:11:02 --- epoch 13 ---------------------------------------- 20240813-19:11:02 current_test_accuracies 0.5093 0.4498 0.4081 0.4190 0.5075 20240813-19:11:02 training model 2 20240813-19:11:02 training model 3 20240813-19:14:44 train_perplexity 13 model 3 1.1906791178645781 20240813-19:14:44 train_perplexity 13 model 2 1.1894656297409008 20240813-19:14:47 test_perplexity 13 model 3 1.1719635420019408 20240813-19:14:47 test_perplexity 13 model 2 1.1663688574638538 20240813-19:16:10 test_accuracy 13 model 3 val 419 / 806 20240813-19:16:11 test_accuracy 13 model 2 val 452 / 822 20240813-19:16:13 wrote gpt_002.pth 20240813-19:16:14 wrote gpt_003.pth 20240813-19:16:31 wrote non_validated_0013_02.png 20240813-19:16:48 wrote non_validated_0013_03.png 20240813-19:16:48 wrote state.pth 20240813-19:16:48 --- epoch 14 ---------------------------------------- 20240813-19:16:48 current_test_accuracies 0.5093 0.4498 0.5499 0.5199 0.5075 20240813-19:16:48 training model 1 20240813-19:16:48 training model 4 20240813-19:20:29 train_perplexity 14 model 4 1.1868218054512252 20240813-19:20:29 train_perplexity 14 model 1 1.1856284817332852 20240813-19:20:33 test_perplexity 14 model 4 1.168717355626744 20240813-19:20:33 test_perplexity 14 model 1 1.1698466726607468 20240813-19:21:55 test_accuracy 14 model 4 val 485 / 806 20240813-19:21:58 test_accuracy 14 model 1 val 433 / 796 20240813-19:21:59 wrote gpt_001.pth 20240813-19:22:00 wrote gpt_004.pth 20240813-19:22:17 wrote non_validated_0014_01.png 20240813-19:22:34 wrote non_validated_0014_04.png 20240813-19:22:34 wrote state.pth 20240813-19:22:34 --- epoch 15 ---------------------------------------- 20240813-19:22:34 current_test_accuracies 0.5093 0.5440 0.5499 0.5199 0.6017 20240813-19:22:34 training model 0 20240813-19:22:34 training model 3 20240813-19:26:16 train_perplexity 15 model 3 1.1806800116074092 20240813-19:26:16 train_perplexity 15 model 0 1.1787010490484615 20240813-19:26:20 test_perplexity 15 model 3 1.163731297071441 20240813-19:26:20 test_perplexity 15 model 0 1.1645550411498005 20240813-19:27:41 test_accuracy 15 model 0 val 443 / 804 20240813-19:27:43 test_accuracy 15 model 3 val 486 / 803 20240813-19:27:45 wrote gpt_000.pth 20240813-19:27:45 wrote gpt_003.pth 20240813-19:28:03 wrote non_validated_0015_00.png 20240813-19:28:20 wrote non_validated_0015_03.png 20240813-19:28:20 wrote state.pth 20240813-19:28:20 --- epoch 16 ---------------------------------------- 20240813-19:28:20 current_test_accuracies 0.5510 0.5440 0.5499 0.6052 0.6017 20240813-19:28:20 training model 1 20240813-19:28:20 training model 2 20240813-19:32:01 train_perplexity 16 model 2 1.1781089108929264 20240813-19:32:01 train_perplexity 16 model 1 1.1756599173930866 20240813-19:32:05 test_perplexity 16 model 2 1.161140312880891 20240813-19:32:05 test_perplexity 16 model 1 1.1623407979029945 20240813-19:33:27 test_accuracy 16 model 2 val 519 / 802 20240813-19:33:31 test_accuracy 16 model 1 val 505 / 794 20240813-19:33:33 wrote gpt_001.pth 20240813-19:33:33 wrote gpt_002.pth 20240813-19:33:50 wrote non_validated_0016_01.png 20240813-19:34:08 wrote non_validated_0016_02.png 20240813-19:34:08 wrote state.pth 20240813-19:34:08 --- epoch 17 ---------------------------------------- 20240813-19:34:08 current_test_accuracies 0.5510 0.6360 0.6471 0.6052 0.6017 20240813-19:34:08 training model 0 20240813-19:34:08 training model 4 20240813-19:37:49 train_perplexity 17 model 0 1.1726533693297574 20240813-19:37:49 train_perplexity 17 model 4 1.1767941211194937 20240813-19:37:53 test_perplexity 17 model 0 1.16016543838579 20240813-19:37:53 test_perplexity 17 model 4 1.159800009125135 20240813-19:39:16 test_accuracy 17 model 0 val 557 / 811 20240813-19:39:18 test_accuracy 17 model 4 val 566 / 798 20240813-19:39:20 wrote gpt_000.pth 20240813-19:39:21 wrote gpt_004.pth 20240813-19:39:38 wrote non_validated_0017_00.png 20240813-19:39:55 wrote non_validated_0017_04.png 20240813-19:39:55 wrote state.pth 20240813-19:39:55 --- epoch 18 ---------------------------------------- 20240813-19:39:55 current_test_accuracies 0.6868 0.6360 0.6471 0.6052 0.7093 20240813-19:39:55 training model 3 20240813-19:39:55 training model 1 20240813-19:43:37 train_perplexity 18 model 1 1.1698945018015643 20240813-19:43:38 train_perplexity 18 model 3 1.1735635012702108 20240813-19:43:41 test_perplexity 18 model 1 1.1622919013309718 20240813-19:43:41 test_perplexity 18 model 3 1.157437710587858 20240813-19:45:03 test_accuracy 18 model 1 val 588 / 811 20240813-19:45:05 test_accuracy 18 model 3 val 550 / 795 20240813-19:45:07 wrote gpt_003.pth 20240813-19:45:08 wrote gpt_001.pth 20240813-19:45:25 wrote non_validated_0018_03.png 20240813-19:45:42 wrote non_validated_0018_01.png 20240813-19:45:42 wrote state.pth 20240813-19:45:42 --- epoch 19 ---------------------------------------- 20240813-19:45:42 current_test_accuracies 0.6868 0.7250 0.6471 0.6918 0.7093 20240813-19:45:42 training model 2 20240813-19:45:42 training model 0 20240813-19:49:24 train_perplexity 19 model 0 1.1673271326321988 20240813-19:49:25 train_perplexity 19 model 2 1.1717103997056135 20240813-19:49:27 test_perplexity 19 model 0 1.1583400882969337 20240813-19:49:28 test_perplexity 19 model 2 1.159168524850027 20240813-19:50:48 test_accuracy 19 model 0 val 570 / 800 20240813-19:50:52 test_accuracy 19 model 2 val 576 / 796 20240813-19:50:54 wrote gpt_002.pth 20240813-19:50:55 wrote gpt_000.pth 20240813-19:51:12 wrote non_validated_0019_02.png 20240813-19:51:29 wrote non_validated_0019_00.png 20240813-19:51:29 wrote state.pth 20240813-19:51:29 --- epoch 20 ---------------------------------------- 20240813-19:51:29 current_test_accuracies 0.7125 0.7250 0.7236 0.6918 0.7093 20240813-19:51:29 training model 3 20240813-19:51:29 training model 4 20240813-19:55:11 train_perplexity 20 model 4 1.1707680170151409 20240813-19:55:12 train_perplexity 20 model 3 1.1686004322182055 20240813-19:55:15 test_perplexity 20 model 4 1.1582237992484514 20240813-19:55:15 test_perplexity 20 model 3 1.1548701523875704 20240813-19:56:37 test_accuracy 20 model 4 val 623 / 809 20240813-19:56:37 test_accuracy 20 model 3 val 593 / 803 20240813-19:56:39 wrote gpt_003.pth 20240813-19:56:40 wrote gpt_004.pth 20240813-19:56:57 wrote non_validated_0020_03.png 20240813-19:57:14 wrote non_validated_0020_04.png 20240813-19:57:14 wrote state.pth 20240813-19:57:14 --- epoch 21 ---------------------------------------- 20240813-19:57:14 current_test_accuracies 0.7125 0.7250 0.7236 0.7385 0.7701 20240813-19:57:14 training model 0 20240813-19:57:14 training model 2 20240813-20:00:56 train_perplexity 21 model 2 1.166760556356079 20240813-20:00:58 train_perplexity 21 model 0 1.1630180893336273 20240813-20:00:59 test_perplexity 21 model 2 1.1576579301984296 20240813-20:01:01 test_perplexity 21 model 0 1.1556112965462844 20240813-20:02:23 test_accuracy 21 model 0 val 647 / 803 20240813-20:02:24 test_accuracy 21 model 2 val 646 / 824 20240813-20:02:26 wrote gpt_000.pth 20240813-20:02:26 wrote gpt_002.pth 20240813-20:02:44 wrote non_validated_0021_00.png 20240813-20:03:01 wrote non_validated_0021_02.png 20240813-20:03:01 wrote state.pth 20240813-20:03:01 --- epoch 22 ---------------------------------------- 20240813-20:03:01 current_test_accuracies 0.8057 0.7250 0.7840 0.7385 0.7701 20240813-20:03:01 training model 1 20240813-20:03:01 training model 3 20240813-20:06:43 train_perplexity 22 model 1 1.166193479602696 20240813-20:06:43 train_perplexity 22 model 3 1.1651050933925524 20240813-20:06:47 test_perplexity 22 model 1 1.1538091991999753 20240813-20:06:47 test_perplexity 22 model 3 1.156849909830185 20240813-20:08:07 test_accuracy 22 model 1 val 644 / 830 20240813-20:08:10 test_accuracy 22 model 3 val 589 / 813 20240813-20:08:11 wrote gpt_001.pth 20240813-20:08:12 wrote gpt_003.pth 20240813-20:08:29 wrote non_validated_0022_01.png 20240813-20:08:46 wrote non_validated_0022_03.png 20240813-20:08:46 wrote state.pth 20240813-20:08:46 --- epoch 23 ---------------------------------------- 20240813-20:08:46 current_test_accuracies 0.8057 0.7759 0.7840 0.7245 0.7701 20240813-20:08:46 training model 3 20240813-20:08:46 training model 4 20240813-20:12:29 train_perplexity 23 model 4 1.1669067258035672 20240813-20:12:30 train_perplexity 23 model 3 1.1614118265050708 20240813-20:12:33 test_perplexity 23 model 4 1.1539123615331144 20240813-20:12:33 test_perplexity 23 model 3 1.1531701132757954 20240813-20:13:59 test_accuracy 23 model 3 val 636 / 783 20240813-20:14:00 test_accuracy 23 model 4 val 660 / 799 20240813-20:14:02 wrote gpt_003.pth 20240813-20:14:03 wrote gpt_004.pth 20240813-20:14:20 wrote non_validated_0023_03.png 20240813-20:14:37 wrote non_validated_0023_04.png 20240813-20:14:37 wrote state.pth 20240813-20:14:37 --- epoch 24 ---------------------------------------- 20240813-20:14:37 current_test_accuracies 0.8057 0.7759 0.7840 0.8123 0.8260 20240813-20:14:37 training model 1 20240813-20:14:37 training model 2 20240813-20:18:20 train_perplexity 24 model 1 1.1627939560793394 20240813-20:18:20 train_perplexity 24 model 2 1.163723542176722 20240813-20:18:23 test_perplexity 24 model 1 1.15552496367645 20240813-20:18:23 test_perplexity 24 model 2 1.1528825079360019 20240813-20:19:46 test_accuracy 24 model 1 val 621 / 801 20240813-20:19:48 test_accuracy 24 model 2 val 654 / 791 20240813-20:19:50 wrote gpt_001.pth 20240813-20:19:51 wrote gpt_002.pth 20240813-20:20:08 wrote non_validated_0024_01.png 20240813-20:20:25 wrote non_validated_0024_02.png 20240813-20:20:25 wrote state.pth 20240813-20:20:25 --- epoch 25 ---------------------------------------- 20240813-20:20:25 current_test_accuracies 0.8057 0.7753 0.8268 0.8123 0.8260 20240813-20:20:25 training model 1 20240813-20:20:25 training model 0 20240813-20:24:07 train_perplexity 25 model 1 1.1603461840932094 20240813-20:24:07 train_perplexity 25 model 0 1.1606645147212349 20240813-20:24:11 test_perplexity 25 model 1 1.1546604565968241 20240813-20:24:11 test_perplexity 25 model 0 1.152765478472963 20240813-20:25:34 test_accuracy 25 model 1 val 627 / 802 20240813-20:25:36 test_accuracy 25 model 0 val 692 / 797 20240813-20:25:38 wrote gpt_001.pth 20240813-20:25:39 wrote gpt_000.pth 20240813-20:25:56 wrote non_validated_0025_01.png 20240813-20:26:13 wrote non_validated_0025_00.png 20240813-20:26:13 wrote state.pth 20240813-20:26:13 --- epoch 26 ---------------------------------------- 20240813-20:26:13 current_test_accuracies 0.8683 0.7818 0.8268 0.8123 0.8260 20240813-20:26:13 training model 1 20240813-20:26:13 training model 3 20240813-20:29:56 train_perplexity 26 model 3 1.1596550110412103 20240813-20:29:56 train_perplexity 26 model 1 1.1582055751045093 20240813-20:29:59 test_perplexity 26 model 3 1.1569607740444923 20240813-20:30:00 test_perplexity 26 model 1 1.1505639273702124 20240813-20:31:25 test_accuracy 26 model 3 val 420 / 788 20240813-20:31:26 test_accuracy 26 model 1 val 664 / 799 20240813-20:31:28 wrote gpt_001.pth 20240813-20:31:28 wrote gpt_003.pth 20240813-20:31:46 wrote non_validated_0026_01.png 20240813-20:32:03 wrote non_validated_0026_03.png 20240813-20:32:03 wrote state.pth 20240813-20:32:03 --- epoch 27 ---------------------------------------- 20240813-20:32:03 current_test_accuracies 0.8683 0.8310 0.8268 0.5330 0.8260 20240813-20:32:03 training model 3 20240813-20:32:03 training model 4 20240813-20:35:45 train_perplexity 27 model 3 1.1579768589659447 20240813-20:35:45 train_perplexity 27 model 4 1.1629953645236322 20240813-20:35:48 test_perplexity 27 model 3 1.1522432062276529 20240813-20:35:49 test_perplexity 27 model 4 1.1535393653296981 20240813-20:37:13 test_accuracy 27 model 4 val 674 / 790 20240813-20:37:15 test_accuracy 27 model 3 val 710 / 794 20240813-20:37:17 wrote gpt_003.pth 20240813-20:37:17 wrote gpt_004.pth 20240813-20:37:34 wrote non_validated_0027_03.png 20240813-20:37:52 wrote non_validated_0027_04.png 20240813-20:37:52 wrote state.pth 20240813-20:37:52 --- epoch 28 ---------------------------------------- 20240813-20:37:52 current_test_accuracies 0.8683 0.8310 0.8268 0.8942 0.8532 20240813-20:37:52 training model 2 20240813-20:37:52 training model 1 20240813-20:41:34 train_perplexity 28 model 1 1.1563531191848604 20240813-20:41:35 train_perplexity 28 model 2 1.1605703936308267 20240813-20:41:38 test_perplexity 28 model 1 1.1491652373564845 20240813-20:41:38 test_perplexity 28 model 2 1.150982862013966 20240813-20:43:01 test_accuracy 28 model 2 val 716 / 810 20240813-20:43:03 test_accuracy 28 model 1 val 721 / 798 20240813-20:43:05 wrote gpt_002.pth 20240813-20:43:05 wrote gpt_001.pth 20240813-20:43:22 wrote non_validated_0028_02.png 20240813-20:43:40 wrote non_validated_0028_01.png 20240813-20:43:40 wrote state.pth 20240813-20:43:40 --- epoch 29 ---------------------------------------- 20240813-20:43:40 current_test_accuracies 0.8683 0.9035 0.8840 0.8942 0.8532 20240813-20:43:40 training model 4 20240813-20:43:40 training model 0 20240813-20:47:22 train_perplexity 29 model 4 1.1607840436340127 20240813-20:47:22 train_perplexity 29 model 0 1.1585852292388377 20240813-20:47:26 test_perplexity 29 model 4 1.1507526065424065 20240813-20:47:26 test_perplexity 29 model 0 1.1541430321181745 20240813-20:48:48 test_accuracy 29 model 0 val 697 / 811 20240813-20:48:49 test_accuracy 29 model 4 val 702 / 801 20240813-20:48:51 wrote gpt_004.pth 20240813-20:48:52 wrote gpt_000.pth 20240813-20:49:09 wrote non_validated_0029_04.png 20240813-20:49:26 wrote non_validated_0029_00.png 20240813-20:49:26 wrote state.pth 20240813-20:49:26 --- epoch 30 ---------------------------------------- 20240813-20:49:26 current_test_accuracies 0.8594 0.9035 0.8840 0.8942 0.8764 20240813-20:49:26 training model 0 20240813-20:49:26 training model 4 20240813-20:53:09 train_perplexity 30 model 0 1.1570084350076106 20240813-20:53:09 train_perplexity 30 model 4 1.159583853713106 20240813-20:53:12 test_perplexity 30 model 0 1.1491685598252943 20240813-20:53:12 test_perplexity 30 model 4 1.151610399192819 20240813-20:54:36 test_accuracy 30 model 0 val 705 / 782 20240813-20:54:39 test_accuracy 30 model 4 val 722 / 796 20240813-20:54:41 wrote gpt_000.pth 20240813-20:54:41 wrote gpt_004.pth 20240813-20:54:59 wrote non_validated_0030_00.png 20240813-20:55:16 wrote non_validated_0030_04.png 20240813-20:55:16 wrote state.pth 20240813-20:55:16 --- epoch 31 ---------------------------------------- 20240813-20:55:16 current_test_accuracies 0.9015 0.9035 0.8840 0.8942 0.9070 20240813-20:55:16 training model 2 20240813-20:55:16 training model 3 20240813-20:58:58 train_perplexity 31 model 3 1.1563138445454386 20240813-20:58:59 train_perplexity 31 model 2 1.1592000307631793 20240813-20:59:02 test_perplexity 31 model 3 1.1499462975514683 20240813-20:59:02 test_perplexity 31 model 2 1.1545354048660794 20240813-21:00:28 test_accuracy 31 model 3 val 698 / 797 20240813-21:00:28 test_accuracy 31 model 2 val 679 / 797 20240813-21:00:30 wrote gpt_002.pth 20240813-21:00:31 wrote gpt_003.pth 20240813-21:00:48 wrote non_validated_0031_02.png 20240813-21:01:06 wrote non_validated_0031_03.png 20240813-21:01:06 wrote state.pth 20240813-21:01:06 --- epoch 32 ---------------------------------------- 20240813-21:01:06 current_test_accuracies 0.9015 0.9035 0.8519 0.8758 0.9070 20240813-21:01:06 training model 2 20240813-21:01:06 training model 3 20240813-21:04:48 train_perplexity 32 model 2 1.15762284580516 20240813-21:04:48 train_perplexity 32 model 3 1.1555215479506007 20240813-21:04:51 test_perplexity 32 model 2 1.147277597016581 20240813-21:04:51 test_perplexity 32 model 3 1.1490841975213282 20240813-21:06:14 test_accuracy 32 model 3 val 707 / 785 20240813-21:06:15 test_accuracy 32 model 2 val 732 / 797 20240813-21:06:17 wrote gpt_002.pth 20240813-21:06:18 wrote gpt_003.pth 20240813-21:06:35 wrote non_validated_0032_02.png 20240813-21:06:52 wrote non_validated_0032_03.png 20240813-21:06:52 wrote state.pth 20240813-21:06:52 --- epoch 33 ---------------------------------------- 20240813-21:06:52 current_test_accuracies 0.9015 0.9035 0.9184 0.9006 0.9070 20240813-21:06:52 training model 3 20240813-21:06:52 training model 0 20240813-21:10:35 train_perplexity 33 model 0 1.155370563365688 20240813-21:10:35 train_perplexity 33 model 3 1.1544686337186034 20240813-21:10:39 test_perplexity 33 model 0 1.1503295080985612 20240813-21:10:39 test_perplexity 33 model 3 1.1478890009076763 20240813-21:12:00 test_accuracy 33 model 3 val 726 / 804 20240813-21:12:03 test_accuracy 33 model 0 val 734 / 788 20240813-21:12:05 wrote gpt_003.pth 20240813-21:12:05 wrote gpt_000.pth 20240813-21:12:23 wrote non_validated_0033_03.png 20240813-21:12:40 wrote non_validated_0033_00.png 20240813-21:12:40 wrote state.pth 20240813-21:12:40 --- epoch 34 ---------------------------------------- 20240813-21:12:40 current_test_accuracies 0.9315 0.9035 0.9184 0.9030 0.9070 20240813-21:12:40 training model 3 20240813-21:12:40 training model 1 20240813-21:16:22 train_perplexity 34 model 1 1.1557254975154745 20240813-21:16:22 train_perplexity 34 model 3 1.1536851183438355 20240813-21:16:26 test_perplexity 34 model 1 1.1493875582860509 20240813-21:16:26 test_perplexity 34 model 3 1.1488970110542507 20240813-21:17:48 test_accuracy 34 model 1 val 745 / 813 20240813-21:17:49 test_accuracy 34 model 3 val 760 / 808 20240813-21:17:51 wrote gpt_003.pth 20240813-21:17:52 wrote gpt_001.pth 20240813-21:18:09 wrote non_validated_0034_03.png 20240813-21:18:26 wrote non_validated_0034_01.png 20240813-21:18:26 wrote state.pth 20240813-21:18:26 --- epoch 35 ---------------------------------------- 20240813-21:18:26 current_test_accuracies 0.9315 0.9164 0.9184 0.9406 0.9070 20240813-21:18:26 training model 4 20240813-21:18:26 training model 1 20240813-21:22:09 train_perplexity 35 model 4 1.1576788954931352 20240813-21:22:09 train_perplexity 35 model 1 1.1543650584409257 20240813-21:22:12 test_perplexity 35 model 4 1.1478713406835468 20240813-21:22:12 test_perplexity 35 model 1 1.1479983496963357 20240813-21:23:36 test_accuracy 35 model 1 val 741 / 812 20240813-21:23:39 test_accuracy 35 model 4 val 736 / 795 20240813-21:23:41 wrote gpt_004.pth 20240813-21:23:41 wrote gpt_001.pth 20240813-21:23:59 wrote non_validated_0035_04.png 20240813-21:24:16 wrote non_validated_0035_01.png 20240813-21:24:16 wrote state.pth 20240813-21:24:16 --- epoch 36 ---------------------------------------- 20240813-21:24:16 current_test_accuracies 0.9315 0.9126 0.9184 0.9406 0.9258 20240813-21:24:16 training model 1 20240813-21:24:16 training model 2 20240813-21:27:58 train_perplexity 36 model 1 1.1537719723078597 20240813-21:27:58 train_perplexity 36 model 2 1.1559548590999291 20240813-21:28:02 test_perplexity 36 model 1 1.1507376736953052 20240813-21:28:02 test_perplexity 36 model 2 1.1509989735440427 20240813-21:29:26 test_accuracy 36 model 2 val 747 / 822 20240813-21:29:28 test_accuracy 36 model 1 val 725 / 788 20240813-21:29:30 wrote gpt_001.pth 20240813-21:29:30 wrote gpt_002.pth 20240813-21:29:47 wrote non_validated_0036_01.png 20240813-21:30:05 wrote non_validated_0036_02.png 20240813-21:30:05 wrote state.pth 20240813-21:30:05 --- epoch 37 ---------------------------------------- 20240813-21:30:05 current_test_accuracies 0.9315 0.9201 0.9088 0.9406 0.9258 20240813-21:30:05 training model 2 20240813-21:30:05 training model 1 20240813-21:33:47 train_perplexity 37 model 1 1.1534886546682654 20240813-21:33:48 train_perplexity 37 model 2 1.154587898864231 20240813-21:33:50 test_perplexity 37 model 1 1.1463078423284088 20240813-21:33:51 test_perplexity 37 model 2 1.1485834282111391 20240813-21:35:12 test_accuracy 37 model 2 val 730 / 808 20240813-21:35:13 test_accuracy 37 model 1 val 752 / 805 20240813-21:35:15 wrote gpt_002.pth 20240813-21:35:16 wrote gpt_001.pth 20240813-21:35:33 wrote non_validated_0037_02.png 20240813-21:35:50 wrote non_validated_0037_01.png 20240813-21:35:50 wrote state.pth 20240813-21:35:50 --- epoch 38 ---------------------------------------- 20240813-21:35:50 current_test_accuracies 0.9315 0.9342 0.9035 0.9406 0.9258 20240813-21:35:50 training model 2 20240813-21:35:50 training model 4 20240813-21:39:33 train_perplexity 38 model 2 1.154625110945838 20240813-21:39:33 train_perplexity 38 model 4 1.155846098521991 20240813-21:39:36 test_perplexity 38 model 2 1.1498630058027013 20240813-21:39:36 test_perplexity 38 model 4 1.1493636206912707 20240813-21:40:59 test_accuracy 38 model 2 val 759 / 808 20240813-21:41:02 test_accuracy 38 model 4 val 717 / 771 20240813-21:41:04 wrote gpt_002.pth 20240813-21:41:05 wrote gpt_004.pth 20240813-21:41:22 wrote non_validated_0038_02.png 20240813-21:41:39 wrote non_validated_0038_04.png 20240813-21:41:39 wrote state.pth 20240813-21:41:39 --- epoch 39 ---------------------------------------- 20240813-21:41:39 current_test_accuracies 0.9315 0.9342 0.9394 0.9406 0.9300 20240813-21:41:39 training model 4 20240813-21:41:39 training model 0 20240813-21:45:21 train_perplexity 39 model 4 1.1558104340403554 20240813-21:45:21 train_perplexity 39 model 0 1.1544275637387142 20240813-21:45:25 test_perplexity 39 model 4 1.1491623018962864 20240813-21:45:25 test_perplexity 39 model 0 1.1468213038466306 20240813-21:46:50 test_accuracy 39 model 4 val 721 / 787 20240813-21:46:53 test_accuracy 39 model 0 val 703 / 773 20240813-21:46:55 wrote gpt_004.pth 20240813-21:46:56 wrote gpt_000.pth 20240813-21:47:13 wrote non_validated_0039_04.png 20240813-21:47:30 wrote non_validated_0039_00.png 20240813-21:47:30 wrote state.pth 20240813-21:47:30 --- epoch 40 ---------------------------------------- 20240813-21:47:30 current_test_accuracies 0.9094 0.9342 0.9394 0.9406 0.9161 20240813-21:47:30 training model 0 20240813-21:47:30 training model 4 20240813-21:51:12 train_perplexity 40 model 0 1.1537562138871578 20240813-21:51:12 train_perplexity 40 model 4 1.1540323793671354 20240813-21:51:16 test_perplexity 40 model 0 1.1504095866422481 20240813-21:51:16 test_perplexity 40 model 4 1.149743684915439 20240813-21:52:38 test_accuracy 40 model 4 val 752 / 810 20240813-21:52:41 test_accuracy 40 model 0 val 739 / 799 20240813-21:52:43 wrote gpt_000.pth 20240813-21:52:44 wrote gpt_004.pth 20240813-21:53:01 wrote non_validated_0040_00.png 20240813-21:53:18 wrote non_validated_0040_04.png 20240813-21:53:18 wrote state.pth 20240813-21:53:18 --- epoch 41 ---------------------------------------- 20240813-21:53:18 current_test_accuracies 0.9249 0.9342 0.9394 0.9406 0.9284 20240813-21:53:18 training model 0 20240813-21:53:18 training model 4 20240813-21:57:01 train_perplexity 41 model 0 1.1526705263042643 20240813-21:57:01 train_perplexity 41 model 4 1.1535288885280055 20240813-21:57:04 test_perplexity 41 model 0 1.14849723103376 20240813-21:57:04 test_perplexity 41 model 4 1.1488331998987844 20240813-21:58:28 test_accuracy 41 model 4 val 736 / 800 20240813-21:58:29 test_accuracy 41 model 0 val 746 / 793 20240813-21:58:31 wrote gpt_000.pth 20240813-21:58:32 wrote gpt_004.pth 20240813-21:58:49 wrote non_validated_0041_00.png 20240813-21:59:06 wrote non_validated_0041_04.png 20240813-21:59:06 wrote state.pth 20240813-21:59:06 --- epoch 42 ---------------------------------------- 20240813-21:59:06 current_test_accuracies 0.9407 0.9342 0.9394 0.9406 0.9200 20240813-21:59:06 training model 4 20240813-21:59:06 training model 1 20240813-22:02:49 train_perplexity 42 model 4 1.1529352299768825 20240813-22:02:49 train_perplexity 42 model 1 1.152011497484758 20240813-22:02:52 test_perplexity 42 model 4 1.1479615439227595 20240813-22:02:53 test_perplexity 42 model 1 1.1489633652070164 20240813-22:04:15 test_accuracy 42 model 1 val 713 / 800 20240813-22:04:17 test_accuracy 42 model 4 val 742 / 788 20240813-22:04:19 wrote gpt_004.pth 20240813-22:04:19 wrote gpt_001.pth 20240813-22:04:37 wrote non_validated_0042_04.png 20240813-22:04:54 wrote non_validated_0042_01.png 20240813-22:04:54 wrote state.pth 20240813-22:04:54 --- epoch 43 ---------------------------------------- 20240813-22:04:54 current_test_accuracies 0.9407 0.8913 0.9394 0.9406 0.9416 20240813-22:04:54 training model 1 20240813-22:04:54 training model 2 20240813-22:08:36 train_perplexity 43 model 1 1.1516102966759265 20240813-22:08:36 train_perplexity 43 model 2 1.154001696257472 20240813-22:08:40 test_perplexity 43 model 1 1.1480106446374663 20240813-22:08:40 test_perplexity 43 model 2 1.1480974278121236 20240813-22:10:02 test_accuracy 43 model 1 val 741 / 805 20240813-22:10:05 test_accuracy 43 model 2 val 731 / 792 20240813-22:10:07 wrote gpt_001.pth 20240813-22:10:07 wrote gpt_002.pth 20240813-22:10:25 wrote non_validated_0043_01.png 20240813-22:10:42 wrote non_validated_0043_02.png 20240813-22:10:42 wrote state.pth 20240813-22:10:42 --- epoch 44 ---------------------------------------- 20240813-22:10:42 current_test_accuracies 0.9407 0.9205 0.9230 0.9406 0.9416 20240813-22:10:42 training model 1 20240813-22:10:42 training model 2 20240813-22:14:25 train_perplexity 44 model 1 1.1512066356465567 20240813-22:14:25 train_perplexity 44 model 2 1.1525943284817985 20240813-22:14:28 test_perplexity 44 model 1 1.1453256475187796 20240813-22:14:28 test_perplexity 44 model 2 1.147046672669756 20240813-22:15:54 test_accuracy 44 model 1 val 737 / 786 20240813-22:15:56 test_accuracy 44 model 2 val 714 / 769 20240813-22:15:58 wrote gpt_001.pth 20240813-22:15:59 wrote gpt_002.pth 20240813-22:16:16 wrote non_validated_0044_01.png 20240813-22:16:33 wrote non_validated_0044_02.png 20240813-22:16:33 wrote state.pth 20240813-22:16:33 --- epoch 45 ---------------------------------------- 20240813-22:16:33 current_test_accuracies 0.9407 0.9377 0.9285 0.9406 0.9416 20240813-22:16:33 training model 2 20240813-22:16:33 training model 1 20240813-22:20:15 train_perplexity 45 model 1 1.1514442391526611 20240813-22:20:16 train_perplexity 45 model 2 1.1526435658505432 20240813-22:20:19 test_perplexity 45 model 1 1.1470841219293695 20240813-22:20:19 test_perplexity 45 model 2 1.147922056348536 20240813-22:21:42 test_accuracy 45 model 2 val 747 / 804 20240813-22:21:44 test_accuracy 45 model 1 val 750 / 787 20240813-22:21:46 wrote gpt_002.pth 20240813-22:21:46 wrote gpt_001.pth 20240813-22:22:04 wrote non_validated_0045_02.png 20240813-22:22:21 wrote non_validated_0045_01.png 20240813-22:22:21 wrote state.pth 20240813-22:22:21 --- epoch 46 ---------------------------------------- 20240813-22:22:21 current_test_accuracies 0.9407 0.9530 0.9291 0.9406 0.9416 20240813-22:22:21 training model 2 20240813-22:22:21 training model 3 20240813-22:26:04 train_perplexity 46 model 2 1.1519542220594226 20240813-22:26:04 train_perplexity 46 model 3 1.1529522450219474 20240813-22:26:07 test_perplexity 46 model 2 1.1458871782750146 20240813-22:26:07 test_perplexity 46 model 3 1.146871204935671 20240813-22:27:29 test_accuracy 46 model 3 val 746 / 805 20240813-22:27:31 test_accuracy 46 model 2 val 749 / 787 20240813-22:27:33 wrote gpt_002.pth 20240813-22:27:33 wrote gpt_003.pth 20240813-22:27:51 wrote non_validated_0046_02.png 20240813-22:28:08 wrote non_validated_0046_03.png 20240813-22:28:08 wrote state.pth 20240813-22:28:08 --- epoch 47 ---------------------------------------- 20240813-22:28:08 current_test_accuracies 0.9407 0.9530 0.9517 0.9267 0.9416 20240813-22:28:08 training model 3 20240813-22:28:08 training model 0 20240813-22:31:51 train_perplexity 47 model 0 1.1529098281755459 20240813-22:31:51 train_perplexity 47 model 3 1.1527001417937164 20240813-22:31:55 test_perplexity 47 model 0 1.1468547181810962 20240813-22:31:55 test_perplexity 47 model 3 1.1459635933770824 20240813-22:33:19 test_accuracy 47 model 0 val 736 / 786 20240813-22:33:21 test_accuracy 47 model 3 val 718 / 786 20240813-22:33:22 wrote gpt_003.pth 20240813-22:33:23 wrote gpt_000.pth 20240813-22:33:40 wrote non_validated_0047_03.png 20240813-22:33:57 wrote non_validated_0047_00.png 20240813-22:33:57 wrote state.pth 20240813-22:33:57 --- epoch 48 ---------------------------------------- 20240813-22:33:57 current_test_accuracies 0.9364 0.9530 0.9517 0.9135 0.9416 20240813-22:33:57 training model 3 20240813-22:33:57 training model 0 20240813-22:37:40 train_perplexity 48 model 0 1.1516593536631003 20240813-22:37:41 train_perplexity 48 model 3 1.1520474580462399 20240813-22:37:44 test_perplexity 48 model 0 1.1496429199412561 20240813-22:37:44 test_perplexity 48 model 3 1.1488547102731825 20240813-22:39:06 test_accuracy 48 model 0 val 766 / 810 20240813-22:39:09 test_accuracy 48 model 3 val 738 / 797 20240813-22:39:11 wrote gpt_003.pth 20240813-22:39:12 wrote gpt_000.pth 20240813-22:39:29 wrote non_validated_0048_03.png 20240813-22:39:46 wrote non_validated_0048_00.png 20240813-22:39:46 wrote state.pth 20240813-22:39:46 --- epoch 49 ---------------------------------------- 20240813-22:39:46 current_test_accuracies 0.9457 0.9530 0.9517 0.9260 0.9416 20240813-22:39:46 training model 3 20240813-22:39:46 training model 4 20240813-22:43:29 train_perplexity 49 model 3 1.1516660852754614 20240813-22:43:29 train_perplexity 49 model 4 1.1521962699752293 20240813-22:43:33 test_perplexity 49 model 3 1.1475721906481453 20240813-22:43:33 test_perplexity 49 model 4 1.148522242905035 20240813-22:44:55 test_accuracy 49 model 4 val 752 / 800 20240813-22:44:57 test_accuracy 49 model 3 val 777 / 816 20240813-22:44:59 wrote gpt_003.pth 20240813-22:44:59 wrote gpt_004.pth 20240813-22:45:16 wrote non_validated_0049_03.png 20240813-22:45:34 wrote non_validated_0049_04.png 20240813-22:45:34 wrote state.pth 20240813-22:45:34 --- epoch 50 ---------------------------------------- 20240813-22:45:34 current_test_accuracies 0.9457 0.9530 0.9517 0.9522 0.9400 20240813-22:45:34 training model 4 20240813-22:45:34 training model 0 20240813-22:49:16 train_perplexity 50 model 0 1.1514826044151338 20240813-22:49:17 train_perplexity 50 model 4 1.1518966628704204 20240813-22:49:20 test_perplexity 50 model 0 1.146338086054843 20240813-22:49:20 test_perplexity 50 model 4 1.1490381409797408 20240813-22:50:42 test_accuracy 50 model 4 val 755 / 802 20240813-22:50:42 test_accuracy 50 model 0 val 781 / 815 20240813-22:50:45 wrote gpt_004.pth 20240813-22:50:45 wrote gpt_000.pth 20240813-22:51:02 wrote non_validated_0050_04.png 20240813-22:51:20 wrote non_validated_0050_00.png 20240813-22:51:20 wrote state.pth 20240813-22:51:20 --- epoch 51 ---------------------------------------- 20240813-22:51:20 current_test_accuracies 0.9583 0.9530 0.9517 0.9522 0.9414 20240813-22:51:20 training model 4 20240813-22:51:20 training model 2 20240813-22:55:01 train_perplexity 51 model 2 1.151493339426471 20240813-22:55:05 test_perplexity 51 model 2 1.1501763089921953 20240813-22:55:05 train_perplexity 51 model 4 1.1519487011364553 20240813-22:55:07 test_perplexity 51 model 4 1.1480274332674274 20240813-22:56:30 test_accuracy 51 model 4 val 760 / 805 20240813-22:56:30 test_accuracy 51 model 2 val 761 / 810 20240813-22:56:32 wrote gpt_004.pth 20240813-22:56:33 wrote gpt_002.pth 20240813-22:56:50 wrote non_validated_0051_04.png 20240813-22:57:08 wrote non_validated_0051_02.png 20240813-22:57:08 wrote state.pth 20240813-22:57:08 --- epoch 52 ---------------------------------------- 20240813-22:57:08 current_test_accuracies 0.9583 0.9530 0.9395 0.9522 0.9441 20240813-22:57:08 training model 2 20240813-22:57:08 training model 4 20240813-23:00:51 train_perplexity 52 model 4 1.1510219774581425 20240813-23:00:51 train_perplexity 52 model 2 1.1511799343472335 20240813-23:00:54 test_perplexity 52 model 4 1.1478733697203987 20240813-23:00:55 test_perplexity 52 model 2 1.1457737321393848 20240813-23:02:20 test_accuracy 52 model 4 val 758 / 795 20240813-23:02:20 test_accuracy 52 model 2 val 744 / 792 20240813-23:02:23 wrote gpt_002.pth 20240813-23:02:23 wrote gpt_004.pth 20240813-23:02:41 wrote non_validated_0052_02.png 20240813-23:02:58 wrote non_validated_0052_04.png 20240813-23:02:58 wrote state.pth 20240813-23:02:58 --- epoch 53 ---------------------------------------- 20240813-23:02:58 current_test_accuracies 0.9583 0.9530 0.9394 0.9522 0.9535 20240813-23:02:58 training model 2 20240813-23:02:58 training model 3 20240813-23:05:03 argv ./main.py --result_dir=results_diverse --resume --seed=1234 20240813-23:05:03 args.log_filename train.log 20240813-23:05:03 args.result_dir results_diverse 20240813-23:05:03 args.seed 1234 20240813-23:05:03 args.resume True 20240813-23:05:03 args.max_percents_of_test_in_train -1 20240813-23:05:03 args.log_command None 20240813-23:05:03 args.nb_epochs 10000 20240813-23:05:03 args.batch_size 25 20240813-23:05:03 args.physical_batch_size None 20240813-23:05:03 args.inference_batch_size 25 20240813-23:05:03 args.nb_train_samples 40000 20240813-23:05:03 args.nb_test_samples 1000 20240813-23:05:03 args.nb_new_c_quizzes_for_train None 20240813-23:05:03 args.nb_new_c_quizzes_for_test None 20240813-23:05:03 args.learning_rate 0.0005 20240813-23:05:03 args.schedule_free False 20240813-23:05:03 args.model 37M 20240813-23:05:03 args.dim_model 512 20240813-23:05:03 args.dim_keys 64 20240813-23:05:03 args.dim_hidden 2048 20240813-23:05:03 args.nb_heads 8 20240813-23:05:03 args.nb_blocks 12 20240813-23:05:03 args.dropout 0.5 20240813-23:05:03 args.deterministic_synthesis False 20240813-23:05:03 args.problem grids 20240813-23:05:03 args.nb_threads 1 20240813-23:05:03 args.gpus all 20240813-23:05:03 args.nb_gpts 5 20240813-23:05:03 args.min_succeed_to_validate 2 20240813-23:05:03 args.max_fail_to_validate 3 20240813-23:05:03 args.accuracy_to_make_c_quizzes 0.95 20240813-23:05:03 args.proba_understands 0.95 20240813-23:05:03 args.proba_not_understands 0.5 20240813-23:05:03 args.temperature_hot 1.5 20240813-23:05:03 args.temperature_cold 1 20240813-23:05:03 args.prompt_noise 0.05 20240813-23:05:03 args.dirty_debug False 20240813-23:05:03 args.test None 20240813-23:05:03 args.grids_world_tasks replace_color,translate,grow,frame 20240813-23:05:03 args.sky_height 6 20240813-23:05:03 args.sky_width 8 20240813-23:05:03 args.sky_nb_birds 3 20240813-23:05:03 args.sky_nb_iterations 2 20240813-23:05:03 args.sky_speed 3 20240813-23:05:03 main_device cuda:0 gpus ['cuda:0', 'cuda:1'] 20240813-23:05:03 vocabulary_size 15 20240813-23:05:03 creating model 0 20240813-23:05:03 creating model 1 20240813-23:05:03 creating model 2 20240813-23:05:04 creating model 3 20240813-23:05:04 creating model 4 20240813-23:05:04 successfully loaded gpt_000.pth 20240813-23:05:05 successfully loaded gpt_001.pth 20240813-23:05:05 successfully loaded gpt_002.pth 20240813-23:05:05 successfully loaded gpt_003.pth 20240813-23:05:05 successfully loaded gpt_004.pth 20240813-23:05:05 successfully loaded state.pth 20240813-23:05:05 nb_parameters 37819407 (37M) 20240813-23:05:05 nb_new_c_quizzes_for_train 1000 nb_new_c_quizzes_for_test 25 20240813-23:05:05 wrote state.pth 20240813-23:05:05 --- epoch 53 ---------------------------------------- 20240813-23:05:05 current_test_accuracies 0.9583 0.9530 0.9394 0.9522 0.9535 20240813-23:05:05 training model 2 20240813-23:05:05 training model 3 20240813-23:09:15 train_perplexity 53 model 2 1.1508375171409488 20240813-23:09:16 train_perplexity 53 model 3 1.151245159501894 20240813-23:09:17 test_perplexity 53 model 2 1.1499765678726999 20240813-23:09:18 test_perplexity 53 model 3 1.1473370894824175 20240813-23:10:39 test_accuracy 53 model 2 val 759 / 803 20240813-23:10:40 test_accuracy 53 model 3 val 762 / 806 20240813-23:10:42 wrote gpt_002.pth 20240813-23:10:43 wrote gpt_003.pth 20240813-23:10:58 wrote non_validated_0053_02.png 20240813-23:11:12 wrote non_validated_0053_03.png 20240813-23:11:12 wrote state.pth 20240813-23:11:12 --- epoch 54 ---------------------------------------- 20240813-23:11:12 current_test_accuracies 0.9583 0.9530 0.9452 0.9454 0.9535 20240813-23:11:12 training model 2 20240813-23:11:12 training model 3 20240813-23:14:55 train_perplexity 54 model 3 1.1506859574511579 20240813-23:14:55 train_perplexity 54 model 2 1.1505747585186878 20240813-23:14:58 test_perplexity 54 model 3 1.1459260223232952 20240813-23:14:58 test_perplexity 54 model 2 1.1496885090442952 20240813-23:16:21 test_accuracy 54 model 3 val 748 / 805 20240813-23:16:21 test_accuracy 54 model 2 val 745 / 800 20240813-23:16:24 wrote gpt_002.pth 20240813-23:16:24 wrote gpt_003.pth 20240813-23:16:39 wrote non_validated_0054_02.png 20240813-23:16:53 wrote non_validated_0054_03.png 20240813-23:16:53 wrote state.pth 20240813-23:16:53 --- epoch 55 ---------------------------------------- 20240813-23:16:53 current_test_accuracies 0.9583 0.9530 0.9312 0.9292 0.9535 20240813-23:16:53 training model 3 20240813-23:16:53 training model 2 20240813-23:20:36 train_perplexity 55 model 2 1.1504019475148004 20240813-23:20:36 train_perplexity 55 model 3 1.1502865873653856 20240813-23:20:40 test_perplexity 55 model 2 1.1447347916570605 20240813-23:20:40 test_perplexity 55 model 3 1.1447656753611513 20240813-23:22:05 test_accuracy 55 model 2 val 748 / 778 20240813-23:22:05 test_accuracy 55 model 3 val 742 / 779 20240813-23:22:07 wrote gpt_003.pth 20240813-23:22:08 wrote gpt_002.pth 20240813-23:22:22 wrote non_validated_0055_03.png 20240813-23:22:37 wrote non_validated_0055_02.png 20240813-23:22:37 wrote state.pth 20240813-23:22:37 --- epoch 56 ---------------------------------------- 20240813-23:22:37 current_test_accuracies 0.9583 0.9530 0.9614 0.9525 0.9535 20240813-23:50:06 keep c_quizzes model 2 validated nb_validated 1499 / 5125 (finishes Wed 00:56 -- 3272/h) proportion_kept 29.25% 20240814-00:17:29 keep c_quizzes model 1 validated nb_validated 3041 / 5125 (finishes Wed 00:55 -- 3325/h) proportion_kept 29.67% 20240814-00:44:54 keep c_quizzes model 1 validated nb_validated 4511 / 5125 (finishes Wed 00:56 -- 3289/h) proportion_kept 29.34% 20240814-01:12:20 keep c_quizzes model 3 validated nb_validated 6002 / 5125 (finishes now! -- 3282/h) proportion_kept 29.28% 20240814-01:12:20 teacher model 0 to [0, 991, 744, 1315, 907] 20240814-01:12:20 teacher model 1 to [572, 0, 133, 269, 245] 20240814-01:12:20 teacher model 2 to [232, 227, 0, 78, 109] 20240814-01:12:20 teacher model 3 to [58, 57, 56, 0, 0] 20240814-01:12:20 teacher model 4 to [2, 2, 4, 1, 0] 20240814-01:12:24 nb_c_quizzes model 0 train 842 test 22 20240814-01:12:27 nb_c_quizzes model 1 train 1245 test 32 20240814-01:12:31 nb_c_quizzes model 2 train 914 test 23 20240814-01:12:34 nb_c_quizzes model 3 train 1622 test 41 20240814-01:12:38 nb_c_quizzes model 4 train 1230 test 31 20240814-01:12:38 training model 0 20240814-01:12:38 training model 1 20240814-01:16:21 train_perplexity 56 model 0 1.151598668503106 20240814-01:16:21 train_perplexity 56 model 1 1.1519989051646817 20240814-01:16:25 test_perplexity 56 model 0 1.1464127437149683 20240814-01:16:25 test_perplexity 56 model 1 1.149964561392442 20240814-01:17:46 test_accuracy 56 model 0 val 764 / 803 20240814-01:17:46 test_accuracy 56 model 1 val 753 / 801 20240814-01:17:49 wrote gpt_000.pth 20240814-01:17:49 wrote gpt_001.pth 20240814-01:18:04 wrote non_validated_0056_00.png 20240814-01:18:18 wrote non_validated_0056_01.png 20240814-01:18:18 wrote state.pth 20240814-01:18:18 --- epoch 57 ---------------------------------------- 20240814-01:18:18 current_test_accuracies 0.9514 0.9401 0.0000 0.0000 0.0000 20240814-01:18:18 training model 2 20240814-01:18:18 training model 3 20240814-01:22:01 train_perplexity 57 model 3 1.1514321821114113 20240814-01:22:01 train_perplexity 57 model 2 1.1509815216173318 20240814-01:22:05 test_perplexity 57 model 3 1.1496773340947426 20240814-01:22:05 test_perplexity 57 model 2 1.1486084631401632 20240814-01:23:27 test_accuracy 57 model 3 val 764 / 806 20240814-01:23:30 test_accuracy 57 model 2 val 749 / 799 20240814-01:23:32 wrote gpt_002.pth 20240814-01:23:33 wrote gpt_003.pth 20240814-01:23:47 wrote non_validated_0057_02.png 20240814-01:24:02 wrote non_validated_0057_03.png 20240814-01:24:02 wrote state.pth 20240814-01:24:02 --- epoch 58 ---------------------------------------- 20240814-01:24:02 current_test_accuracies 0.9514 0.9401 0.9374 0.9479 0.0000 20240814-01:24:02 training model 4 20240814-01:24:02 training model 2 20240814-01:27:44 train_perplexity 58 model 4 1.151730959024548 20240814-01:27:44 train_perplexity 58 model 2 1.1505563160047843 20240814-01:27:48 test_perplexity 58 model 4 1.148624477676756 20240814-01:27:48 test_perplexity 58 model 2 1.1494349335514262 20240814-01:29:10 test_accuracy 58 model 2 val 781 / 822 20240814-01:29:12 test_accuracy 58 model 4 val 723 / 785 20240814-01:29:14 wrote gpt_004.pth 20240814-01:29:15 wrote gpt_002.pth 20240814-01:29:29 wrote non_validated_0058_04.png 20240814-01:29:44 wrote non_validated_0058_02.png 20240814-01:29:44 wrote state.pth 20240814-01:29:44 --- epoch 59 ---------------------------------------- 20240814-01:29:44 current_test_accuracies 0.9514 0.9401 0.9501 0.9479 0.9210 20240814-01:29:44 training model 4 20240814-01:29:44 training model 1 20240814-01:33:26 train_perplexity 59 model 1 1.1516143531868235 20240814-01:33:27 train_perplexity 59 model 4 1.1513941672364028 20240814-01:33:30 test_perplexity 59 model 1 1.1472788157318186 20240814-01:33:30 test_perplexity 59 model 4 1.147756832061206 20240814-01:34:53 test_accuracy 59 model 1 val 753 / 805 20240814-01:34:55 test_accuracy 59 model 4 val 749 / 790 20240814-01:34:57 wrote gpt_004.pth 20240814-01:34:58 wrote gpt_001.pth 20240814-01:35:12 wrote non_validated_0059_04.png 20240814-01:35:27 wrote non_validated_0059_01.png 20240814-01:35:27 wrote state.pth 20240814-01:35:27 --- epoch 60 ---------------------------------------- 20240814-01:35:27 current_test_accuracies 0.9514 0.9354 0.9501 0.9479 0.9481 20240814-01:35:27 training model 1 20240814-01:35:27 training model 3 20240814-01:39:09 train_perplexity 60 model 3 1.1511668997217819 20240814-01:39:09 train_perplexity 60 model 1 1.1514567902840376 20240814-01:39:13 test_perplexity 60 model 3 1.148947106256462 20240814-01:39:13 test_perplexity 60 model 1 1.1468227942193174 20240814-01:40:35 test_accuracy 60 model 3 val 777 / 811 20240814-01:40:38 test_accuracy 60 model 1 val 750 / 788 20240814-01:40:40 wrote gpt_001.pth 20240814-01:40:40 wrote gpt_003.pth 20240814-01:40:55 wrote non_validated_0060_01.png 20240814-01:41:09 wrote non_validated_0060_03.png 20240814-01:41:09 wrote state.pth 20240814-01:41:09 --- epoch 61 ---------------------------------------- 20240814-01:41:09 current_test_accuracies 0.9514 0.9518 0.9501 0.9581 0.9481 20240814-01:41:09 training model 4 20240814-01:41:09 training model 2 20240814-01:44:51 train_perplexity 61 model 2 1.150493373410592 20240814-01:44:52 train_perplexity 61 model 4 1.151088522540461 20240814-01:44:55 test_perplexity 61 model 2 1.1467283340107475 20240814-01:44:55 test_perplexity 61 model 4 1.146429381921277 20240814-01:46:21 test_accuracy 61 model 2 val 761 / 781 20240814-01:46:23 test_accuracy 61 model 4 val 705 / 766 20240814-01:46:25 wrote gpt_004.pth 20240814-01:46:26 wrote gpt_002.pth 20240814-01:46:40 wrote non_validated_0061_04.png 20240814-01:46:55 wrote non_validated_0061_02.png 20240814-01:46:55 wrote state.pth 20240814-01:46:55 --- epoch 62 ---------------------------------------- 20240814-01:46:55 current_test_accuracies 0.9514 0.9518 0.9744 0.9581 0.9204 20240814-01:46:55 training model 4 20240814-01:46:55 training model 0 20240814-01:50:37 train_perplexity 62 model 4 1.1510177559943096 20240814-01:50:37 train_perplexity 62 model 0 1.1516400635203483 20240814-01:50:41 test_perplexity 62 model 4 1.1469944243242305 20240814-01:50:41 test_perplexity 62 model 0 1.148685789958154 20240814-01:52:07 test_accuracy 62 model 4 val 732 / 796 20240814-01:52:07 test_accuracy 62 model 0 val 734 / 793 20240814-01:52:10 wrote gpt_004.pth 20240814-01:52:11 wrote gpt_000.pth 20240814-01:52:25 wrote non_validated_0062_04.png 20240814-01:52:39 wrote non_validated_0062_00.png 20240814-01:52:39 wrote state.pth 20240814-01:52:39 --- epoch 63 ---------------------------------------- 20240814-01:52:39 current_test_accuracies 0.9256 0.9518 0.9744 0.9581 0.9196 20240814-01:52:39 training model 4 20240814-01:52:39 training model 0 20240814-01:56:22 train_perplexity 63 model 0 1.151202583295065 20240814-01:56:23 train_perplexity 63 model 4 1.1506250970246286 20240814-01:56:25 test_perplexity 63 model 0 1.1461396088174152 20240814-01:56:26 test_perplexity 63 model 4 1.1473578161580897 20240814-01:57:48 test_accuracy 63 model 0 val 764 / 806 20240814-01:57:49 test_accuracy 63 model 4 val 754 / 801 20240814-01:57:51 wrote gpt_004.pth 20240814-01:57:52 wrote gpt_000.pth 20240814-01:58:06 wrote non_validated_0063_04.png 20240814-01:58:21 wrote non_validated_0063_00.png 20240814-01:58:21 wrote state.pth 20240814-01:58:21 --- epoch 64 ---------------------------------------- 20240814-01:58:21 current_test_accuracies 0.9479 0.9518 0.9744 0.9581 0.9413 20240814-01:58:21 training model 4 20240814-01:58:21 training model 0 20240814-02:02:03 train_perplexity 64 model 4 1.150852561150083 20240814-02:02:03 train_perplexity 64 model 0 1.150850061471283 20240814-02:02:07 test_perplexity 64 model 4 1.149786715961966 20240814-02:02:07 test_perplexity 64 model 0 1.1467389411774183 20240814-02:03:31 test_accuracy 64 model 4 val 776 / 823 20240814-02:03:31 test_accuracy 64 model 0 val 779 / 812 20240814-02:03:33 wrote gpt_004.pth 20240814-02:03:34 wrote gpt_000.pth 20240814-02:03:48 wrote non_validated_0064_04.png 20240814-02:04:03 wrote non_validated_0064_00.png 20240814-02:04:03 wrote state.pth 20240814-02:04:03 --- epoch 65 ---------------------------------------- 20240814-02:04:03 current_test_accuracies 0.9594 0.9518 0.9744 0.9581 0.9429 20240814-02:04:03 training model 4 20240814-02:04:03 training model 1 20240814-02:07:45 train_perplexity 65 model 4 1.1505328853592736 20240814-02:07:46 train_perplexity 65 model 1 1.150996161218854 20240814-02:07:49 test_perplexity 65 model 4 1.1504529856871863 20240814-02:07:49 test_perplexity 65 model 1 1.1490926779923434 20240814-02:09:14 test_accuracy 65 model 4 val 756 / 795 20240814-02:09:14 test_accuracy 65 model 1 val 749 / 793 20240814-02:09:17 wrote gpt_004.pth 20240814-02:09:18 wrote gpt_001.pth 20240814-02:09:32 wrote non_validated_0065_04.png 20240814-02:09:47 wrote non_validated_0065_01.png 20240814-02:09:47 wrote state.pth 20240814-02:09:47 --- epoch 66 ---------------------------------------- 20240814-02:09:47 current_test_accuracies 0.9594 0.9445 0.9744 0.9581 0.9509 20240814-02:09:47 training model 1 20240814-02:09:47 training model 4 20240814-02:13:29 train_perplexity 66 model 1 1.1508430098660234 20240814-02:13:29 train_perplexity 66 model 4 1.1500120042994009 20240814-02:13:32 test_perplexity 66 model 1 1.1474630916951298 20240814-02:13:33 test_perplexity 66 model 4 1.1484006619727958 20240814-02:14:58 test_accuracy 66 model 1 val 753 / 796 20240814-02:14:59 test_accuracy 66 model 4 val 739 / 799 20240814-02:15:01 wrote gpt_001.pth 20240814-02:15:02 wrote gpt_004.pth 20240814-02:15:16 wrote non_validated_0066_01.png 20240814-02:15:31 wrote non_validated_0066_04.png 20240814-02:15:31 wrote state.pth 20240814-02:15:31 --- epoch 67 ---------------------------------------- 20240814-02:15:31 current_test_accuracies 0.9594 0.9460 0.9744 0.9581 0.9249 20240814-02:15:31 training model 4 20240814-02:15:31 training model 1 20240814-02:19:13 train_perplexity 67 model 4 1.150026913479397 20240814-02:19:13 train_perplexity 67 model 1 1.1505957556566868 20240814-02:19:16 test_perplexity 67 model 4 1.145169344657137 20240814-02:19:16 test_perplexity 67 model 1 1.1460287765354722 20240814-02:20:41 test_accuracy 67 model 4 val 757 / 782 20240814-02:20:42 test_accuracy 67 model 1 val 745 / 794 20240814-02:20:44 wrote gpt_004.pth 20240814-02:20:45 wrote gpt_001.pth 20240814-02:20:59 wrote non_validated_0067_04.png 20240814-02:21:14 wrote non_validated_0067_01.png 20240814-02:21:14 wrote state.pth 20240814-02:21:14 --- epoch 68 ---------------------------------------- 20240814-02:21:14 current_test_accuracies 0.9594 0.9383 0.9744 0.9581 0.9680 20240814-02:21:14 training model 1 20240814-02:21:14 training model 3 20240814-02:24:56 train_perplexity 68 model 1 1.1504843725760665 20240814-02:24:56 train_perplexity 68 model 3 1.1506154693591246 20240814-02:24:59 test_perplexity 68 model 1 1.1461868269385103 20240814-02:25:00 test_perplexity 68 model 3 1.1459805113085428 20240814-02:26:22 test_accuracy 68 model 3 val 755 / 802 20240814-02:26:25 test_accuracy 68 model 1 val 761 / 797 20240814-02:26:27 wrote gpt_001.pth 20240814-02:26:28 wrote gpt_003.pth 20240814-02:26:42 wrote non_validated_0068_01.png 20240814-02:26:57 wrote non_validated_0068_03.png 20240814-02:26:57 wrote state.pth 20240814-02:26:57 --- epoch 69 ---------------------------------------- 20240814-02:26:57 current_test_accuracies 0.9594 0.9548 0.9744 0.9414 0.9680 20240814-02:26:57 training model 3 20240814-02:26:57 training model 1 20240814-02:30:39 train_perplexity 69 model 3 1.1511884797853456 20240814-02:30:39 train_perplexity 69 model 1 1.1501754455369406 20240814-02:30:42 test_perplexity 69 model 3 1.1466293351736465 20240814-02:30:43 test_perplexity 69 model 1 1.1462359279654588 20240814-02:32:08 test_accuracy 69 model 3 val 752 / 793 20240814-02:32:08 test_accuracy 69 model 1 val 752 / 795 20240814-02:32:11 wrote gpt_003.pth 20240814-02:32:11 wrote gpt_001.pth 20240814-02:32:26 wrote non_validated_0069_03.png 20240814-02:32:40 wrote non_validated_0069_01.png 20240814-02:32:40 wrote state.pth 20240814-02:32:40 --- epoch 70 ---------------------------------------- 20240814-02:32:40 current_test_accuracies 0.9594 0.9459 0.9744 0.9483 0.9680 20240814-02:32:40 training model 1 20240814-02:32:40 training model 3 20240814-02:36:23 train_perplexity 70 model 1 1.1501495870034115 20240814-02:36:23 train_perplexity 70 model 3 1.150498463709387 20240814-02:36:26 test_perplexity 70 model 1 1.1470931151726174 20240814-02:36:26 test_perplexity 70 model 3 1.1484264638535218 20240814-02:37:53 test_accuracy 70 model 1 val 771 / 799 20240814-02:37:54 test_accuracy 70 model 3 val 769 / 798 20240814-02:37:56 wrote gpt_001.pth 20240814-02:37:57 wrote gpt_003.pth 20240814-02:38:11 wrote non_validated_0070_01.png 20240814-02:38:25 wrote non_validated_0070_03.png 20240814-02:38:25 wrote state.pth 20240814-02:38:25 --- epoch 71 ---------------------------------------- 20240814-02:38:25 current_test_accuracies 0.9594 0.9650 0.9744 0.9637 0.9680 20240814-03:05:40 keep c_quizzes model 0 validated nb_validated 1589 / 5125 (finishes Wed 04:06 -- 3498/h) proportion_kept 31.00% 20240814-03:32:56 keep c_quizzes model 4 validated nb_validated 3115 / 5125 (finishes Wed 04:08 -- 3429/h) proportion_kept 30.39% 20240814-04:00:14 keep c_quizzes model 1 validated nb_validated 4720 / 5125 (finishes Wed 04:07 -- 3461/h) proportion_kept 30.70% 20240814-04:27:34 keep c_quizzes model 4 validated nb_validated 6252 / 5125 (finishes now! -- 3437/h) proportion_kept 30.50% 20240814-04:27:34 teacher model 0 to [0, 573, 804, 862, 782] 20240814-04:27:34 teacher model 1 to [1133, 0, 366, 415, 376] 20240814-04:27:34 teacher model 2 to [238, 242, 0, 93, 74] 20240814-04:27:34 teacher model 3 to [95, 95, 93, 0, 1] 20240814-04:27:34 teacher model 4 to [1, 4, 3, 2, 0] 20240814-04:27:37 nb_c_quizzes model 0 train 2273 test 58 20240814-04:27:41 nb_c_quizzes model 1 train 2136 test 55 20240814-04:27:45 nb_c_quizzes model 2 train 2149 test 54 20240814-04:27:48 nb_c_quizzes model 3 train 2960 test 75 20240814-04:27:52 nb_c_quizzes model 4 train 2432 test 62 20240814-04:27:52 training model 0 20240814-04:27:52 training model 1 20240814-04:31:34 train_perplexity 71 model 0 1.1521544812138502 20240814-04:31:34 train_perplexity 71 model 1 1.1510899791471265 20240814-04:31:38 test_perplexity 71 model 0 1.1480688631790388 20240814-04:31:38 test_perplexity 71 model 1 1.1529362711274007 20240814-04:33:00 test_accuracy 71 model 0 val 761 / 806 20240814-04:33:02 test_accuracy 71 model 1 val 787 / 824 20240814-04:33:04 wrote gpt_000.pth 20240814-04:33:04 wrote gpt_001.pth 20240814-04:33:19 wrote non_validated_0071_00.png 20240814-04:33:33 wrote non_validated_0071_01.png 20240814-04:33:33 wrote state.pth 20240814-04:33:33 --- epoch 72 ---------------------------------------- 20240814-04:33:33 current_test_accuracies 0.9442 0.9551 0.0000 0.0000 0.0000 20240814-04:33:33 training model 2 20240814-04:33:33 training model 3 20240814-04:37:15 train_perplexity 72 model 3 1.1517504628172963 20240814-04:37:16 train_perplexity 72 model 2 1.151479446668185 20240814-04:37:19 test_perplexity 72 model 3 1.147753900198678 20240814-04:37:19 test_perplexity 72 model 2 1.1481308348462922 20240814-04:38:42 test_accuracy 72 model 3 val 768 / 809 20240814-04:38:45 test_accuracy 72 model 2 val 750 / 793 20240814-04:38:47 wrote gpt_002.pth 20240814-04:38:47 wrote gpt_003.pth 20240814-04:39:02 wrote non_validated_0072_02.png 20240814-04:39:16 wrote non_validated_0072_03.png 20240814-04:39:16 wrote state.pth 20240814-04:39:16 --- epoch 73 ---------------------------------------- 20240814-04:39:16 current_test_accuracies 0.9442 0.9551 0.9458 0.9493 0.0000 20240814-04:39:16 training model 4 20240814-04:39:16 training model 0 20240814-04:42:58 train_perplexity 73 model 4 1.1510279474077945 20240814-04:42:58 train_perplexity 73 model 0 1.15187598777219 20240814-04:43:02 test_perplexity 73 model 4 1.148763172907721 20240814-04:43:02 test_perplexity 73 model 0 1.1494216427942483 20240814-04:44:28 test_accuracy 73 model 4 val 752 / 785 20240814-04:44:29 test_accuracy 73 model 0 val 744 / 793 20240814-04:44:31 wrote gpt_004.pth 20240814-04:44:32 wrote gpt_000.pth 20240814-04:44:46 wrote non_validated_0073_04.png 20240814-04:45:01 wrote non_validated_0073_00.png 20240814-04:45:01 wrote state.pth 20240814-04:45:01 --- epoch 74 ---------------------------------------- 20240814-04:45:01 current_test_accuracies 0.9382 0.9551 0.9458 0.9493 0.9580 20240814-04:45:01 training model 0 20240814-04:45:01 training model 2 20240814-04:48:43 train_perplexity 74 model 0 1.1523994678193588 20240814-04:48:43 train_perplexity 74 model 2 1.1508774187155224 20240814-04:48:46 test_perplexity 74 model 0 1.1482848773393592 20240814-04:48:46 test_perplexity 74 model 2 1.150596688003022 20240814-04:50:09 test_accuracy 74 model 2 val 755 / 805 20240814-04:50:12 test_accuracy 74 model 0 val 753 / 791 20240814-04:50:14 wrote gpt_000.pth 20240814-04:50:15 wrote gpt_002.pth 20240814-04:50:29 wrote non_validated_0074_00.png 20240814-04:50:44 wrote non_validated_0074_02.png 20240814-04:50:44 wrote state.pth 20240814-04:50:44 --- epoch 75 ---------------------------------------- 20240814-04:50:44 current_test_accuracies 0.9520 0.9551 0.9379 0.9493 0.9580 20240814-04:50:44 training model 2 20240814-04:50:44 training model 3 20240814-04:54:26 train_perplexity 75 model 2 1.1515291941623595 20240814-04:54:26 train_perplexity 75 model 3 1.1514198254281733 20240814-04:54:29 test_perplexity 75 model 2 1.1506268232767602 20240814-04:54:29 test_perplexity 75 model 3 1.1471756970858469 20240814-04:55:55 test_accuracy 75 model 3 val 751 / 791 20240814-04:55:56 test_accuracy 75 model 2 val 749 / 783 20240814-04:55:58 wrote gpt_002.pth 20240814-04:55:59 wrote gpt_003.pth 20240814-04:56:13 wrote non_validated_0075_02.png 20240814-04:56:28 wrote non_validated_0075_03.png 20240814-04:56:28 wrote state.pth 20240814-04:56:28 --- epoch 76 ---------------------------------------- 20240814-04:56:28 current_test_accuracies 0.9520 0.9551 0.9566 0.9494 0.9580 20240814-04:56:28 training model 3 20240814-04:56:28 training model 0 20240814-05:00:10 train_perplexity 76 model 3 1.1512310801518642 20240814-05:00:10 train_perplexity 76 model 0 1.1513566292763755 20240814-05:00:13 test_perplexity 76 model 3 1.14828463543576 20240814-05:00:13 test_perplexity 76 model 0 1.149450232735321 20240814-05:01:39 test_accuracy 76 model 3 val 737 / 778 20240814-05:01:40 test_accuracy 76 model 0 val 770 / 799 20240814-05:01:42 wrote gpt_003.pth 20240814-05:01:42 wrote gpt_000.pth 20240814-05:01:57 wrote non_validated_0076_03.png 20240814-05:02:12 wrote non_validated_0076_00.png 20240814-05:02:12 wrote state.pth 20240814-05:02:12 --- epoch 77 ---------------------------------------- 20240814-05:02:12 current_test_accuracies 0.9637 0.9551 0.9566 0.9473 0.9580 20240814-05:02:12 training model 3 20240814-05:02:12 training model 1 20240814-05:05:54 train_perplexity 77 model 3 1.1511911889054336 20240814-05:05:54 train_perplexity 77 model 1 1.1504076586177134 20240814-05:05:57 test_perplexity 77 model 3 1.14846827974236 20240814-05:05:58 test_perplexity 77 model 1 1.1463323156344105 20240814-05:07:23 test_accuracy 77 model 3 val 760 / 798 20240814-05:07:23 test_accuracy 77 model 1 val 729 / 775 20240814-05:07:25 wrote gpt_003.pth 20240814-05:07:26 wrote gpt_001.pth 20240814-05:07:40 wrote non_validated_0077_03.png 20240814-05:07:55 wrote non_validated_0077_01.png 20240814-05:07:55 wrote state.pth 20240814-05:07:55 --- epoch 78 ---------------------------------------- 20240814-05:07:55 current_test_accuracies 0.9637 0.9406 0.9566 0.9524 0.9580 20240814-05:07:55 training model 1 20240814-05:07:55 training model 3 20240814-05:11:37 train_perplexity 78 model 1 1.1499759592240508 20240814-05:11:37 train_perplexity 78 model 3 1.1507530843482716 20240814-05:11:40 test_perplexity 78 model 1 1.148272405360968 20240814-05:11:40 test_perplexity 78 model 3 1.1466289763654232 20240814-05:13:06 test_accuracy 78 model 1 val 747 / 780 20240814-05:13:06 test_accuracy 78 model 3 val 749 / 782 20240814-05:13:09 wrote gpt_001.pth 20240814-05:13:09 wrote gpt_003.pth 20240814-05:13:24 wrote non_validated_0078_01.png 20240814-05:13:38 wrote non_validated_0078_03.png 20240814-05:13:38 wrote state.pth 20240814-05:13:38 --- epoch 79 ---------------------------------------- 20240814-05:13:38 current_test_accuracies 0.9637 0.9577 0.9566 0.9578 0.9580 20240814-05:40:44 keep c_quizzes model 0 validated nb_validated 1959 / 5125 (finishes Wed 06:24 -- 4338/h) proportion_kept 38.22% 20240814-06:07:46 keep c_quizzes model 0 validated nb_validated 3873 / 5125 (finishes Wed 06:25 -- 4292/h) proportion_kept 37.79% 20240814-06:34:59 keep c_quizzes model 0 validated nb_validated 5793 / 5125 (finishes now! -- 4273/h) proportion_kept 37.68% 20240814-06:34:59 teacher model 0 to [0, 767, 666, 384, 719] 20240814-06:34:59 teacher model 1 to [1045, 0, 322, 242, 327] 20240814-06:34:59 teacher model 2 to [371, 371, 0, 82, 145] 20240814-06:34:59 teacher model 3 to [115, 114, 114, 0, 1] 20240814-06:34:59 teacher model 4 to [0, 3, 2, 3, 0] 20240814-06:35:02 nb_c_quizzes model 0 train 3766 test 96 20240814-06:35:06 nb_c_quizzes model 1 train 3360 test 86 20240814-06:35:09 nb_c_quizzes model 2 train 3226 test 81 20240814-06:35:13 nb_c_quizzes model 3 train 3653 test 93 20240814-06:35:17 nb_c_quizzes model 4 train 3594 test 92 20240814-06:35:17 training model 0 20240814-06:35:17 training model 1 20240814-06:38:59 train_perplexity 79 model 0 1.1524530940221243 20240814-06:38:59 train_perplexity 79 model 1 1.1516326118697298 20240814-06:39:03 test_perplexity 79 model 0 1.151393689952507 20240814-06:39:03 test_perplexity 79 model 1 1.1483494682792519 20240814-06:40:26 test_accuracy 79 model 1 val 774 / 809 20240814-06:40:29 test_accuracy 79 model 0 val 749 / 797 20240814-06:40:31 wrote gpt_000.pth 20240814-06:40:31 wrote gpt_001.pth 20240814-06:40:45 wrote non_validated_0079_00.png 20240814-06:41:00 wrote non_validated_0079_01.png 20240814-06:41:00 wrote state.pth 20240814-06:41:00 --- epoch 80 ---------------------------------------- 20240814-06:41:00 current_test_accuracies 0.9398 0.9567 0.0000 0.0000 0.0000 20240814-06:41:00 training model 2 20240814-06:41:00 training model 3 20240814-06:44:42 train_perplexity 80 model 2 1.151790031089961 20240814-06:44:42 train_perplexity 80 model 3 1.1511056315753374 20240814-06:44:46 test_perplexity 80 model 2 1.147850549783623 20240814-06:44:46 test_perplexity 80 model 3 1.1472251789552967 20240814-06:46:10 test_accuracy 80 model 3 val 750 / 810 20240814-06:46:11 test_accuracy 80 model 2 val 727 / 790 20240814-06:46:13 wrote gpt_002.pth 20240814-06:46:14 wrote gpt_003.pth 20240814-06:46:28 wrote non_validated_0080_02.png 20240814-06:46:42 wrote non_validated_0080_03.png 20240814-06:46:42 wrote state.pth 20240814-06:46:42 --- epoch 81 ---------------------------------------- 20240814-06:46:42 current_test_accuracies 0.9398 0.9567 0.9203 0.9259 0.0000 20240814-06:46:42 training model 4 20240814-06:46:42 training model 2 20240814-06:50:25 train_perplexity 81 model 2 1.1516770462636716 20240814-06:50:25 train_perplexity 81 model 4 1.1521809520220492 20240814-06:50:28 test_perplexity 81 model 2 1.149799408703202 20240814-06:50:29 test_perplexity 81 model 4 1.1514211667373715 20240814-06:51:51 test_accuracy 81 model 2 val 778 / 821 20240814-06:51:52 test_accuracy 81 model 4 val 781 / 816 20240814-06:51:54 wrote gpt_004.pth 20240814-06:51:55 wrote gpt_002.pth 20240814-06:52:09 wrote non_validated_0081_04.png 20240814-06:52:24 wrote non_validated_0081_02.png 20240814-06:52:24 wrote state.pth 20240814-06:52:24 --- epoch 82 ---------------------------------------- 20240814-06:52:24 current_test_accuracies 0.9398 0.9567 0.9476 0.9259 0.9571 20240814-06:52:24 training model 3 20240814-06:52:24 training model 0 20240814-06:56:06 train_perplexity 82 model 0 1.1524977097462892 20240814-06:56:06 train_perplexity 82 model 3 1.150999421126687 20240814-06:56:10 test_perplexity 82 model 0 1.1500945446931878 20240814-06:56:10 test_perplexity 82 model 3 1.150209656755117 20240814-06:57:32 test_accuracy 82 model 3 val 777 / 823 20240814-06:57:34 test_accuracy 82 model 0 val 776 / 811 20240814-06:57:35 wrote gpt_003.pth 20240814-06:57:36 wrote gpt_000.pth 20240814-06:57:51 wrote non_validated_0082_03.png 20240814-06:58:05 wrote non_validated_0082_00.png 20240814-06:58:05 wrote state.pth 20240814-06:58:05 --- epoch 83 ---------------------------------------- 20240814-06:58:05 current_test_accuracies 0.9568 0.9567 0.9476 0.9441 0.9571 20240814-06:58:05 training model 3 20240814-06:58:05 training model 2 20240814-07:01:48 train_perplexity 83 model 3 1.150839641806106 20240814-07:01:48 train_perplexity 83 model 2 1.1514528537959723 20240814-07:01:51 test_perplexity 83 model 3 1.1468204970339604 20240814-07:01:51 test_perplexity 83 model 2 1.1506273522209791 20240814-07:03:15 test_accuracy 83 model 2 val 760 / 804 20240814-07:03:16 test_accuracy 83 model 3 val 753 / 797 20240814-07:03:18 wrote gpt_003.pth 20240814-07:03:19 wrote gpt_002.pth 20240814-07:03:33 wrote non_validated_0083_03.png 20240814-07:03:48 wrote non_validated_0083_02.png 20240814-07:03:48 wrote state.pth 20240814-07:03:48 --- epoch 84 ---------------------------------------- 20240814-07:03:48 current_test_accuracies 0.9568 0.9567 0.9453 0.9448 0.9571 20240814-07:03:48 training model 3 20240814-07:03:48 training model 2 20240814-07:07:30 train_perplexity 84 model 3 1.1507860246447936 20240814-07:07:30 train_perplexity 84 model 2 1.1515521166397562 20240814-07:07:34 test_perplexity 84 model 3 1.1472113058159097 20240814-07:07:34 test_perplexity 84 model 2 1.1497569560530656 20240814-07:08:59 test_accuracy 84 model 2 val 739 / 781 20240814-07:09:01 test_accuracy 84 model 3 val 753 / 784 20240814-07:09:03 wrote gpt_003.pth 20240814-07:09:03 wrote gpt_002.pth 20240814-07:09:18 wrote non_validated_0084_03.png 20240814-07:09:32 wrote non_validated_0084_02.png 20240814-07:09:32 wrote state.pth 20240814-07:09:32 --- epoch 85 ---------------------------------------- 20240814-07:09:32 current_test_accuracies 0.9568 0.9567 0.9462 0.9605 0.9571 20240814-07:09:32 training model 2 20240814-07:09:32 training model 1 20240814-07:13:15 train_perplexity 85 model 1 1.1514759119935358 20240814-07:13:16 train_perplexity 85 model 2 1.1510360759264382 20240814-07:13:18 test_perplexity 85 model 1 1.148935022782041 20240814-07:13:19 test_perplexity 85 model 2 1.1473335404360547 20240814-07:14:42 test_accuracy 85 model 1 val 776 / 807 20240814-07:14:44 test_accuracy 85 model 2 val 745 / 788 20240814-07:14:46 wrote gpt_002.pth 20240814-07:14:47 wrote gpt_001.pth 20240814-07:15:01 wrote non_validated_0085_02.png 20240814-07:15:15 wrote non_validated_0085_01.png 20240814-07:15:15 wrote state.pth 20240814-07:15:15 --- epoch 86 ---------------------------------------- 20240814-07:15:15 current_test_accuracies 0.9568 0.9616 0.9454 0.9605 0.9571 20240814-07:15:15 training model 2 20240814-07:15:15 training model 0 20240814-07:18:58 train_perplexity 86 model 2 1.151305810185555 20240814-07:18:58 train_perplexity 86 model 0 1.1524194963227667 20240814-07:19:02 test_perplexity 86 model 2 1.1488280062354765 20240814-07:19:02 test_perplexity 86 model 0 1.1481707847037075 20240814-07:20:28 test_accuracy 86 model 0 val 749 / 796 20240814-07:20:30 test_accuracy 86 model 2 val 716 / 767 20240814-07:20:32 wrote gpt_002.pth 20240814-07:20:33 wrote gpt_000.pth 20240814-07:20:47 wrote non_validated_0086_02.png 20240814-07:21:02 wrote non_validated_0086_00.png 20240814-07:21:02 wrote state.pth 20240814-07:21:02 --- epoch 87 ---------------------------------------- 20240814-07:21:02 current_test_accuracies 0.9410 0.9616 0.9335 0.9605 0.9571 20240814-07:21:02 training model 2 20240814-07:21:02 training model 0 20240814-07:24:45 train_perplexity 87 model 2 1.1510782115904776 20240814-07:24:45 train_perplexity 87 model 0 1.1521105998718784 20240814-07:24:48 test_perplexity 87 model 2 1.1463595394622985 20240814-07:24:48 test_perplexity 87 model 0 1.147106845222631 20240814-07:26:09 test_accuracy 87 model 2 val 768 / 805 20240814-07:26:11 test_accuracy 87 model 0 val 773 / 807 20240814-07:26:13 wrote gpt_002.pth 20240814-07:26:13 wrote gpt_000.pth 20240814-07:26:28 wrote non_validated_0087_02.png 20240814-07:26:42 wrote non_validated_0087_00.png 20240814-07:26:42 wrote state.pth 20240814-07:26:42 --- epoch 88 ---------------------------------------- 20240814-07:26:42 current_test_accuracies 0.9579 0.9616 0.9540 0.9605 0.9571 20240814-07:53:50 keep c_quizzes model 3 validated nb_validated 2159 / 5125 (finishes Wed 08:31 -- 4774/h) proportion_kept 42.13% 20240814-08:20:55 keep c_quizzes model 0 validated nb_validated 4382 / 5125 (finishes Wed 08:30 -- 4849/h) proportion_kept 42.75% 20240814-08:48:01 keep c_quizzes model 0 validated nb_validated 6611 / 5125 (finishes now! -- 4878/h) proportion_kept 43.00% 20240814-08:48:01 teacher model 0 to [0, 894, 1998, 1371, 964] 20240814-08:48:01 teacher model 1 to [475, 0, 233, 180, 158] 20240814-08:48:01 teacher model 2 to [68, 69, 0, 25, 21] 20240814-08:48:01 teacher model 3 to [48, 50, 48, 0, 0] 20240814-08:48:01 teacher model 4 to [3, 0, 3, 3, 0] 20240814-08:48:05 nb_c_quizzes model 0 train 4345 test 111 20240814-08:48:08 nb_c_quizzes model 1 train 4348 test 111 20240814-08:48:12 nb_c_quizzes model 2 train 5452 test 137 20240814-08:48:15 nb_c_quizzes model 3 train 5193 test 132 20240814-08:48:19 nb_c_quizzes model 4 train 4709 test 120 20240814-08:48:19 training model 0 20240814-08:48:19 training model 1 20240814-08:52:01 train_perplexity 88 model 0 1.1527769881077299 20240814-08:52:01 train_perplexity 88 model 1 1.152450305620526 20240814-08:52:05 test_perplexity 88 model 0 1.1478207987254077 20240814-08:52:05 test_perplexity 88 model 1 1.1521411277164348 20240814-08:53:29 test_accuracy 88 model 1 val 791 / 820 20240814-08:53:33 test_accuracy 88 model 0 val 731 / 770 20240814-08:53:35 wrote gpt_000.pth 20240814-08:53:36 wrote gpt_001.pth 20240814-08:53:50 wrote non_validated_0088_00.png 20240814-08:54:05 wrote non_validated_0088_01.png 20240814-08:54:05 wrote state.pth 20240814-08:54:05 --- epoch 89 ---------------------------------------- 20240814-08:54:05 current_test_accuracies 0.9494 0.9646 0.0000 0.0000 0.0000 20240814-08:54:05 training model 2 20240814-08:54:05 training model 3 20240814-08:57:48 train_perplexity 89 model 3 1.153117229076106 20240814-08:57:48 train_perplexity 89 model 2 1.153603550072353 20240814-08:57:52 test_perplexity 89 model 3 1.1503571345470183 20240814-08:57:52 test_perplexity 89 model 2 1.149194715880147 20240814-08:59:16 test_accuracy 89 model 3 val 777 / 810 20240814-08:59:17 test_accuracy 89 model 2 val 749 / 789 20240814-08:59:19 wrote gpt_002.pth 20240814-08:59:20 wrote gpt_003.pth 20240814-08:59:34 wrote non_validated_0089_02.png 20240814-08:59:49 wrote non_validated_0089_03.png 20240814-08:59:49 wrote state.pth 20240814-08:59:49 --- epoch 90 ---------------------------------------- 20240814-08:59:49 current_test_accuracies 0.9494 0.9646 0.9493 0.9593 0.0000 20240814-08:59:49 training model 4 20240814-08:59:49 training model 2 20240814-09:03:31 train_perplexity 90 model 2 1.1533007187529254 20240814-09:03:34 test_perplexity 90 model 2 1.1510850585281034 20240814-09:03:34 train_perplexity 90 model 4 1.1533414588054167 20240814-09:03:37 test_perplexity 90 model 4 1.1503693313292391 20240814-09:05:00 test_accuracy 90 model 2 val 776 / 819 20240814-09:05:01 test_accuracy 90 model 4 val 753 / 796 20240814-09:05:03 wrote gpt_004.pth 20240814-09:05:04 wrote gpt_002.pth 20240814-09:05:18 wrote non_validated_0090_04.png 20240814-09:05:33 wrote non_validated_0090_02.png 20240814-09:05:33 wrote state.pth 20240814-09:05:33 --- epoch 91 ---------------------------------------- 20240814-09:05:33 current_test_accuracies 0.9494 0.9646 0.9475 0.9593 0.9460 20240814-09:05:33 training model 4 20240814-09:05:33 training model 2 20240814-09:09:15 train_perplexity 91 model 2 1.1533098391927412 20240814-09:09:15 train_perplexity 91 model 4 1.1529913450387725 20240814-09:09:19 test_perplexity 91 model 2 1.1555780866275165 20240814-09:09:19 test_perplexity 91 model 4 1.1547574044922129 20240814-09:10:43 test_accuracy 91 model 4 val 778 / 810 20240814-09:10:44 test_accuracy 91 model 2 val 748 / 798 20240814-09:10:46 wrote gpt_004.pth 20240814-09:10:47 wrote gpt_002.pth 20240814-09:11:01 wrote non_validated_0091_04.png 20240814-09:11:16 wrote non_validated_0091_02.png 20240814-09:11:16 wrote state.pth 20240814-09:11:16 --- epoch 92 ---------------------------------------- 20240814-09:11:16 current_test_accuracies 0.9494 0.9646 0.9373 0.9593 0.9605 20240814-09:11:16 training model 2 20240814-09:11:16 training model 0 20240814-09:14:58 train_perplexity 92 model 2 1.1532698621887756 20240814-09:14:58 train_perplexity 92 model 0 1.1522650499290084 20240814-09:15:02 test_perplexity 92 model 2 1.151915967213243 20240814-09:15:02 test_perplexity 92 model 0 1.1520030827302845 20240814-09:16:25 test_accuracy 92 model 0 val 774 / 803 20240814-09:16:27 test_accuracy 92 model 2 val 748 / 792 20240814-09:16:29 wrote gpt_002.pth 20240814-09:16:29 wrote gpt_000.pth 20240814-09:16:44 wrote non_validated_0092_02.png 20240814-09:16:58 wrote non_validated_0092_00.png 20240814-09:16:58 wrote state.pth 20240814-09:16:58 --- epoch 93 ---------------------------------------- 20240814-09:16:58 current_test_accuracies 0.9639 0.9646 0.9444 0.9593 0.9605 20240814-09:16:58 training model 2 20240814-09:16:58 training model 3 20240814-09:20:41 train_perplexity 93 model 2 1.1531157264102574 20240814-09:20:41 train_perplexity 93 model 3 1.1528278923860484 20240814-09:20:45 test_perplexity 93 model 2 1.1532618931065284 20240814-09:20:45 test_perplexity 93 model 3 1.1519537406650506 20240814-09:22:07 test_accuracy 93 model 2 val 767 / 810 20240814-09:22:08 test_accuracy 93 model 3 val 776 / 819 20240814-09:22:10 wrote gpt_002.pth 20240814-09:22:11 wrote gpt_003.pth 20240814-09:22:26 wrote non_validated_0093_02.png 20240814-09:22:40 wrote non_validated_0093_03.png 20240814-09:22:40 wrote state.pth 20240814-09:22:40 --- epoch 94 ---------------------------------------- 20240814-09:22:40 current_test_accuracies 0.9639 0.9646 0.9469 0.9475 0.9605 20240814-09:22:40 training model 2 20240814-09:22:40 training model 3 20240814-09:26:22 train_perplexity 94 model 3 1.1525608684028847 20240814-09:26:22 train_perplexity 94 model 2 1.1530720709737852 20240814-09:26:26 test_perplexity 94 model 3 1.1484046094062972 20240814-09:26:26 test_perplexity 94 model 2 1.1542908843782365 20240814-09:27:47 test_accuracy 94 model 2 val 751 / 805 20240814-09:27:48 test_accuracy 94 model 3 val 775 / 810 20240814-09:27:50 wrote gpt_002.pth 20240814-09:27:51 wrote gpt_003.pth 20240814-09:28:06 wrote non_validated_0094_02.png 20240814-09:28:20 wrote non_validated_0094_03.png 20240814-09:28:20 wrote state.pth 20240814-09:28:20 --- epoch 95 ---------------------------------------- 20240814-09:28:20 current_test_accuracies 0.9639 0.9646 0.9329 0.9568 0.9605 20240814-09:28:20 training model 2 20240814-09:28:20 training model 3 20240814-09:32:02 train_perplexity 95 model 3 1.1525025252262995 20240814-09:32:02 train_perplexity 95 model 2 1.1530304311759743 20240814-09:32:06 test_perplexity 95 model 3 1.1492021617794872 20240814-09:32:06 test_perplexity 95 model 2 1.1524390912259108 20240814-09:33:27 test_accuracy 95 model 2 val 794 / 837 20240814-09:33:30 test_accuracy 95 model 3 val 763 / 794 20240814-09:33:32 wrote gpt_002.pth 20240814-09:33:33 wrote gpt_003.pth 20240814-09:33:47 wrote non_validated_0095_02.png 20240814-09:34:02 wrote non_validated_0095_03.png 20240814-09:34:02 wrote state.pth 20240814-09:34:02 --- epoch 96 ---------------------------------------- 20240814-09:34:02 current_test_accuracies 0.9639 0.9646 0.9486 0.9610 0.9605 20240814-09:34:02 training model 2 20240814-09:34:02 training model 4 20240814-09:37:44 train_perplexity 96 model 4 1.152653051244166 20240814-09:37:46 train_perplexity 96 model 2 1.1527868675750743 20240814-09:37:47 test_perplexity 96 model 4 1.1513974645213478 20240814-09:37:49 test_perplexity 96 model 2 1.1497638644123163 20240814-09:39:11 test_accuracy 96 model 2 val 768 / 805 20240814-09:39:13 test_accuracy 96 model 4 val 748 / 791 20240814-09:39:15 wrote gpt_002.pth 20240814-09:39:16 wrote gpt_004.pth 20240814-09:39:30 wrote non_validated_0096_02.png 20240814-09:39:45 wrote non_validated_0096_04.png 20240814-09:39:45 wrote state.pth 20240814-09:39:45 --- epoch 97 ---------------------------------------- 20240814-09:39:45 current_test_accuracies 0.9639 0.9646 0.9540 0.9610 0.9456 20240814-09:39:45 training model 4 20240814-09:39:45 training model 2 20240814-09:43:27 train_perplexity 97 model 4 1.152256915167444 20240814-09:43:27 train_perplexity 97 model 2 1.152618769911178 20240814-09:43:30 test_perplexity 97 model 4 1.150449417788073 20240814-09:43:30 test_perplexity 97 model 2 1.1523567166062525 20240814-09:44:57 test_accuracy 97 model 4 val 756 / 799 20240814-09:44:57 test_accuracy 97 model 2 val 748 / 794 20240814-09:44:59 wrote gpt_004.pth 20240814-09:45:00 wrote gpt_002.pth 20240814-09:45:14 wrote non_validated_0097_04.png 20240814-09:45:29 wrote non_validated_0097_02.png 20240814-09:45:29 wrote state.pth 20240814-09:45:29 --- epoch 98 ---------------------------------------- 20240814-09:45:29 current_test_accuracies 0.9639 0.9646 0.9421 0.9610 0.9462 20240814-09:45:29 training model 2 20240814-09:45:29 training model 4 20240814-09:49:11 train_perplexity 98 model 2 1.1526894826161436 20240814-09:49:11 train_perplexity 98 model 4 1.1524124132869404 20240814-09:49:15 test_perplexity 98 model 2 1.1490687251357823 20240814-09:49:15 test_perplexity 98 model 4 1.1494204102415617 20240814-09:50:39 test_accuracy 98 model 2 val 764 / 804 20240814-09:50:40 test_accuracy 98 model 4 val 743 / 785 20240814-09:50:42 wrote gpt_002.pth 20240814-09:50:43 wrote gpt_004.pth 20240814-09:50:57 wrote non_validated_0098_02.png 20240814-09:51:12 wrote non_validated_0098_04.png 20240814-09:51:12 wrote state.pth 20240814-09:51:12 --- epoch 99 ---------------------------------------- 20240814-09:51:12 current_test_accuracies 0.9639 0.9646 0.9502 0.9610 0.9465 20240814-09:51:12 training model 4 20240814-09:51:12 training model 2 20240814-09:54:54 train_perplexity 99 model 4 1.1522942110620216 20240814-09:54:54 train_perplexity 99 model 2 1.152714272159307 20240814-09:54:58 test_perplexity 99 model 4 1.1482988252748 20240814-09:54:58 test_perplexity 99 model 2 1.150089622957807 20240814-09:56:23 test_accuracy 99 model 4 val 745 / 783 20240814-09:56:25 test_accuracy 99 model 2 val 719 / 755 20240814-09:56:27 wrote gpt_004.pth 20240814-09:56:28 wrote gpt_002.pth 20240814-09:56:42 wrote non_validated_0099_04.png 20240814-09:56:57 wrote non_validated_0099_02.png 20240814-09:56:57 wrote state.pth 20240814-09:56:57 --- epoch 100 ---------------------------------------- 20240814-09:56:57 current_test_accuracies 0.9639 0.9646 0.9523 0.9610 0.9515 20240814-10:23:53 keep c_quizzes model 4 validated nb_validated 2059 / 5125 (finishes Wed 11:03 -- 4586/h) proportion_kept 40.18% 20240814-10:50:48 keep c_quizzes model 2 validated nb_validated 4170 / 5125 (finishes Wed 11:03 -- 4645/h) proportion_kept 40.68% 20240814-11:17:35 keep c_quizzes model 0 validated nb_validated 6252 / 5125 (finishes now! -- 4651/h) proportion_kept 40.66% 20240814-11:17:35 teacher model 0 to [0, 907, 1027, 799, 639] 20240814-11:17:35 teacher model 1 to [773, 0, 310, 277, 248] 20240814-11:17:35 teacher model 2 to [324, 327, 0, 125, 82] 20240814-11:17:35 teacher model 3 to [132, 133, 135, 0, 0] 20240814-11:17:35 teacher model 4 to [5, 2, 3, 4, 0] 20240814-11:17:39 nb_c_quizzes model 0 train 5548 test 142 20240814-11:17:42 nb_c_quizzes model 1 train 5683 test 145 20240814-11:17:46 nb_c_quizzes model 2 train 6891 test 173 20240814-11:17:50 nb_c_quizzes model 3 train 6368 test 162 20240814-11:17:53 nb_c_quizzes model 4 train 5654 test 144 20240814-11:17:53 training model 0 20240814-11:17:53 training model 1 20240814-11:21:36 train_perplexity 100 model 1 1.154872439302904 20240814-11:21:36 train_perplexity 100 model 0 1.1540060070683256 20240814-11:21:40 test_perplexity 100 model 1 1.153028987421448 20240814-11:21:40 test_perplexity 100 model 0 1.150129637330473 20240814-11:23:02 test_accuracy 100 model 0 val 766 / 805 20240814-11:23:04 test_accuracy 100 model 1 val 747 / 782 20240814-11:23:06 wrote gpt_000.pth 20240814-11:23:07 wrote gpt_001.pth 20240814-11:23:21 wrote non_validated_0100_00.png 20240814-11:23:36 wrote non_validated_0100_01.png 20240814-11:23:36 wrote state.pth 20240814-11:23:36 --- epoch 101 ---------------------------------------- 20240814-11:23:36 current_test_accuracies 0.9516 0.9552 0.0000 0.0000 0.0000 20240814-11:23:36 training model 2 20240814-11:23:36 training model 3 20240814-11:27:18 train_perplexity 101 model 3 1.1543646450273377 20240814-11:27:18 train_perplexity 101 model 2 1.1544066346128532 20240814-11:27:22 test_perplexity 101 model 3 1.1511551863183764 20240814-11:27:22 test_perplexity 101 model 2 1.1534570205623869 20240814-11:28:44 test_accuracy 101 model 3 val 769 / 803 20240814-11:28:46 test_accuracy 101 model 2 val 767 / 813 20240814-11:28:48 wrote gpt_002.pth 20240814-11:28:48 wrote gpt_003.pth 20240814-11:29:03 wrote non_validated_0101_02.png 20240814-11:29:17 wrote non_validated_0101_03.png 20240814-11:29:17 wrote state.pth 20240814-11:29:17 --- epoch 102 ---------------------------------------- 20240814-11:29:17 current_test_accuracies 0.9516 0.9552 0.9434 0.9577 0.0000 20240814-11:29:17 training model 4 20240814-11:29:17 training model 2 20240814-11:33:00 train_perplexity 102 model 2 1.154168484544801 20240814-11:33:00 train_perplexity 102 model 4 1.1540743366600794 20240814-11:33:03 test_perplexity 102 model 2 1.1527051480073454 20240814-11:33:03 test_perplexity 102 model 4 1.1499073204386385 20240814-11:34:30 test_accuracy 102 model 4 val 721 / 775 20240814-11:34:30 test_accuracy 102 model 2 val 754 / 793 20240814-11:34:33 wrote gpt_004.pth 20240814-11:34:33 wrote gpt_002.pth 20240814-11:34:48 wrote non_validated_0102_04.png 20240814-11:35:02 wrote non_validated_0102_02.png 20240814-11:35:02 wrote state.pth 20240814-11:35:02 --- epoch 103 ---------------------------------------- 20240814-11:35:02 current_test_accuracies 0.9516 0.9552 0.9508 0.9577 0.9303 20240814-11:35:02 training model 4 20240814-11:35:02 training model 2 20240814-11:38:45 train_perplexity 103 model 2 1.1543815274658362 20240814-11:38:45 train_perplexity 103 model 4 1.153278260198419 20240814-11:38:49 test_perplexity 103 model 2 1.1542178409935433 20240814-11:38:49 test_perplexity 103 model 4 1.1529411178555087 20240814-11:40:11 test_accuracy 103 model 4 val 784 / 812 20240814-11:40:14 test_accuracy 103 model 2 val 758 / 798 20240814-11:40:16 wrote gpt_004.pth 20240814-11:40:17 wrote gpt_002.pth 20240814-11:40:31 wrote non_validated_0103_04.png 20240814-11:40:46 wrote non_validated_0103_02.png 20240814-11:40:46 wrote state.pth 20240814-11:40:46 --- epoch 104 ---------------------------------------- 20240814-11:40:46 current_test_accuracies 0.9516 0.9552 0.9499 0.9577 0.9655 20240814-11:40:46 training model 2 20240814-11:40:46 training model 0 20240814-11:44:28 train_perplexity 104 model 0 1.1540037414452953 20240814-11:44:28 train_perplexity 104 model 2 1.1544367472989678 20240814-11:44:32 test_perplexity 104 model 0 1.1504399780161096 20240814-11:44:32 test_perplexity 104 model 2 1.152901223848584 20240814-11:45:57 test_accuracy 104 model 0 val 735 / 783 20240814-11:45:58 test_accuracy 104 model 2 val 742 / 786 20240814-11:46:00 wrote gpt_002.pth 20240814-11:46:01 wrote gpt_000.pth 20240814-11:46:15 wrote non_validated_0104_02.png 20240814-11:46:30 wrote non_validated_0104_00.png 20240814-11:46:30 wrote state.pth 20240814-11:46:30 --- epoch 105 ---------------------------------------- 20240814-11:46:30 current_test_accuracies 0.9387 0.9552 0.9440 0.9577 0.9655 20240814-11:46:30 training model 0 20240814-11:46:30 training model 2 20240814-11:50:13 train_perplexity 105 model 0 1.154081951468069 20240814-11:50:13 train_perplexity 105 model 2 1.1542323338389568 20240814-11:50:16 test_perplexity 105 model 0 1.1514340413777229 20240814-11:50:16 test_perplexity 105 model 2 1.153011007367211 20240814-11:51:40 test_accuracy 105 model 2 val 774 / 806 20240814-11:51:42 test_accuracy 105 model 0 val 747 / 792 20240814-11:51:44 wrote gpt_000.pth 20240814-11:51:45 wrote gpt_002.pth 20240814-11:51:59 wrote non_validated_0105_00.png 20240814-11:52:14 wrote non_validated_0105_02.png 20240814-11:52:14 wrote state.pth 20240814-11:52:14 --- epoch 106 ---------------------------------------- 20240814-11:52:14 current_test_accuracies 0.9432 0.9552 0.9603 0.9577 0.9655 20240814-11:52:14 training model 0 20240814-11:52:14 training model 1 20240814-11:55:57 train_perplexity 106 model 1 1.1542661417958782 20240814-11:55:57 train_perplexity 106 model 0 1.1538000128144514 20240814-11:56:00 test_perplexity 106 model 1 1.1518338072004939 20240814-11:56:00 test_perplexity 106 model 0 1.1510561633537213 20240814-11:57:23 test_accuracy 106 model 1 val 767 / 807 20240814-11:57:23 test_accuracy 106 model 0 val 769 / 807 20240814-11:57:26 wrote gpt_000.pth 20240814-11:57:27 wrote gpt_001.pth 20240814-11:57:41 wrote non_validated_0106_00.png 20240814-11:57:55 wrote non_validated_0106_01.png 20240814-11:57:55 wrote state.pth 20240814-11:57:55 --- epoch 107 ---------------------------------------- 20240814-11:57:55 current_test_accuracies 0.9529 0.9504 0.9603 0.9577 0.9655 20240814-12:25:03 keep c_quizzes model 1 validated nb_validated 1780 / 5125 (finishes Wed 13:16 -- 3936/h) proportion_kept 34.73% 20240814-12:52:04 keep c_quizzes model 4 validated nb_validated 3649 / 5125 (finishes Wed 13:13 -- 4044/h) proportion_kept 35.60% 20240814-13:19:05 keep c_quizzes model 2 validated nb_validated 5544 / 5125 (finishes now! -- 4098/h) proportion_kept 36.06% 20240814-13:19:05 teacher model 0 to [0, 1446, 844, 946, 968] 20240814-13:19:05 teacher model 1 to [236, 0, 63, 84, 81] 20240814-13:19:05 teacher model 2 to [254, 256, 0, 102, 70] 20240814-13:19:05 teacher model 3 to [64, 64, 65, 0, 0] 20240814-13:19:05 teacher model 4 to [1, 0, 0, 0, 0] 20240814-13:19:08 nb_c_quizzes model 0 train 6089 test 156 20240814-13:19:12 nb_c_quizzes model 1 train 7405 test 189 20240814-13:19:16 nb_c_quizzes model 2 train 7839 test 197 20240814-13:19:19 nb_c_quizzes model 3 train 7472 test 190 20240814-13:19:23 nb_c_quizzes model 4 train 6745 test 172 20240814-13:19:23 training model 0 20240814-13:19:23 training model 1 20240814-13:23:05 train_perplexity 107 model 0 1.1546416160970883 20240814-13:23:05 train_perplexity 107 model 1 1.1570113225114014 20240814-13:23:09 test_perplexity 107 model 0 1.152230170159787 20240814-13:23:09 test_perplexity 107 model 1 1.1569045361839196 20240814-13:24:33 test_accuracy 107 model 0 val 781 / 818 20240814-13:24:35 test_accuracy 107 model 1 val 746 / 799 20240814-13:24:37 wrote gpt_000.pth 20240814-13:24:38 wrote gpt_001.pth 20240814-13:24:52 wrote non_validated_0107_00.png 20240814-13:25:07 wrote non_validated_0107_01.png 20240814-13:25:07 wrote state.pth 20240814-13:25:07 --- epoch 108 ---------------------------------------- 20240814-13:25:07 current_test_accuracies 0.9548 0.9337 0.0000 0.0000 0.0000 20240814-13:25:07 training model 2 20240814-13:25:07 training model 3 20240814-13:28:48 train_perplexity 108 model 3 1.1556400090066894 20240814-13:28:50 train_perplexity 108 model 2 1.1553889399617934 20240814-13:28:52 test_perplexity 108 model 3 1.1554026509154056 20240814-13:28:53 test_perplexity 108 model 2 1.153055140708602 20240814-13:30:15 test_accuracy 108 model 3 val 757 / 804 20240814-13:30:15 test_accuracy 108 model 2 val 755 / 800 20240814-13:30:18 wrote gpt_002.pth 20240814-13:30:18 wrote gpt_003.pth 20240814-13:30:33 wrote non_validated_0108_02.png 20240814-13:30:47 wrote non_validated_0108_03.png 20240814-13:30:47 wrote state.pth 20240814-13:30:47 --- epoch 109 ---------------------------------------- 20240814-13:30:47 current_test_accuracies 0.9548 0.9337 0.9438 0.9415 0.0000 20240814-13:30:47 training model 4 20240814-13:30:47 training model 1 20240814-13:34:29 train_perplexity 109 model 1 1.1560659515390703 20240814-13:34:31 train_perplexity 109 model 4 1.1546794785300658 20240814-13:34:33 test_perplexity 109 model 1 1.153941388160019 20240814-13:34:34 test_perplexity 109 model 4 1.1548247030545975 20240814-13:35:56 test_accuracy 109 model 1 val 767 / 808 20240814-13:35:58 test_accuracy 109 model 4 val 769 / 819 20240814-13:36:00 wrote gpt_004.pth 20240814-13:36:01 wrote gpt_001.pth 20240814-13:36:15 wrote non_validated_0109_04.png 20240814-13:36:30 wrote non_validated_0109_01.png 20240814-13:36:30 wrote state.pth 20240814-13:36:30 --- epoch 110 ---------------------------------------- 20240814-13:36:30 current_test_accuracies 0.9548 0.9493 0.9438 0.9415 0.9389 20240814-13:36:30 training model 4 20240814-13:36:30 training model 3 20240814-13:40:12 train_perplexity 110 model 3 1.155297562832056 20240814-13:40:12 train_perplexity 110 model 4 1.154608060922202 20240814-13:40:16 test_perplexity 110 model 4 1.1519337804254715 20240814-13:40:16 test_perplexity 110 model 3 1.1555894033089789 20240814-13:41:40 test_accuracy 110 model 4 val 777 / 816 20240814-13:41:40 test_accuracy 110 model 3 val 759 / 808 20240814-13:41:43 wrote gpt_004.pth 20240814-13:41:43 wrote gpt_003.pth 20240814-13:41:58 wrote non_validated_0110_04.png 20240814-13:42:12 wrote non_validated_0110_03.png 20240814-13:42:12 wrote state.pth 20240814-13:42:12 --- epoch 111 ---------------------------------------- 20240814-13:42:12 current_test_accuracies 0.9548 0.9493 0.9438 0.9394 0.9522 20240814-13:42:12 training model 3 20240814-13:42:12 training model 2 20240814-13:45:54 train_perplexity 111 model 3 1.155394923576378 20240814-13:45:54 train_perplexity 111 model 2 1.155448777356866 20240814-13:45:58 test_perplexity 111 model 3 1.1541841190419788 20240814-13:45:58 test_perplexity 111 model 2 1.1563082996752903 20240814-13:47:21 test_accuracy 111 model 3 val 758 / 802 20240814-13:47:21 test_accuracy 111 model 2 val 778 / 812 20240814-13:47:23 wrote gpt_003.pth 20240814-13:47:24 wrote gpt_002.pth 20240814-13:47:39 wrote non_validated_0111_03.png 20240814-13:47:53 wrote non_validated_0111_02.png 20240814-13:47:53 wrote state.pth 20240814-13:47:53 --- epoch 112 ---------------------------------------- 20240814-13:47:53 current_test_accuracies 0.9548 0.9493 0.9581 0.9451 0.9522 20240814-13:47:53 training model 3 20240814-13:47:53 training model 1 20240814-13:51:35 train_perplexity 112 model 3 1.1548133324290666 20240814-13:51:35 train_perplexity 112 model 1 1.1561182937541035 20240814-13:51:39 test_perplexity 112 model 3 1.1545764864030637 20240814-13:51:39 test_perplexity 112 model 1 1.1541082949015256 20240814-13:53:02 test_accuracy 112 model 3 val 763 / 805 20240814-13:53:05 test_accuracy 112 model 1 val 755 / 792 20240814-13:53:07 wrote gpt_003.pth 20240814-13:53:08 wrote gpt_001.pth 20240814-13:53:22 wrote non_validated_0112_03.png 20240814-13:53:37 wrote non_validated_0112_01.png 20240814-13:53:37 wrote state.pth 20240814-13:53:37 --- epoch 113 ---------------------------------------- 20240814-13:53:37 current_test_accuracies 0.9548 0.9533 0.9581 0.9478 0.9522 20240814-13:53:37 training model 3 20240814-13:53:37 training model 4 20240814-13:57:19 train_perplexity 113 model 3 1.155156741060225 20240814-13:57:19 train_perplexity 113 model 4 1.15460555193041 20240814-13:57:23 test_perplexity 113 model 3 1.153905865848499 20240814-13:57:23 test_perplexity 113 model 4 1.1531779859569786 20240814-13:58:46 test_accuracy 113 model 3 val 780 / 821 20240814-13:58:48 test_accuracy 113 model 4 val 741 / 793 20240814-13:58:50 wrote gpt_003.pth 20240814-13:58:51 wrote gpt_004.pth 20240814-13:59:05 wrote non_validated_0113_03.png 20240814-13:59:20 wrote non_validated_0113_04.png 20240814-13:59:20 wrote state.pth 20240814-13:59:20 --- epoch 114 ---------------------------------------- 20240814-13:59:20 current_test_accuracies 0.9548 0.9533 0.9581 0.9501 0.9344 20240814-13:59:20 training model 4 20240814-13:59:20 training model 3 20240814-14:03:02 train_perplexity 114 model 4 1.1550906689120093 20240814-14:03:02 train_perplexity 114 model 3 1.1550253348055863 20240814-14:03:06 test_perplexity 114 model 4 1.1563375224796437 20240814-14:03:06 test_perplexity 114 model 3 1.155129430004665 20240814-14:04:28 test_accuracy 114 model 3 val 754 / 812 20240814-14:04:30 test_accuracy 114 model 4 val 735 / 776 20240814-14:04:32 wrote gpt_004.pth 20240814-14:04:33 wrote gpt_003.pth 20240814-14:04:47 wrote non_validated_0114_04.png 20240814-14:05:02 wrote non_validated_0114_03.png 20240814-14:05:02 wrote state.pth 20240814-14:05:02 --- epoch 115 ---------------------------------------- 20240814-14:05:02 current_test_accuracies 0.9548 0.9533 0.9581 0.9286 0.9472 20240814-14:05:02 training model 3 20240814-14:05:02 training model 4 20240814-14:08:44 train_perplexity 115 model 3 1.1551158685732377 20240814-14:08:44 train_perplexity 115 model 4 1.1546848814084345 20240814-14:08:48 test_perplexity 115 model 3 1.1542633793169321 20240814-14:08:48 test_perplexity 115 model 4 1.1520450230552544 20240814-14:10:12 test_accuracy 115 model 3 val 786 / 822 20240814-14:10:14 test_accuracy 115 model 4 val 743 / 790 20240814-14:10:16 wrote gpt_003.pth 20240814-14:10:17 wrote gpt_004.pth 20240814-14:10:32 wrote non_validated_0115_03.png 20240814-14:10:46 wrote non_validated_0115_04.png 20240814-14:10:46 wrote state.pth 20240814-14:10:46 --- epoch 116 ---------------------------------------- 20240814-14:10:46 current_test_accuracies 0.9548 0.9533 0.9581 0.9562 0.9405 20240814-14:10:46 training model 4 20240814-14:10:46 training model 1 20240814-14:14:28 train_perplexity 116 model 4 1.153958273097448 20240814-14:14:28 train_perplexity 116 model 1 1.1564687120999095 20240814-14:14:32 test_perplexity 116 model 4 1.1522489578975725 20240814-14:14:32 test_perplexity 116 model 1 1.1534563596897824 20240814-14:15:57 test_accuracy 116 model 4 val 758 / 793 20240814-14:15:57 test_accuracy 116 model 1 val 731 / 781 20240814-14:15:59 wrote gpt_004.pth 20240814-14:16:00 wrote gpt_001.pth 20240814-14:16:14 wrote non_validated_0116_04.png 20240814-14:16:29 wrote non_validated_0116_01.png 20240814-14:16:29 wrote state.pth 20240814-14:16:29 --- epoch 117 ---------------------------------------- 20240814-14:16:29 current_test_accuracies 0.9548 0.9360 0.9581 0.9562 0.9559 20240814-14:16:29 training model 1 20240814-14:16:29 training model 0 20240814-14:20:11 train_perplexity 117 model 1 1.1562452075824496 20240814-14:20:11 train_perplexity 117 model 0 1.1542239763626383 20240814-14:20:15 test_perplexity 117 model 1 1.1532561374544033 20240814-14:20:15 test_perplexity 117 model 0 1.1531642738826333 20240814-14:21:37 test_accuracy 117 model 0 val 787 / 818 20240814-14:21:37 test_accuracy 117 model 1 val 758 / 805 20240814-14:21:40 wrote gpt_001.pth 20240814-14:21:41 wrote gpt_000.pth 20240814-14:21:55 wrote non_validated_0117_01.png 20240814-14:22:10 wrote non_validated_0117_00.png 20240814-14:22:10 wrote state.pth 20240814-14:22:10 --- epoch 118 ---------------------------------------- 20240814-14:22:10 current_test_accuracies 0.9621 0.9416 0.9581 0.9562 0.9559 20240814-14:22:10 training model 1 20240814-14:22:10 training model 4 20240814-14:25:51 train_perplexity 118 model 4 1.1542125020829144 20240814-14:25:53 train_perplexity 118 model 1 1.156350392376272 20240814-14:25:55 test_perplexity 118 model 4 1.1524217314937772 20240814-14:25:56 test_perplexity 118 model 1 1.1538123133038471 20240814-14:27:21 test_accuracy 118 model 1 val 754 / 795 20240814-14:27:22 test_accuracy 118 model 4 val 736 / 781 20240814-14:27:24 wrote gpt_001.pth 20240814-14:27:25 wrote gpt_004.pth 20240814-14:27:39 wrote non_validated_0118_01.png 20240814-14:27:53 wrote non_validated_0118_04.png 20240814-14:27:53 wrote state.pth 20240814-14:27:53 --- epoch 119 ---------------------------------------- 20240814-14:27:53 current_test_accuracies 0.9621 0.9484 0.9581 0.9562 0.9424 20240814-14:27:53 training model 4 20240814-14:27:53 training model 1 20240814-14:31:36 train_perplexity 119 model 1 1.1562934943289462 20240814-14:31:36 train_perplexity 119 model 4 1.1538953062800972 20240814-14:31:39 test_perplexity 119 model 1 1.1555083490767804 20240814-14:31:40 test_perplexity 119 model 4 1.1547753805911838 20240814-14:33:04 test_accuracy 119 model 1 val 760 / 814 20240814-14:33:06 test_accuracy 119 model 4 val 723 / 789 20240814-14:33:08 wrote gpt_004.pth 20240814-14:33:09 wrote gpt_001.pth 20240814-14:33:23 wrote non_validated_0119_04.png 20240814-14:33:38 wrote non_validated_0119_01.png 20240814-14:33:38 wrote state.pth 20240814-14:33:38 --- epoch 120 ---------------------------------------- 20240814-14:33:38 current_test_accuracies 0.9621 0.9337 0.9581 0.9562 0.9163 20240814-14:33:38 training model 4 20240814-14:33:38 training model 1 20240814-14:37:20 train_perplexity 120 model 4 1.1541010180590625 20240814-14:37:20 train_perplexity 120 model 1 1.155949090088602 20240814-14:37:23 test_perplexity 120 model 4 1.1509098311654817 20240814-14:37:24 test_perplexity 120 model 1 1.1544766491743244 20240814-14:38:48 test_accuracy 120 model 1 val 750 / 784 20240814-14:38:49 test_accuracy 120 model 4 val 748 / 785 20240814-14:38:51 wrote gpt_004.pth 20240814-14:38:52 wrote gpt_001.pth 20240814-14:39:06 wrote non_validated_0120_04.png 20240814-14:39:21 wrote non_validated_0120_01.png 20240814-14:39:21 wrote state.pth 20240814-14:39:21 --- epoch 121 ---------------------------------------- 20240814-14:39:21 current_test_accuracies 0.9621 0.9566 0.9581 0.9562 0.9529 20240814-15:06:26 keep c_quizzes model 2 validated nb_validated 2031 / 5125 (finishes Wed 15:47 -- 4498/h) proportion_kept 39.63% 20240814-15:33:18 keep c_quizzes model 3 validated nb_validated 4072 / 5125 (finishes Wed 15:47 -- 4528/h) proportion_kept 39.73% 20240814-16:00:17 keep c_quizzes model 3 validated nb_validated 6141 / 5125 (finishes now! -- 4552/h) proportion_kept 39.94% 20240814-16:00:17 teacher model 0 to [0, 1151, 534, 796, 1073] 20240814-16:00:17 teacher model 1 to [548, 0, 97, 173, 210] 20240814-16:00:17 teacher model 2 to [495, 495, 0, 135, 221] 20240814-16:00:17 teacher model 3 to [70, 71, 71, 0, 1] 20240814-16:00:17 teacher model 4 to [0, 0, 0, 0, 0] 20240814-16:00:21 nb_c_quizzes model 0 train 7174 test 184 20240814-16:00:24 nb_c_quizzes model 1 train 9080 test 231 20240814-16:00:28 nb_c_quizzes model 2 train 8523 test 215 20240814-16:00:32 nb_c_quizzes model 3 train 8549 test 217 20240814-16:00:35 nb_c_quizzes model 4 train 8213 test 209 20240814-16:00:35 training model 0 20240814-16:00:35 training model 1 20240814-16:04:18 train_perplexity 121 model 0 1.1561476474696404 20240814-16:04:18 train_perplexity 121 model 1 1.1588769892308914 20240814-16:04:21 test_perplexity 121 model 0 1.1518267456628186 20240814-16:04:21 test_perplexity 121 model 1 1.160348273998495 20240814-16:05:44 test_accuracy 121 model 1 val 760 / 814 20240814-16:05:47 test_accuracy 121 model 0 val 758 / 798 20240814-16:05:49 wrote gpt_000.pth 20240814-16:05:49 wrote gpt_001.pth 20240814-16:06:04 wrote non_validated_0121_00.png 20240814-16:06:18 wrote non_validated_0121_01.png 20240814-16:06:18 wrote state.pth 20240814-16:06:18 --- epoch 122 ---------------------------------------- 20240814-16:06:18 current_test_accuracies 0.9499 0.9337 0.0000 0.0000 0.0000 20240814-16:06:18 training model 2 20240814-16:06:18 training model 3 20240814-16:10:01 train_perplexity 122 model 3 1.1569332821016138 20240814-16:10:01 train_perplexity 122 model 2 1.1565401040672016 20240814-16:10:04 test_perplexity 122 model 3 1.1549402711292955 20240814-16:10:05 test_perplexity 122 model 2 1.1548676839877112 20240814-16:11:28 test_accuracy 122 model 3 val 772 / 817 20240814-16:11:29 test_accuracy 122 model 2 val 760 / 794 20240814-16:11:31 wrote gpt_002.pth 20240814-16:11:32 wrote gpt_003.pth 20240814-16:11:46 wrote non_validated_0122_02.png 20240814-16:12:01 wrote non_validated_0122_03.png 20240814-16:12:01 wrote state.pth 20240814-16:12:01 --- epoch 123 ---------------------------------------- 20240814-16:12:01 current_test_accuracies 0.9499 0.9337 0.9572 0.9449 0.0000 20240814-16:12:01 training model 4 20240814-16:12:01 training model 1 20240814-16:15:43 train_perplexity 123 model 4 1.1569475603959245 20240814-16:15:43 train_perplexity 123 model 1 1.1588423242880794 20240814-16:15:47 test_perplexity 123 model 4 1.1553431816028155 20240814-16:15:47 test_perplexity 123 model 1 1.1571893304080088 20240814-16:17:10 test_accuracy 123 model 4 val 775 / 812 20240814-16:17:12 test_accuracy 123 model 1 val 741 / 781 20240814-16:17:14 wrote gpt_004.pth 20240814-16:17:14 wrote gpt_001.pth 20240814-16:17:29 wrote non_validated_0123_04.png 20240814-16:17:43 wrote non_validated_0123_01.png 20240814-16:17:43 wrote state.pth 20240814-16:17:43 --- epoch 124 ---------------------------------------- 20240814-16:17:43 current_test_accuracies 0.9499 0.9488 0.9572 0.9449 0.9544 20240814-16:17:43 training model 3 20240814-16:17:43 training model 1 20240814-16:21:25 train_perplexity 124 model 3 1.1565097244063811 20240814-16:21:25 train_perplexity 124 model 1 1.1585454009775453 20240814-16:21:29 test_perplexity 124 model 3 1.1530455691633814 20240814-16:21:29 test_perplexity 124 model 1 1.15623349422061 20240814-16:22:52 test_accuracy 124 model 3 val 776 / 807 20240814-16:22:55 test_accuracy 124 model 1 val 748 / 796 20240814-16:22:56 wrote gpt_003.pth 20240814-16:22:57 wrote gpt_001.pth 20240814-16:23:12 wrote non_validated_0124_03.png 20240814-16:23:26 wrote non_validated_0124_01.png 20240814-16:23:26 wrote state.pth 20240814-16:23:26 --- epoch 125 ---------------------------------------- 20240814-16:23:26 current_test_accuracies 0.9499 0.9397 0.9572 0.9616 0.9544 20240814-16:23:26 training model 1 20240814-16:23:26 training model 0 20240814-16:27:08 train_perplexity 125 model 1 1.1592021511160089 20240814-16:27:08 train_perplexity 125 model 0 1.1561537871047036 20240814-16:27:12 test_perplexity 125 model 1 1.1559759940606984 20240814-16:27:12 test_perplexity 125 model 0 1.1557133344218395 20240814-16:28:36 test_accuracy 125 model 0 val 769 / 816 20240814-16:28:36 test_accuracy 125 model 1 val 758 / 801 20240814-16:28:39 wrote gpt_001.pth 20240814-16:28:39 wrote gpt_000.pth 20240814-16:28:54 wrote non_validated_0125_01.png 20240814-16:29:08 wrote non_validated_0125_00.png 20240814-16:29:08 wrote state.pth 20240814-16:29:08 --- epoch 126 ---------------------------------------- 20240814-16:29:08 current_test_accuracies 0.9424 0.9463 0.9572 0.9616 0.9544 20240814-16:29:08 training model 0 20240814-16:29:08 training model 1 20240814-16:32:51 train_perplexity 126 model 0 1.1557778033854238 20240814-16:32:51 train_perplexity 126 model 1 1.1584881304965058 20240814-16:32:55 test_perplexity 126 model 0 1.1536447924693107 20240814-16:32:55 test_perplexity 126 model 1 1.1559328446316093 20240814-16:34:19 test_accuracy 126 model 0 val 770 / 816 20240814-16:34:21 test_accuracy 126 model 1 val 744 / 793 20240814-16:34:23 wrote gpt_000.pth 20240814-16:34:24 wrote gpt_001.pth 20240814-16:34:38 wrote non_validated_0126_00.png 20240814-16:34:53 wrote non_validated_0126_01.png 20240814-16:34:53 wrote state.pth 20240814-16:34:53 --- epoch 127 ---------------------------------------- 20240814-16:34:53 current_test_accuracies 0.9436 0.9382 0.9572 0.9616 0.9544 20240814-16:34:53 training model 1 20240814-16:34:53 training model 0 20240814-16:38:35 train_perplexity 127 model 0 1.1558520298678514 20240814-16:38:36 train_perplexity 127 model 1 1.1584431962355501 20240814-16:38:39 test_perplexity 127 model 0 1.155597109762139 20240814-16:38:39 test_perplexity 127 model 1 1.158308472033217 20240814-16:40:02 test_accuracy 127 model 1 val 772 / 808 20240814-16:40:04 test_accuracy 127 model 0 val 773 / 810 20240814-16:40:05 wrote gpt_001.pth 20240814-16:40:06 wrote gpt_000.pth 20240814-16:40:21 wrote non_validated_0127_01.png 20240814-16:40:35 wrote non_validated_0127_00.png 20240814-16:40:35 wrote state.pth 20240814-16:40:35 --- epoch 128 ---------------------------------------- 20240814-16:40:35 current_test_accuracies 0.9543 0.9554 0.9572 0.9616 0.9544 20240814-17:07:50 keep c_quizzes model 4 validated nb_validated 2011 / 5125 (finishes Wed 17:50 -- 4429/h) proportion_kept 39.24% 20240814-17:34:48 keep c_quizzes model 4 validated nb_validated 3898 / 5125 (finishes Wed 17:51 -- 4313/h) proportion_kept 38.03% 20240814-18:01:56 keep c_quizzes model 4 validated nb_validated 5791 / 5125 (finishes now! -- 4270/h) proportion_kept 37.67% 20240814-18:01:56 teacher model 0 to [0, 613, 1120, 457, 686] 20240814-18:01:56 teacher model 1 to [898, 0, 545, 208, 355] 20240814-18:01:56 teacher model 2 to [166, 170, 0, 47, 59] 20240814-18:01:56 teacher model 3 to [154, 153, 156, 0, 4] 20240814-18:01:56 teacher model 4 to [0, 0, 0, 0, 0] 20240814-18:02:00 nb_c_quizzes model 0 train 8362 test 214 20240814-18:02:04 nb_c_quizzes model 1 train 9993 test 254 20240814-18:02:07 nb_c_quizzes model 2 train 10299 test 260 20240814-18:02:11 nb_c_quizzes model 3 train 9243 test 235 20240814-18:02:14 nb_c_quizzes model 4 train 9290 test 236 20240814-18:02:14 training model 0 20240814-18:02:14 training model 1 20240814-18:05:58 train_perplexity 128 model 0 1.159034449629181 20240814-18:05:58 train_perplexity 128 model 1 1.1604928536561154 20240814-18:06:01 test_perplexity 128 model 0 1.1552758209779155 20240814-18:06:01 test_perplexity 128 model 1 1.159769185569271 20240814-18:07:25 test_accuracy 128 model 1 val 761 / 812 20240814-18:07:27 test_accuracy 128 model 0 val 742 / 789 20240814-18:07:29 wrote gpt_000.pth 20240814-18:07:30 wrote gpt_001.pth 20240814-18:07:44 wrote non_validated_0128_00.png 20240814-18:07:59 wrote non_validated_0128_01.png 20240814-18:07:59 wrote state.pth 20240814-18:07:59 --- epoch 129 ---------------------------------------- 20240814-18:07:59 current_test_accuracies 0.9404 0.9372 0.0000 0.0000 0.0000 20240814-18:07:59 training model 2 20240814-18:07:59 training model 3 20240814-18:11:41 train_perplexity 129 model 2 1.1608735619895452 20240814-18:11:41 train_perplexity 129 model 3 1.158401088993521 20240814-18:11:45 test_perplexity 129 model 2 1.159804244608934 20240814-18:11:45 test_perplexity 129 model 3 1.1529366392108709 20240814-18:13:08 test_accuracy 129 model 2 val 758 / 810 20240814-18:13:10 test_accuracy 129 model 3 val 740 / 783 20240814-18:13:12 wrote gpt_002.pth 20240814-18:13:13 wrote gpt_003.pth 20240814-18:13:27 wrote non_validated_0129_02.png 20240814-18:13:41 wrote non_validated_0129_03.png 20240814-18:13:41 wrote state.pth 20240814-18:13:41 --- epoch 130 ---------------------------------------- 20240814-18:13:41 current_test_accuracies 0.9404 0.9372 0.9358 0.9451 0.0000 20240814-18:13:41 training model 4 20240814-18:13:41 training model 2 20240814-18:17:24 train_perplexity 130 model 2 1.1607820649095038 20240814-18:17:24 train_perplexity 130 model 4 1.1592168313866145 20240814-18:17:28 test_perplexity 130 model 2 1.1602055684139112 20240814-18:17:28 test_perplexity 130 model 4 1.1571773822574278 20240814-18:18:55 test_accuracy 130 model 2 val 737 / 798 20240814-18:18:55 test_accuracy 130 model 4 val 753 / 794 20240814-18:18:57 wrote gpt_004.pth 20240814-18:18:58 wrote gpt_002.pth 20240814-18:19:13 wrote non_validated_0130_04.png 20240814-18:19:27 wrote non_validated_0130_02.png 20240814-18:19:27 wrote state.pth 20240814-18:19:27 --- epoch 131 ---------------------------------------- 20240814-18:19:27 current_test_accuracies 0.9404 0.9372 0.9236 0.9451 0.9484 20240814-18:19:27 training model 2 20240814-18:19:27 training model 1 20240814-18:23:09 train_perplexity 131 model 1 1.1601546377565337 20240814-18:23:10 train_perplexity 131 model 2 1.1607473905842243 20240814-18:23:13 test_perplexity 131 model 1 1.1597204304443818 20240814-18:23:14 test_perplexity 131 model 2 1.1573539232237522 20240814-18:24:36 test_accuracy 131 model 2 val 771 / 816 20240814-18:24:38 test_accuracy 131 model 1 val 742 / 787 20240814-18:24:40 wrote gpt_002.pth 20240814-18:24:41 wrote gpt_001.pth 20240814-18:24:55 wrote non_validated_0131_02.png 20240814-18:25:10 wrote non_validated_0131_01.png 20240814-18:25:10 wrote state.pth 20240814-18:25:10 --- epoch 132 ---------------------------------------- 20240814-18:25:10 current_test_accuracies 0.9404 0.9428 0.9449 0.9451 0.9484 20240814-18:25:10 training model 0 20240814-18:25:10 training model 1 20240814-18:28:52 train_perplexity 132 model 0 1.158540069962573 20240814-18:28:52 train_perplexity 132 model 1 1.1598100923066914 20240814-18:28:56 test_perplexity 132 model 0 1.1569287605598726 20240814-18:28:56 test_perplexity 132 model 1 1.1568695143746393 20240814-18:30:20 test_accuracy 132 model 1 val 775 / 815 20240814-18:30:21 test_accuracy 132 model 0 val 758 / 799 20240814-18:30:23 wrote gpt_000.pth 20240814-18:30:24 wrote gpt_001.pth 20240814-18:30:38 wrote non_validated_0132_00.png 20240814-18:30:53 wrote non_validated_0132_01.png 20240814-18:30:53 wrote state.pth 20240814-18:30:53 --- epoch 133 ---------------------------------------- 20240814-18:30:53 current_test_accuracies 0.9487 0.9509 0.9449 0.9451 0.9484 20240814-18:30:53 training model 2 20240814-18:30:53 training model 3 20240814-18:34:34 train_perplexity 133 model 3 1.1579726281927951 20240814-18:34:35 train_perplexity 133 model 2 1.1608206913816348 20240814-18:34:38 test_perplexity 133 model 3 1.1554549478234826 20240814-18:34:38 test_perplexity 133 model 2 1.1611691030321791 20240814-18:36:01 test_accuracy 133 model 3 val 767 / 805 20240814-18:36:03 test_accuracy 133 model 2 val 769 / 819 20240814-18:36:05 wrote gpt_002.pth 20240814-18:36:05 wrote gpt_003.pth 20240814-18:36:20 wrote non_validated_0133_02.png 20240814-18:36:34 wrote non_validated_0133_03.png 20240814-18:36:34 wrote state.pth 20240814-18:36:34 --- epoch 134 ---------------------------------------- 20240814-18:36:34 current_test_accuracies 0.9487 0.9509 0.9389 0.9528 0.9484 20240814-18:36:34 training model 2 20240814-18:36:34 training model 4 20240814-18:40:16 train_perplexity 134 model 4 1.1591331118886519 20240814-18:40:16 train_perplexity 134 model 2 1.1605777555581673 20240814-18:40:20 test_perplexity 134 model 4 1.157549689132685 20240814-18:40:20 test_perplexity 134 model 2 1.158481280080177 20240814-18:41:44 test_accuracy 134 model 2 val 771 / 810 20240814-18:41:45 test_accuracy 134 model 4 val 743 / 779 20240814-18:41:47 wrote gpt_002.pth 20240814-18:41:48 wrote gpt_004.pth 20240814-18:42:02 wrote non_validated_0134_02.png 20240814-18:42:16 wrote non_validated_0134_04.png 20240814-18:42:16 wrote state.pth 20240814-18:42:16 --- epoch 135 ---------------------------------------- 20240814-18:42:16 current_test_accuracies 0.9487 0.9509 0.9519 0.9528 0.9538 20240814-18:42:16 training model 0 20240814-18:42:16 training model 1 20240814-18:45:59 train_perplexity 135 model 0 1.1578738876663537 20240814-18:45:59 train_perplexity 135 model 1 1.159959404632028 20240814-18:46:03 test_perplexity 135 model 0 1.1556215263182323 20240814-18:46:03 test_perplexity 135 model 1 1.1568275160633799 20240814-18:47:25 test_accuracy 135 model 0 val 763 / 811 20240814-18:47:25 test_accuracy 135 model 1 val 761 / 801 20240814-18:47:28 wrote gpt_000.pth 20240814-18:47:28 wrote gpt_001.pth 20240814-18:47:43 wrote non_validated_0135_00.png 20240814-18:47:57 wrote non_validated_0135_01.png 20240814-18:47:57 wrote state.pth 20240814-18:47:57 --- epoch 136 ---------------------------------------- 20240814-18:47:57 current_test_accuracies 0.9408 0.9501 0.9519 0.9528 0.9538 20240814-18:47:57 training model 0 20240814-18:47:57 training model 1 20240814-18:51:39 train_perplexity 136 model 1 1.1598670878732942 20240814-18:51:40 train_perplexity 136 model 0 1.1578085166135237 20240814-18:51:43 test_perplexity 136 model 1 1.1594468497786634 20240814-18:51:43 test_perplexity 136 model 0 1.1566877314922017 20240814-18:53:06 test_accuracy 136 model 1 val 761 / 805 20240814-18:53:06 test_accuracy 136 model 0 val 779 / 815 20240814-18:53:09 wrote gpt_000.pth 20240814-18:53:09 wrote gpt_001.pth 20240814-18:53:24 wrote non_validated_0136_00.png 20240814-18:53:38 wrote non_validated_0136_01.png 20240814-18:53:38 wrote state.pth 20240814-18:53:38 --- epoch 137 ---------------------------------------- 20240814-18:53:38 current_test_accuracies 0.9558 0.9453 0.9519 0.9528 0.9538 20240814-18:53:38 training model 1 20240814-18:53:38 training model 2 20240814-18:57:20 train_perplexity 137 model 2 1.160551389032661 20240814-18:57:21 train_perplexity 137 model 1 1.159752746330601 20240814-18:57:24 test_perplexity 137 model 2 1.1572142341188498 20240814-18:57:24 test_perplexity 137 model 1 1.1603804110672433 20240814-18:58:44 test_accuracy 137 model 1 val 788 / 831 20240814-18:58:48 test_accuracy 137 model 2 val 755 / 797 20240814-18:58:50 wrote gpt_001.pth 20240814-18:58:51 wrote gpt_002.pth 20240814-18:59:05 wrote non_validated_0137_01.png 20240814-18:59:20 wrote non_validated_0137_02.png 20240814-18:59:20 wrote state.pth 20240814-18:59:20 --- epoch 138 ---------------------------------------- 20240814-18:59:20 current_test_accuracies 0.9558 0.9483 0.9473 0.9528 0.9538 20240814-18:59:20 training model 2 20240814-18:59:20 training model 1 20240814-19:03:02 train_perplexity 138 model 2 1.1603936402879187 20240814-19:03:02 train_perplexity 138 model 1 1.1597250563341612 20240814-19:03:05 test_perplexity 138 model 2 1.1584210278621256 20240814-19:03:06 test_perplexity 138 model 1 1.1591890107580969 20240814-19:04:28 test_accuracy 138 model 1 val 754 / 805 20240814-19:04:29 test_accuracy 138 model 2 val 771 / 820 20240814-19:04:31 wrote gpt_002.pth 20240814-19:04:32 wrote gpt_001.pth 20240814-19:04:47 wrote non_validated_0138_02.png 20240814-19:05:01 wrote non_validated_0138_01.png 20240814-19:05:01 wrote state.pth 20240814-19:05:01 --- epoch 139 ---------------------------------------- 20240814-19:05:01 current_test_accuracies 0.9558 0.9366 0.9402 0.9528 0.9538 20240814-19:05:01 training model 1 20240814-19:05:01 training model 2 20240814-19:08:43 train_perplexity 139 model 2 1.1601151952136242 20240814-19:08:43 train_perplexity 139 model 1 1.1599919898749387 20240814-19:08:47 test_perplexity 139 model 2 1.1587828795657402 20240814-19:08:47 test_perplexity 139 model 1 1.157106100059159 20240814-19:10:10 test_accuracy 139 model 2 val 772 / 812 20240814-19:10:12 test_accuracy 139 model 1 val 757 / 790 20240814-19:10:14 wrote gpt_001.pth 20240814-19:10:15 wrote gpt_002.pth 20240814-19:10:29 wrote non_validated_0139_01.png 20240814-19:10:44 wrote non_validated_0139_02.png 20240814-19:10:44 wrote state.pth 20240814-19:10:44 --- epoch 140 ---------------------------------------- 20240814-19:10:44 current_test_accuracies 0.9558 0.9582 0.9507 0.9528 0.9538 20240814-19:38:18 keep c_quizzes model 3 validated nb_validated 2680 / 5125 (finishes Wed 20:03 -- 5831/h) proportion_kept 52.29% 20240814-20:05:55 keep c_quizzes model 0 validated nb_validated 5390 / 5125 (finishes now! -- 5860/h) proportion_kept 52.59% 20240814-20:05:55 teacher model 0 to [0, 145, 286, 786, 874] 20240814-20:05:55 teacher model 1 to [1242, 0, 249, 656, 880] 20240814-20:05:55 teacher model 2 to [83, 81, 0, 17, 45] 20240814-20:05:55 teacher model 3 to [14, 14, 14, 0, 1] 20240814-20:05:55 teacher model 4 to [0, 1, 2, 0, 0] 20240814-20:05:58 nb_c_quizzes model 0 train 9668 test 247 20240814-20:06:02 nb_c_quizzes model 1 train 10228 test 260 20240814-20:06:06 nb_c_quizzes model 2 train 10836 test 274 20240814-20:06:09 nb_c_quizzes model 3 train 10666 test 271 20240814-20:06:13 nb_c_quizzes model 4 train 11046 test 280 20240814-20:06:13 training model 0 20240814-20:06:13 training model 1 20240814-20:09:55 train_perplexity 140 model 0 1.1616595231333098 20240814-20:09:55 train_perplexity 140 model 1 1.1603534155184674 20240814-20:09:59 test_perplexity 140 model 0 1.1587209666148544 20240814-20:09:59 test_perplexity 140 model 1 1.15967843707062 20240814-20:11:22 test_accuracy 140 model 0 val 764 / 820 20240814-20:11:25 test_accuracy 140 model 1 val 751 / 799 20240814-20:11:27 wrote gpt_000.pth 20240814-20:11:27 wrote gpt_001.pth 20240814-20:11:42 wrote non_validated_0140_00.png 20240814-20:11:56 wrote non_validated_0140_01.png 20240814-20:11:56 wrote state.pth 20240814-20:11:56 --- epoch 141 ---------------------------------------- 20240814-20:11:56 current_test_accuracies 0.9317 0.9399 0.0000 0.0000 0.0000 20240814-20:11:56 training model 2 20240814-20:11:56 training model 3 20240814-20:15:39 train_perplexity 141 model 3 1.1618110924549465 20240814-20:15:39 train_perplexity 141 model 2 1.1614580968145825 20240814-20:15:42 test_perplexity 141 model 3 1.1611668813588962 20240814-20:15:43 test_perplexity 141 model 2 1.160870530687646 20240814-20:17:06 test_accuracy 141 model 3 val 758 / 818 20240814-20:17:08 test_accuracy 141 model 2 val 761 / 792 20240814-20:17:10 wrote gpt_002.pth 20240814-20:17:11 wrote gpt_003.pth 20240814-20:17:26 wrote non_validated_0141_02.png 20240814-20:17:40 wrote non_validated_0141_03.png 20240814-20:17:40 wrote state.pth 20240814-20:17:40 --- epoch 142 ---------------------------------------- 20240814-20:17:40 current_test_accuracies 0.9317 0.9399 0.9609 0.9267 0.0000 20240814-20:17:40 training model 4 20240814-20:17:40 training model 3 20240814-20:21:22 train_perplexity 142 model 4 1.163805980507708 20240814-20:21:22 train_perplexity 142 model 3 1.1620318014843338 20240814-20:21:26 test_perplexity 142 model 4 1.1621454176937054 20240814-20:21:26 test_perplexity 142 model 3 1.1612718704488396 20240814-20:22:52 test_accuracy 142 model 4 val 748 / 791 20240814-20:22:52 test_accuracy 142 model 3 val 752 / 797 20240814-20:22:55 wrote gpt_004.pth 20240814-20:22:56 wrote gpt_003.pth 20240814-20:23:10 wrote non_validated_0142_04.png 20240814-20:23:24 wrote non_validated_0142_03.png 20240814-20:23:24 wrote state.pth 20240814-20:23:24 --- epoch 143 ---------------------------------------- 20240814-20:23:24 current_test_accuracies 0.9317 0.9399 0.9609 0.9435 0.9456 20240814-20:23:24 training model 0 20240814-20:23:24 training model 1 20240814-20:27:06 train_perplexity 143 model 0 1.1616228071332506 20240814-20:27:06 train_perplexity 143 model 1 1.1602254183745286 20240814-20:27:10 test_perplexity 143 model 0 1.1608564209671233 20240814-20:27:10 test_perplexity 143 model 1 1.1593952740179714 20240814-20:28:33 test_accuracy 143 model 0 val 760 / 811 20240814-20:28:35 test_accuracy 143 model 1 val 731 / 784 20240814-20:28:37 wrote gpt_000.pth 20240814-20:28:38 wrote gpt_001.pth 20240814-20:28:52 wrote non_validated_0143_00.png 20240814-20:29:06 wrote non_validated_0143_01.png 20240814-20:29:06 wrote state.pth 20240814-20:29:06 --- epoch 144 ---------------------------------------- 20240814-20:29:06 current_test_accuracies 0.9371 0.9324 0.9609 0.9435 0.9456 20240814-20:29:06 training model 1 20240814-20:29:06 training model 0 20240814-20:32:48 train_perplexity 144 model 1 1.160134619272074 20240814-20:32:48 train_perplexity 144 model 0 1.1615226992131142 20240814-20:32:52 test_perplexity 144 model 1 1.1589469806220334 20240814-20:32:52 test_perplexity 144 model 0 1.160209700351305 20240814-20:34:18 test_accuracy 144 model 0 val 752 / 796 20240814-20:34:19 test_accuracy 144 model 1 val 752 / 786 20240814-20:34:21 wrote gpt_001.pth 20240814-20:34:22 wrote gpt_000.pth 20240814-20:34:36 wrote non_validated_0144_01.png 20240814-20:34:51 wrote non_validated_0144_00.png 20240814-20:34:51 wrote state.pth 20240814-20:34:51 --- epoch 145 ---------------------------------------- 20240814-20:34:51 current_test_accuracies 0.9447 0.9567 0.9609 0.9435 0.9456 20240814-20:34:51 training model 3 20240814-20:34:51 training model 0 20240814-20:38:33 train_perplexity 145 model 3 1.1620010016898967 20240814-20:38:33 train_perplexity 145 model 0 1.1613947035156642 20240814-20:38:37 test_perplexity 145 model 3 1.1573949011111404 20240814-20:38:37 test_perplexity 145 model 0 1.1584880332744747 20240814-20:40:00 test_accuracy 145 model 0 val 765 / 805 20240814-20:40:02 test_accuracy 145 model 3 val 758 / 796 20240814-20:40:04 wrote gpt_003.pth 20240814-20:40:04 wrote gpt_000.pth 20240814-20:40:19 wrote non_validated_0145_03.png 20240814-20:40:33 wrote non_validated_0145_00.png 20240814-20:40:33 wrote state.pth 20240814-20:40:33 --- epoch 146 ---------------------------------------- 20240814-20:40:33 current_test_accuracies 0.9503 0.9567 0.9609 0.9523 0.9456 20240814-20:40:33 training model 4 20240814-20:40:33 training model 0 20240814-20:44:15 train_perplexity 146 model 4 1.1635296272746707 20240814-20:44:15 train_perplexity 146 model 0 1.1612311359017424 20240814-20:44:19 test_perplexity 146 model 4 1.1639217262265174 20240814-20:44:19 test_perplexity 146 model 0 1.1587948091111848 20240814-20:45:45 test_accuracy 146 model 4 val 737 / 785 20240814-20:45:47 test_accuracy 146 model 0 val 720 / 769 20240814-20:45:49 wrote gpt_004.pth 20240814-20:45:50 wrote gpt_000.pth 20240814-20:46:04 wrote non_validated_0146_04.png 20240814-20:46:19 wrote non_validated_0146_00.png 20240814-20:46:19 wrote state.pth 20240814-20:46:19 --- epoch 147 ---------------------------------------- 20240814-20:46:19 current_test_accuracies 0.9363 0.9567 0.9609 0.9523 0.9389 20240814-20:46:19 training model 0 20240814-20:46:19 training model 4 20240814-20:50:01 train_perplexity 147 model 0 1.1613182103943525 20240814-20:50:01 train_perplexity 147 model 4 1.1628003211629276 20240814-20:50:05 test_perplexity 147 model 0 1.1580778856925953 20240814-20:50:05 test_perplexity 147 model 4 1.1605283745328359 20240814-20:51:28 test_accuracy 147 model 0 val 771 / 813 20240814-20:51:28 test_accuracy 147 model 4 val 745 / 802 20240814-20:51:30 wrote gpt_000.pth 20240814-20:51:31 wrote gpt_004.pth 20240814-20:51:46 wrote non_validated_0147_00.png 20240814-20:52:00 wrote non_validated_0147_04.png 20240814-20:52:00 wrote state.pth 20240814-20:52:00 --- epoch 148 ---------------------------------------- 20240814-20:52:00 current_test_accuracies 0.9483 0.9567 0.9609 0.9523 0.9289 20240814-20:52:00 training model 4 20240814-20:52:00 training model 0 20240814-20:55:43 train_perplexity 148 model 0 1.1608717844921 20240814-20:55:43 train_perplexity 148 model 4 1.1630308294199 20240814-20:55:46 test_perplexity 148 model 0 1.1609772069696551 20240814-20:55:47 test_perplexity 148 model 4 1.1639354851326862 20240814-20:57:09 test_accuracy 148 model 4 val 762 / 807 20240814-20:57:11 test_accuracy 148 model 0 val 749 / 807 20240814-20:57:13 wrote gpt_004.pth 20240814-20:57:13 wrote gpt_000.pth 20240814-20:57:28 wrote non_validated_0148_04.png 20240814-20:57:42 wrote non_validated_0148_00.png 20240814-20:57:42 wrote state.pth 20240814-20:57:42 --- epoch 149 ---------------------------------------- 20240814-20:57:42 current_test_accuracies 0.9281 0.9567 0.9609 0.9523 0.9442 20240814-20:57:42 training model 0 20240814-20:57:42 training model 4 20240814-21:01:24 train_perplexity 149 model 0 1.1605138058859223 20240814-21:01:24 train_perplexity 149 model 4 1.1634417208877554 20240814-21:01:27 test_perplexity 149 model 0 1.1572350195941419 20240814-21:01:28 test_perplexity 149 model 4 1.1619672076017178 20240814-21:02:53 test_accuracy 149 model 4 val 747 / 789 20240814-21:02:55 test_accuracy 149 model 0 val 753 / 788 20240814-21:02:57 wrote gpt_000.pth 20240814-21:02:58 wrote gpt_004.pth 20240814-21:03:12 wrote non_validated_0149_00.png 20240814-21:03:27 wrote non_validated_0149_04.png 20240814-21:03:27 wrote state.pth 20240814-21:03:27 --- epoch 150 ---------------------------------------- 20240814-21:03:27 current_test_accuracies 0.9556 0.9567 0.9609 0.9523 0.9468 20240814-21:03:27 training model 4 20240814-21:03:27 training model 3 20240814-21:07:09 train_perplexity 150 model 4 1.1633877424593229 20240814-21:07:09 train_perplexity 150 model 3 1.1613046612017133 20240814-21:07:13 test_perplexity 150 model 4 1.1635112473802773 20240814-21:07:13 test_perplexity 150 model 3 1.1576129302621234 20240814-21:08:36 test_accuracy 150 model 4 val 764 / 808 20240814-21:08:38 test_accuracy 150 model 3 val 753 / 787 20240814-21:08:40 wrote gpt_004.pth 20240814-21:08:41 wrote gpt_003.pth 20240814-21:08:55 wrote non_validated_0150_04.png 20240814-21:09:10 wrote non_validated_0150_03.png 20240814-21:09:10 wrote state.pth 20240814-21:09:10 --- epoch 151 ---------------------------------------- 20240814-21:09:10 current_test_accuracies 0.9556 0.9567 0.9609 0.9568 0.9455 20240814-21:09:10 training model 4 20240814-21:09:10 training model 0 20240814-21:12:51 train_perplexity 151 model 0 1.160560794893953 20240814-21:12:52 train_perplexity 151 model 4 1.1626357158176557 20240814-21:12:55 test_perplexity 151 model 0 1.1584695235826825 20240814-21:12:55 test_perplexity 151 model 4 1.1617978451213955 20240814-21:14:18 test_accuracy 151 model 4 val 745 / 803 20240814-21:14:21 test_accuracy 151 model 0 val 728 / 788 20240814-21:14:23 wrote gpt_004.pth 20240814-21:14:24 wrote gpt_000.pth 20240814-21:14:38 wrote non_validated_0151_04.png 20240814-21:14:52 wrote non_validated_0151_00.png 20240814-21:14:52 wrote state.pth 20240814-21:14:52 --- epoch 152 ---------------------------------------- 20240814-21:14:52 current_test_accuracies 0.9239 0.9567 0.9609 0.9568 0.9278 20240814-21:14:52 training model 0 20240814-21:14:52 training model 4 20240814-21:18:34 train_perplexity 152 model 0 1.1605690530450452 20240814-21:18:34 train_perplexity 152 model 4 1.1627072477612717 20240814-21:18:38 test_perplexity 152 model 0 1.1566495020510532 20240814-21:18:38 test_perplexity 152 model 4 1.1609017420315535 20240814-21:20:00 test_accuracy 152 model 0 val 761 / 805 20240814-21:20:03 test_accuracy 152 model 4 val 719 / 782 20240814-21:20:05 wrote gpt_000.pth 20240814-21:20:05 wrote gpt_004.pth 20240814-21:20:20 wrote non_validated_0152_00.png 20240814-21:20:34 wrote non_validated_0152_04.png 20240814-21:20:34 wrote state.pth 20240814-21:20:34 --- epoch 153 ---------------------------------------- 20240814-21:20:34 current_test_accuracies 0.9453 0.9567 0.9609 0.9568 0.9194 20240814-21:20:34 training model 4 20240814-21:20:34 training model 0 20240814-21:24:16 train_perplexity 153 model 4 1.1625710262003761 20240814-21:24:16 train_perplexity 153 model 0 1.1609787816948947 20240814-21:24:20 test_perplexity 153 model 4 1.1620822096932855 20240814-21:24:20 test_perplexity 153 model 0 1.1593227744137544 20240814-21:25:46 test_accuracy 153 model 4 val 734 / 794 20240814-21:25:47 test_accuracy 153 model 0 val 750 / 793 20240814-21:25:49 wrote gpt_004.pth 20240814-21:25:50 wrote gpt_000.pth 20240814-21:26:04 wrote non_validated_0153_04.png 20240814-21:26:19 wrote non_validated_0153_00.png 20240814-21:26:19 wrote state.pth 20240814-21:26:19 --- epoch 154 ---------------------------------------- 20240814-21:26:19 current_test_accuracies 0.9458 0.9567 0.9609 0.9568 0.9244 20240814-21:26:19 training model 4 20240814-21:26:19 training model 0 20240814-21:30:01 train_perplexity 154 model 0 1.1606052497451635 20240814-21:30:02 train_perplexity 154 model 4 1.1623653948349513 20240814-21:30:05 test_perplexity 154 model 0 1.1600954136226131 20240814-21:30:05 test_perplexity 154 model 4 1.1638404998762435 20240814-21:31:26 test_accuracy 154 model 4 val 762 / 808 20240814-21:31:28 test_accuracy 154 model 0 val 751 / 810 20240814-21:31:30 wrote gpt_004.pth 20240814-21:31:31 wrote gpt_000.pth 20240814-21:31:45 wrote non_validated_0154_04.png 20240814-21:31:59 wrote non_validated_0154_00.png 20240814-21:31:59 wrote state.pth 20240814-21:31:59 --- epoch 155 ---------------------------------------- 20240814-21:31:59 current_test_accuracies 0.9272 0.9567 0.9609 0.9568 0.9431 20240814-21:31:59 training model 0 20240814-21:31:59 training model 4 20240814-21:35:42 train_perplexity 155 model 4 1.1624169779699176 20240814-21:35:42 train_perplexity 155 model 0 1.1611123330757749 20240814-21:35:46 test_perplexity 155 model 0 1.1556699132366168 20240814-21:35:46 test_perplexity 155 model 4 1.1609985104886886 20240814-21:37:09 test_accuracy 155 model 4 val 757 / 806 20240814-21:37:11 test_accuracy 155 model 0 val 755 / 794 20240814-21:37:13 wrote gpt_000.pth 20240814-21:37:14 wrote gpt_004.pth 20240814-21:37:28 wrote non_validated_0155_00.png 20240814-21:37:43 wrote non_validated_0155_04.png 20240814-21:37:43 wrote state.pth 20240814-21:37:43 --- epoch 156 ---------------------------------------- 20240814-21:37:43 current_test_accuracies 0.9509 0.9567 0.9609 0.9568 0.9392 20240814-21:37:43 training model 4 20240814-21:37:43 training model 0 20240814-21:41:25 train_perplexity 156 model 4 1.1630413522606402 20240814-21:41:25 train_perplexity 156 model 0 1.1607724318822459 20240814-21:41:28 test_perplexity 156 model 4 1.159861952942481 20240814-21:41:29 test_perplexity 156 model 0 1.1572381916590662 20240814-21:42:53 test_accuracy 156 model 4 val 748 / 786 20240814-21:42:54 test_accuracy 156 model 0 val 766 / 795 20240814-21:42:56 wrote gpt_004.pth 20240814-21:42:57 wrote gpt_000.pth 20240814-21:43:11 wrote non_validated_0156_04.png 20240814-21:43:26 wrote non_validated_0156_00.png 20240814-21:43:26 wrote state.pth 20240814-21:43:26 --- epoch 157 ---------------------------------------- 20240814-21:43:26 current_test_accuracies 0.9635 0.9567 0.9609 0.9568 0.9517 20240814-22:11:20 keep c_quizzes model 2 validated nb_validated 2433 / 5125 (finishes Wed 22:42 -- 5231/h) proportion_kept 47.47% 20240814-22:39:07 keep c_quizzes model 4 validated nb_validated 5019 / 5125 (finishes Wed 22:40 -- 5407/h) proportion_kept 48.97% 20240814-23:07:00 keep c_quizzes model 0 validated nb_validated 7622 / 5125 (finishes now! -- 5472/h) proportion_kept 49.57% 20240814-23:07:00 teacher model 0 to [0, 1550, 2172, 1250, 646] 20240814-23:07:00 teacher model 1 to [502, 0, 276, 194, 124] 20240814-23:07:00 teacher model 2 to [185, 189, 0, 101, 28] 20240814-23:07:00 teacher model 3 to [131, 129, 132, 0, 1] 20240814-23:07:00 teacher model 4 to [1, 5, 2, 4, 0] 20240814-23:07:03 nb_c_quizzes model 0 train 10467 test 267 20240814-23:07:07 nb_c_quizzes model 1 train 12055 test 306 20240814-23:07:11 nb_c_quizzes model 2 train 13355 test 337 20240814-23:07:14 nb_c_quizzes model 3 train 12177 test 309 20240814-23:07:18 nb_c_quizzes model 4 train 11825 test 300 20240814-23:07:18 training model 0 20240814-23:07:18 training model 1 20240814-23:11:01 train_perplexity 157 model 1 1.1657541573354169 20240814-23:11:02 train_perplexity 157 model 0 1.162699350187192 20240814-23:11:04 test_perplexity 157 model 1 1.1659006766711202 20240814-23:11:05 test_perplexity 157 model 0 1.1616223751956793 20240814-23:12:27 test_accuracy 157 model 0 val 741 / 800 20240814-23:12:28 test_accuracy 157 model 1 val 772 / 822 20240814-23:12:31 wrote gpt_000.pth 20240814-23:12:31 wrote gpt_001.pth 20240814-23:12:46 wrote non_validated_0157_00.png 20240814-23:13:00 wrote non_validated_0157_01.png 20240814-23:13:00 wrote state.pth 20240814-23:13:00 --- epoch 158 ---------------------------------------- 20240814-23:13:00 current_test_accuracies 0.9262 0.9392 0.0000 0.0000 0.0000 20240814-23:13:00 training model 2 20240814-23:13:00 training model 3 20240814-23:16:43 train_perplexity 158 model 2 1.1693441998048621 20240814-23:16:43 train_perplexity 158 model 3 1.1660422837502789 20240814-23:16:46 test_perplexity 158 model 2 1.1669558570986653 20240814-23:16:47 test_perplexity 158 model 3 1.1653955239893803 20240814-23:18:13 test_accuracy 158 model 2 val 729 / 792 20240814-23:18:13 test_accuracy 158 model 3 val 748 / 794 20240814-23:18:16 wrote gpt_002.pth 20240814-23:18:17 wrote gpt_003.pth 20240814-23:18:31 wrote non_validated_0158_02.png 20240814-23:18:46 wrote non_validated_0158_03.png 20240814-23:18:46 wrote state.pth 20240814-23:18:46 --- epoch 159 ---------------------------------------- 20240814-23:18:46 current_test_accuracies 0.9262 0.9392 0.9205 0.9421 0.0000 20240814-23:18:46 training model 4 20240814-23:18:46 training model 2 20240814-23:22:28 train_perplexity 159 model 2 1.1689638636089157 20240814-23:22:28 train_perplexity 159 model 4 1.1645384960218248 20240814-23:22:32 test_perplexity 159 model 2 1.1664192273859868 20240814-23:22:32 test_perplexity 159 model 4 1.1636153569360776 20240814-23:23:55 test_accuracy 159 model 2 val 718 / 786 20240814-23:23:57 test_accuracy 159 model 4 val 750 / 794 20240814-23:23:59 wrote gpt_004.pth 20240814-23:24:00 wrote gpt_002.pth 20240814-23:24:14 wrote non_validated_0159_04.png 20240814-23:24:29 wrote non_validated_0159_02.png 20240814-23:24:29 wrote state.pth 20240814-23:24:29 --- epoch 160 ---------------------------------------- 20240814-23:24:29 current_test_accuracies 0.9262 0.9392 0.9135 0.9421 0.9446 20240814-23:24:29 training model 2 20240814-23:24:29 training model 0 20240814-23:28:10 train_perplexity 160 model 2 1.16857830765191 20240814-23:28:11 train_perplexity 160 model 0 1.1627863979722721 20240814-23:28:14 test_perplexity 160 model 2 1.1655292127397259 20240814-23:28:14 test_perplexity 160 model 0 1.1590206833802021 20240814-23:29:36 test_accuracy 160 model 0 val 756 / 810 20240814-23:29:39 test_accuracy 160 model 2 val 730 / 787 20240814-23:29:41 wrote gpt_002.pth 20240814-23:29:42 wrote gpt_000.pth 20240814-23:29:56 wrote non_validated_0160_02.png 20240814-23:30:11 wrote non_validated_0160_00.png 20240814-23:30:11 wrote state.pth 20240814-23:30:11 --- epoch 161 ---------------------------------------- 20240814-23:30:11 current_test_accuracies 0.9333 0.9392 0.9276 0.9421 0.9446 20240814-23:30:11 training model 2 20240814-23:30:11 training model 0 20240814-23:33:53 train_perplexity 161 model 2 1.1686656874655137 20240814-23:33:53 train_perplexity 161 model 0 1.1622439400273903 20240814-23:33:57 test_perplexity 161 model 2 1.168861160850953 20240814-23:33:57 test_perplexity 161 model 0 1.1593523383673865 20240814-23:35:24 test_accuracy 161 model 2 val 732 / 789 20240814-23:35:24 test_accuracy 161 model 0 val 737 / 788 20240814-23:35:26 wrote gpt_002.pth 20240814-23:35:27 wrote gpt_000.pth 20240814-23:35:41 wrote non_validated_0161_02.png 20240814-23:35:56 wrote non_validated_0161_00.png 20240814-23:35:56 wrote state.pth 20240814-23:35:56 --- epoch 162 ---------------------------------------- 20240814-23:35:56 current_test_accuracies 0.9353 0.9392 0.9278 0.9421 0.9446 20240814-23:35:56 training model 2 20240814-23:35:56 training model 0 20240814-23:39:39 train_perplexity 162 model 0 1.162606866456734 20240814-23:39:40 train_perplexity 162 model 2 1.1683878815295263 20240814-23:39:42 test_perplexity 162 model 0 1.1583439611533093 20240814-23:39:43 test_perplexity 162 model 2 1.1671252121368676 20240814-23:41:08 test_accuracy 162 model 0 val 739 / 777 20240814-23:41:08 test_accuracy 162 model 2 val 737 / 795 20240814-23:41:11 wrote gpt_002.pth 20240814-23:41:11 wrote gpt_000.pth 20240814-23:41:26 wrote non_validated_0162_02.png 20240814-23:41:40 wrote non_validated_0162_00.png 20240814-23:41:40 wrote state.pth 20240814-23:41:40 --- epoch 163 ---------------------------------------- 20240814-23:41:40 current_test_accuracies 0.9511 0.9392 0.9270 0.9421 0.9446 20240814-23:41:40 training model 2 20240814-23:41:40 training model 1 20240814-23:45:23 train_perplexity 163 model 2 1.1684780359131837 20240814-23:45:23 train_perplexity 163 model 1 1.1654529750992286 20240814-23:45:26 test_perplexity 163 model 2 1.167161901866076 20240814-23:45:26 test_perplexity 163 model 1 1.162704047700251 20240814-23:46:49 test_accuracy 163 model 1 val 753 / 801 20240814-23:46:51 test_accuracy 163 model 2 val 738 / 798 20240814-23:46:53 wrote gpt_002.pth 20240814-23:46:54 wrote gpt_001.pth 20240814-23:47:08 wrote non_validated_0163_02.png 20240814-23:47:23 wrote non_validated_0163_01.png 20240814-23:47:23 wrote state.pth 20240814-23:47:23 --- epoch 164 ---------------------------------------- 20240814-23:47:23 current_test_accuracies 0.9511 0.9401 0.9248 0.9421 0.9446 20240814-23:47:23 training model 2 20240814-23:47:23 training model 1 20240814-23:51:05 train_perplexity 164 model 2 1.1681266052905115 20240814-23:51:05 train_perplexity 164 model 1 1.1652336618570989 20240814-23:51:09 test_perplexity 164 model 2 1.1660671323345468 20240814-23:51:09 test_perplexity 164 model 1 1.1609816158597088 20240814-23:52:32 test_accuracy 164 model 1 val 758 / 808 20240814-23:52:35 test_accuracy 164 model 2 val 737 / 797 20240814-23:52:37 wrote gpt_002.pth 20240814-23:52:38 wrote gpt_001.pth 20240814-23:52:52 wrote non_validated_0164_02.png 20240814-23:53:07 wrote non_validated_0164_01.png 20240814-23:53:07 wrote state.pth 20240814-23:53:07 --- epoch 165 ---------------------------------------- 20240814-23:53:07 current_test_accuracies 0.9511 0.9381 0.9247 0.9421 0.9446 20240814-23:53:07 training model 2 20240814-23:53:07 training model 1 20240814-23:56:49 train_perplexity 165 model 2 1.167713196299711 20240814-23:56:49 train_perplexity 165 model 1 1.1650618357086702 20240814-23:56:52 test_perplexity 165 model 2 1.1661551468923372 20240814-23:56:53 test_perplexity 165 model 1 1.165825503635423 20240814-23:58:17 test_accuracy 165 model 1 val 771 / 813 20240814-23:58:19 test_accuracy 165 model 2 val 736 / 786 20240814-23:58:21 wrote gpt_002.pth 20240814-23:58:21 wrote gpt_001.pth 20240814-23:58:36 wrote non_validated_0165_02.png 20240814-23:58:50 wrote non_validated_0165_01.png 20240814-23:58:50 wrote state.pth 20240814-23:58:50 --- epoch 166 ---------------------------------------- 20240814-23:58:50 current_test_accuracies 0.9511 0.9483 0.9364 0.9421 0.9446 20240814-23:58:50 training model 2 20240814-23:58:50 training model 3