20240806-20:07:28 argv ./main.py --result_dir=results_noise_L --model=122M --nb_train_samples=40000 --nb_test_samples=2000 --grids_world_tasks=replace_color,translate,grow,half_fill,frame,detect,corners,contact --accuracy_to_make_c_quizzes=0.95 --prompt_noise=0.05 --batch_size=10 --inference_batch_size=10 --nb_gpts=5 20240806-20:07:28 args.log_filename train.log 20240806-20:07:28 args.result_dir results_noise_L 20240806-20:07:28 args.seed 0 20240806-20:07:28 args.resume False 20240806-20:07:28 args.max_percents_of_test_in_train -1 20240806-20:07:28 args.log_command None 20240806-20:07:28 args.nb_epochs 10000 20240806-20:07:28 args.batch_size 10 20240806-20:07:28 args.physical_batch_size None 20240806-20:07:28 args.inference_batch_size 10 20240806-20:07:28 args.nb_train_samples 40000 20240806-20:07:28 args.nb_test_samples 2000 20240806-20:07:28 args.nb_new_c_quizzes_for_train None 20240806-20:07:28 args.nb_new_c_quizzes_for_test None 20240806-20:07:28 args.learning_rate 0.0005 20240806-20:07:28 args.model 122M 20240806-20:07:28 args.dim_model 768 20240806-20:07:28 args.dim_keys 64 20240806-20:07:28 args.dim_hidden 2048 20240806-20:07:28 args.nb_heads 8 20240806-20:07:28 args.nb_blocks 24 20240806-20:07:28 args.dropout 0.1 20240806-20:07:28 args.deterministic_synthesis False 20240806-20:07:28 args.problem grids 20240806-20:07:28 args.nb_threads 1 20240806-20:07:28 args.gpus all 20240806-20:07:28 args.nb_gpts 5 20240806-20:07:28 args.max_fail_to_validate 2 20240806-20:07:28 args.accuracy_to_make_c_quizzes 0.95 20240806-20:07:28 args.proba_understands 0.95 20240806-20:07:28 args.proba_not_understands 0.5 20240806-20:07:28 args.temperature_hot 1.5 20240806-20:07:28 args.temperature_cold 1 20240806-20:07:28 args.prompt_noise 0.05 20240806-20:07:28 args.nb_averaging_rounds 3 20240806-20:07:28 args.dirty_debug False 20240806-20:07:28 args.test None 20240806-20:07:28 args.grids_world_tasks replace_color,translate,grow,half_fill,frame,detect,corners,contact 20240806-20:07:28 args.grids_science_tasks None 20240806-20:07:28 args.sky_height 6 20240806-20:07:28 args.sky_width 8 20240806-20:07:28 args.sky_nb_birds 3 20240806-20:07:28 args.sky_nb_iterations 2 20240806-20:07:28 args.sky_speed 3 20240806-20:07:41 main_device cuda:0 gpus ['cuda:0'] 20240806-20:07:41 vocabulary_size 15 20240806-20:07:41 creating model 0 and its w_quizzes 20240806-20:07:48 creating model 1 and its w_quizzes 20240806-20:08:02 creating model 2 and its w_quizzes 20240806-20:08:15 creating model 3 and its w_quizzes 20240806-20:08:29 creating model 4 and its w_quizzes 20240806-20:08:42 nb_parameters 122847759 (122M) 20240806-20:08:42 nb_new_c_quizzes_for_train 400 nb_new_c_quizzes_for_test 20 20240806-20:08:42 wrote state.pth 20240806-20:08:42 --- epoch 0 ---------------------------------------- 20240806-20:08:42 current_test_accuracies 0.0000 0.0000 0.0000 0.0000 0.0000 20240806-20:08:42 training model 0 20240806-20:25:25 train_perplexity 0 model 0 3.2185840425774055 20240806-20:25:41 test_perplexity 0 model 0 1.3487759429090869 20240806-20:31:54 test_accuracy 0 model 0 val 22 / 1606 20240806-20:31:57 wrote gpt_000.pth 20240806-20:33:00 wrote non_validated_0000_00.png 20240806-20:33:00 wrote state.pth 20240806-20:33:00 --- epoch 1 ---------------------------------------- 20240806-20:33:00 current_test_accuracies 0.0137 0.0000 0.0000 0.0000 0.0000 20240806-20:33:00 training model 1 20240806-20:49:43 train_perplexity 1 model 1 3.278556564953795 20240806-20:49:58 test_perplexity 1 model 1 1.3336603146245438 20240806-20:56:16 test_accuracy 1 model 1 val 30 / 1577 20240806-20:56:19 wrote gpt_001.pth 20240806-20:57:20 wrote non_validated_0001_01.png 20240806-20:57:20 wrote state.pth 20240806-20:57:20 --- epoch 2 ---------------------------------------- 20240806-20:57:20 current_test_accuracies 0.0137 0.0190 0.0000 0.0000 0.0000 20240806-20:57:20 training model 2 20240806-21:14:05 train_perplexity 2 model 2 3.0629028079637455 20240806-21:14:20 test_perplexity 2 model 2 1.3414876834031724 20240806-21:20:36 test_accuracy 2 model 2 val 25 / 1570 20240806-21:20:39 wrote gpt_002.pth 20240806-21:21:40 wrote non_validated_0002_02.png 20240806-21:21:40 wrote state.pth 20240806-21:21:40 --- epoch 3 ---------------------------------------- 20240806-21:21:40 current_test_accuracies 0.0137 0.0190 0.0159 0.0000 0.0000 20240806-21:21:40 training model 3 20240806-21:38:24 train_perplexity 3 model 3 3.3706758919640865 20240806-21:38:40 test_perplexity 3 model 3 1.345222486138485 20240806-21:44:49 test_accuracy 3 model 3 val 28 / 1583 20240806-21:44:52 wrote gpt_003.pth 20240806-21:45:54 wrote non_validated_0003_03.png 20240806-21:45:54 wrote state.pth 20240806-21:45:54 --- epoch 4 ---------------------------------------- 20240806-21:45:54 current_test_accuracies 0.0137 0.0190 0.0159 0.0177 0.0000 20240806-21:45:54 training model 4 20240806-22:02:38 train_perplexity 4 model 4 2.893582774096571 20240806-22:02:53 test_perplexity 4 model 4 1.352360108957994 20240806-22:09:14 test_accuracy 4 model 4 val 35 / 1564 20240806-22:09:17 wrote gpt_004.pth 20240806-22:10:18 wrote non_validated_0004_04.png 20240806-22:10:18 wrote state.pth 20240806-22:10:18 --- epoch 5 ---------------------------------------- 20240806-22:10:18 current_test_accuracies 0.0137 0.0190 0.0159 0.0177 0.0224 20240806-22:10:18 training model 0 20240806-22:27:03 train_perplexity 5 model 0 1.3239257482685136 20240806-22:27:19 test_perplexity 5 model 0 1.2453434972331823 20240806-22:33:27 test_accuracy 5 model 0 val 159 / 1592 20240806-22:33:31 wrote gpt_000.pth 20240806-22:34:33 wrote non_validated_0005_00.png 20240806-22:34:33 wrote state.pth 20240806-22:34:33 --- epoch 6 ---------------------------------------- 20240806-22:34:33 current_test_accuracies 0.0999 0.0190 0.0159 0.0177 0.0224 20240806-22:34:33 training model 2 20240806-22:51:18 train_perplexity 6 model 2 1.3170711080169204 20240806-22:51:34 test_perplexity 6 model 2 1.2443483367768104 20240806-22:57:41 test_accuracy 6 model 2 val 210 / 1609 20240806-22:57:44 wrote gpt_002.pth 20240806-22:58:44 wrote non_validated_0006_02.png 20240806-22:58:44 wrote state.pth 20240806-22:58:44 --- epoch 7 ---------------------------------------- 20240806-22:58:44 current_test_accuracies 0.0999 0.0190 0.1305 0.0177 0.0224 20240806-22:58:44 training model 3 20240806-23:15:29 train_perplexity 7 model 3 1.3074341475947744 20240806-23:15:45 test_perplexity 7 model 3 1.2366520100658305 20240806-23:21:46 test_accuracy 7 model 3 val 231 / 1628 20240806-23:21:50 wrote gpt_003.pth 20240806-23:22:51 wrote non_validated_0007_03.png 20240806-23:22:51 wrote state.pth 20240806-23:22:51 --- epoch 8 ---------------------------------------- 20240806-23:22:51 current_test_accuracies 0.0999 0.0190 0.1305 0.1419 0.0224 20240806-23:22:51 training model 1 20240806-23:39:36 train_perplexity 8 model 1 1.3074803915397402 20240806-23:39:52 test_perplexity 8 model 1 1.2397734359284214 20240806-23:45:58 test_accuracy 8 model 1 val 232 / 1604 20240806-23:46:01 wrote gpt_001.pth 20240806-23:47:03 wrote non_validated_0008_01.png 20240806-23:47:03 wrote state.pth 20240806-23:47:03 --- epoch 9 ---------------------------------------- 20240806-23:47:03 current_test_accuracies 0.0999 0.1446 0.1305 0.1419 0.0224 20240806-23:47:03 training model 4 20240807-00:03:48 train_perplexity 9 model 4 1.331723750757531 20240807-00:04:04 test_perplexity 9 model 4 1.2565589390505048 20240807-00:10:08 test_accuracy 9 model 4 val 171 / 1617 20240807-00:10:11 wrote gpt_004.pth 20240807-00:11:13 wrote non_validated_0009_04.png 20240807-00:11:13 wrote state.pth 20240807-00:11:13 --- epoch 10 ---------------------------------------- 20240807-00:11:13 current_test_accuracies 0.0999 0.1446 0.1305 0.1419 0.1058 20240807-00:11:13 training model 0 20240807-00:27:57 train_perplexity 10 model 0 1.2303318839294781 20240807-00:28:12 test_perplexity 10 model 0 1.1935633597755322 20240807-00:34:20 test_accuracy 10 model 0 val 633 / 1618 20240807-00:34:23 wrote gpt_000.pth 20240807-00:35:25 wrote non_validated_0010_00.png 20240807-00:35:25 wrote state.pth 20240807-00:35:25 --- epoch 11 ---------------------------------------- 20240807-00:35:25 current_test_accuracies 0.3912 0.1446 0.1305 0.1419 0.1058 20240807-00:35:25 training model 4 20240807-00:52:08 train_perplexity 11 model 4 1.2420675954489433 20240807-00:52:24 test_perplexity 11 model 4 1.2026720525657353 20240807-00:58:35 test_accuracy 11 model 4 val 462 / 1604 20240807-00:58:38 wrote gpt_004.pth 20240807-00:59:39 wrote non_validated_0011_04.png 20240807-00:59:39 wrote state.pth 20240807-00:59:39 --- epoch 12 ---------------------------------------- 20240807-00:59:39 current_test_accuracies 0.3912 0.1446 0.1305 0.1419 0.2880 20240807-00:59:39 training model 2 20240807-01:16:24 train_perplexity 12 model 2 1.2309752317109672 20240807-01:16:40 test_perplexity 12 model 2 1.199877439872343 20240807-01:22:55 test_accuracy 12 model 2 val 485 / 1589 20240807-01:22:59 wrote gpt_002.pth 20240807-01:24:01 wrote non_validated_0012_02.png 20240807-01:24:01 wrote state.pth 20240807-01:24:01 --- epoch 13 ---------------------------------------- 20240807-01:24:01 current_test_accuracies 0.3912 0.1446 0.3052 0.1419 0.2880 20240807-01:24:01 training model 3 20240807-01:40:45 train_perplexity 13 model 3 1.2229192991078177 20240807-01:41:01 test_perplexity 13 model 3 1.1902067984611786 20240807-01:47:12 test_accuracy 13 model 3 val 702 / 1600 20240807-01:47:16 wrote gpt_003.pth 20240807-01:48:18 wrote non_validated_0013_03.png 20240807-01:48:18 wrote state.pth 20240807-01:48:18 --- epoch 14 ---------------------------------------- 20240807-01:48:18 current_test_accuracies 0.3912 0.1446 0.3052 0.4387 0.2880 20240807-01:48:18 training model 1 20240807-02:05:01 train_perplexity 14 model 1 1.2279914474001823 20240807-02:05:16 test_perplexity 14 model 1 1.1956492073860128 20240807-02:11:23 test_accuracy 14 model 1 val 657 / 1610 20240807-02:11:26 wrote gpt_001.pth 20240807-02:12:28 wrote non_validated_0014_01.png 20240807-02:12:28 wrote state.pth 20240807-02:12:28 --- epoch 15 ---------------------------------------- 20240807-02:12:28 current_test_accuracies 0.3912 0.4081 0.3052 0.4387 0.2880 20240807-02:12:28 training model 4 20240807-02:29:12 train_perplexity 15 model 4 1.1981083416567286 20240807-02:29:28 test_perplexity 15 model 4 1.184416203724093 20240807-02:35:35 test_accuracy 15 model 4 val 839 / 1615 20240807-02:35:39 wrote gpt_004.pth 20240807-02:36:41 wrote non_validated_0015_04.png 20240807-02:36:41 wrote state.pth 20240807-02:36:41 --- epoch 16 ---------------------------------------- 20240807-02:36:41 current_test_accuracies 0.3912 0.4081 0.3052 0.4387 0.5195 20240807-02:36:41 training model 2 20240807-02:53:24 train_perplexity 16 model 2 1.1953744104138524 20240807-02:53:40 test_perplexity 16 model 2 1.1791291871290503 20240807-02:59:53 test_accuracy 16 model 2 val 921 / 1581 20240807-02:59:57 wrote gpt_002.pth 20240807-03:00:58 wrote non_validated_0016_02.png 20240807-03:00:59 wrote state.pth 20240807-03:00:59 --- epoch 17 ---------------------------------------- 20240807-03:00:59 current_test_accuracies 0.3912 0.4081 0.5825 0.4387 0.5195 20240807-03:00:59 training model 0 20240807-03:17:41 train_perplexity 17 model 0 1.192597244264324 20240807-03:17:57 test_perplexity 17 model 0 1.1757349432057056 20240807-03:24:02 test_accuracy 17 model 0 val 1017 / 1613 20240807-03:24:05 wrote gpt_000.pth 20240807-03:25:08 wrote non_validated_0017_00.png 20240807-03:25:08 wrote state.pth 20240807-03:25:08 --- epoch 18 ---------------------------------------- 20240807-03:25:08 current_test_accuracies 0.6305 0.4081 0.5825 0.4387 0.5195 20240807-03:25:08 training model 1 20240807-03:41:49 train_perplexity 18 model 1 1.195971236683325 20240807-03:42:05 test_perplexity 18 model 1 1.180472635538814 20240807-03:48:21 test_accuracy 18 model 1 val 887 / 1572 20240807-03:48:25 wrote gpt_001.pth 20240807-03:49:25 wrote non_validated_0018_01.png 20240807-03:49:25 wrote state.pth 20240807-03:49:25 --- epoch 19 ---------------------------------------- 20240807-03:49:25 current_test_accuracies 0.6305 0.5642 0.5825 0.4387 0.5195 20240807-03:49:25 training model 3 20240807-04:06:08 train_perplexity 19 model 3 1.1927912570890098 20240807-04:06:24 test_perplexity 19 model 3 1.1791664075215391 20240807-04:12:33 test_accuracy 19 model 3 val 1004 / 1616 20240807-04:12:36 wrote gpt_003.pth 20240807-04:13:38 wrote non_validated_0019_03.png 20240807-04:13:38 wrote state.pth 20240807-04:13:38 --- epoch 20 ---------------------------------------- 20240807-04:13:38 current_test_accuracies 0.6305 0.5642 0.5825 0.6213 0.5195 20240807-04:13:38 training model 4 20240807-04:30:22 train_perplexity 20 model 4 1.1804215619669771 20240807-04:30:37 test_perplexity 20 model 4 1.173839516756204 20240807-04:36:42 test_accuracy 20 model 4 val 1172 / 1616 20240807-04:36:46 wrote gpt_004.pth 20240807-04:37:48 wrote non_validated_0020_04.png 20240807-04:37:48 wrote state.pth 20240807-04:37:48 --- epoch 21 ---------------------------------------- 20240807-04:37:48 current_test_accuracies 0.6305 0.5642 0.5825 0.6213 0.7252 20240807-04:37:48 training model 1 20240807-04:54:32 train_perplexity 21 model 1 1.1820956133753984 20240807-04:54:47 test_perplexity 21 model 1 1.1707333420435435 20240807-05:00:55 test_accuracy 21 model 1 val 1146 / 1601 20240807-05:00:58 wrote gpt_001.pth 20240807-05:02:00 wrote non_validated_0021_01.png 20240807-05:02:00 wrote state.pth 20240807-05:02:00 --- epoch 22 ---------------------------------------- 20240807-05:02:00 current_test_accuracies 0.6305 0.7158 0.5825 0.6213 0.7252 20240807-05:02:00 training model 2 20240807-05:18:43 train_perplexity 22 model 2 1.18040249835277 20240807-05:18:59 test_perplexity 22 model 2 1.170591735531479 20240807-05:25:06 test_accuracy 22 model 2 val 1116 / 1601 20240807-05:25:09 wrote gpt_002.pth 20240807-05:26:11 wrote non_validated_0022_02.png 20240807-05:26:11 wrote state.pth 20240807-05:26:11 --- epoch 23 ---------------------------------------- 20240807-05:26:11 current_test_accuracies 0.6305 0.7158 0.6971 0.6213 0.7252 20240807-05:26:11 training model 3 20240807-05:42:54 train_perplexity 23 model 3 1.178724004415615 20240807-05:43:10 test_perplexity 23 model 3 1.167911773070343 20240807-05:49:29 test_accuracy 23 model 3 val 1173 / 1567 20240807-05:49:33 wrote gpt_003.pth 20240807-05:50:33 wrote non_validated_0023_03.png 20240807-05:50:33 wrote state.pth 20240807-05:50:33 --- epoch 24 ---------------------------------------- 20240807-05:50:33 current_test_accuracies 0.6305 0.7158 0.6971 0.7486 0.7252 20240807-05:50:33 training model 0 20240807-06:07:14 train_perplexity 24 model 0 1.1781630085124022 20240807-06:07:30 test_perplexity 24 model 0 1.1711919334742869 20240807-06:13:46 test_accuracy 24 model 0 val 1172 / 1586 20240807-06:13:49 wrote gpt_000.pth 20240807-06:14:52 wrote non_validated_0024_00.png 20240807-06:14:53 wrote state.pth 20240807-06:14:53 --- epoch 25 ---------------------------------------- 20240807-06:14:53 current_test_accuracies 0.7390 0.7158 0.6971 0.7486 0.7252 20240807-06:14:53 training model 2 20240807-06:31:34 train_perplexity 25 model 2 1.173549011373629 20240807-06:31:50 test_perplexity 25 model 2 1.1659171990322872 20240807-06:38:02 test_accuracy 25 model 2 val 1242 / 1601 20240807-06:38:05 wrote gpt_002.pth 20240807-06:39:06 wrote non_validated_0025_02.png 20240807-06:39:06 wrote state.pth 20240807-06:39:06 --- epoch 26 ---------------------------------------- 20240807-06:39:06 current_test_accuracies 0.7390 0.7158 0.7758 0.7486 0.7252 20240807-06:39:06 training model 1 20240807-06:55:47 train_perplexity 26 model 1 1.1752221120814286 20240807-06:56:03 test_perplexity 26 model 1 1.170465098381922 20240807-07:02:08 test_accuracy 26 model 1 val 1282 / 1621 20240807-07:02:11 wrote gpt_001.pth 20240807-07:03:13 wrote non_validated_0026_01.png 20240807-07:03:13 wrote state.pth 20240807-07:03:13 --- epoch 27 ---------------------------------------- 20240807-07:03:13 current_test_accuracies 0.7390 0.7909 0.7758 0.7486 0.7252 20240807-07:03:13 training model 4 20240807-07:19:54 train_perplexity 27 model 4 1.1731595948253406 20240807-07:20:10 test_perplexity 27 model 4 1.1666666573565574 20240807-07:26:24 test_accuracy 27 model 4 val 1243 / 1585 20240807-07:26:27 wrote gpt_004.pth 20240807-07:27:31 wrote non_validated_0027_04.png 20240807-07:27:31 wrote state.pth 20240807-07:27:31 --- epoch 28 ---------------------------------------- 20240807-07:27:31 current_test_accuracies 0.7390 0.7909 0.7758 0.7486 0.7842 20240807-07:27:31 training model 0 20240807-07:44:14 train_perplexity 28 model 0 1.1723177597565073 20240807-07:44:29 test_perplexity 28 model 0 1.1673149645316996 20240807-07:50:32 test_accuracy 28 model 0 val 1327 / 1610 20240807-07:50:35 wrote gpt_000.pth 20240807-07:51:37 wrote non_validated_0028_00.png 20240807-07:51:37 wrote state.pth 20240807-07:51:37 --- epoch 29 ---------------------------------------- 20240807-07:51:37 current_test_accuracies 0.8242 0.7909 0.7758 0.7486 0.7842 20240807-07:51:37 training model 3 20240807-08:08:20 train_perplexity 29 model 3 1.1735956767929216 20240807-08:08:36 test_perplexity 29 model 3 1.167420251661146 20240807-08:14:45 test_accuracy 29 model 3 val 1352 / 1602 20240807-08:14:48 wrote gpt_003.pth 20240807-08:15:51 wrote non_validated_0029_03.png 20240807-08:15:51 wrote state.pth 20240807-08:15:51 --- epoch 30 ---------------------------------------- 20240807-08:15:51 current_test_accuracies 0.8242 0.7909 0.7758 0.8439 0.7842 20240807-08:15:51 training model 2 20240807-08:32:33 train_perplexity 30 model 2 1.170025012668535 20240807-08:32:48 test_perplexity 30 model 2 1.1653533433977716 20240807-08:38:58 test_accuracy 30 model 2 val 1314 / 1592 20240807-08:39:02 wrote gpt_002.pth 20240807-08:40:03 wrote non_validated_0030_02.png 20240807-08:40:03 wrote state.pth 20240807-08:40:03 --- epoch 31 ---------------------------------------- 20240807-08:40:03 current_test_accuracies 0.8242 0.7909 0.8254 0.8439 0.7842 20240807-08:40:03 training model 4 20240807-08:56:45 train_perplexity 31 model 4 1.1693566209153796 20240807-08:57:00 test_perplexity 31 model 4 1.16618808549889 20240807-09:03:06 test_accuracy 31 model 4 val 1318 / 1601 20240807-09:03:09 wrote gpt_004.pth 20240807-09:04:11 wrote non_validated_0031_04.png 20240807-09:04:11 wrote state.pth 20240807-09:04:11 --- epoch 32 ---------------------------------------- 20240807-09:04:11 current_test_accuracies 0.8242 0.7909 0.8254 0.8439 0.8232 20240807-09:04:11 training model 1 20240807-09:20:52 train_perplexity 32 model 1 1.1707454376578632 20240807-09:21:08 test_perplexity 32 model 1 1.1676945222917765 20240807-09:27:06 test_accuracy 32 model 1 val 1344 / 1632 20240807-09:27:10 wrote gpt_001.pth 20240807-09:28:11 wrote non_validated_0032_01.png 20240807-09:28:12 wrote state.pth 20240807-09:28:12 --- epoch 33 ---------------------------------------- 20240807-09:28:12 current_test_accuracies 0.8242 0.8235 0.8254 0.8439 0.8232 20240807-09:28:12 training model 4 20240807-09:44:56 train_perplexity 33 model 4 1.167548992383453 20240807-09:45:11 test_perplexity 33 model 4 1.1634132785067284 20240807-09:51:33 test_accuracy 33 model 4 val 1405 / 1569 20240807-09:51:37 wrote gpt_004.pth 20240807-09:52:38 wrote non_validated_0033_04.png 20240807-09:52:38 wrote state.pth 20240807-09:52:38 --- epoch 34 ---------------------------------------- 20240807-09:52:38 current_test_accuracies 0.8242 0.8235 0.8254 0.8439 0.8955 20240807-09:52:38 training model 1 20240807-10:09:21 train_perplexity 34 model 1 1.1684434846239802 20240807-10:09:37 test_perplexity 34 model 1 1.1656082727595773 20240807-10:15:40 test_accuracy 34 model 1 val 1396 / 1634 20240807-10:15:43 wrote gpt_001.pth 20240807-10:16:45 wrote non_validated_0034_01.png 20240807-10:16:45 wrote state.pth 20240807-10:16:45 --- epoch 35 ---------------------------------------- 20240807-10:16:45 current_test_accuracies 0.8242 0.8543 0.8254 0.8439 0.8955 20240807-10:16:45 training model 0 20240807-10:33:29 train_perplexity 35 model 0 1.1688885205466377 20240807-10:33:44 test_perplexity 35 model 0 1.162525125329255 20240807-10:39:48 test_accuracy 35 model 0 val 1368 / 1628 20240807-10:39:51 wrote gpt_000.pth 20240807-10:40:53 wrote non_validated_0035_00.png 20240807-10:40:53 wrote state.pth 20240807-10:40:53 --- epoch 36 ---------------------------------------- 20240807-10:40:53 current_test_accuracies 0.8403 0.8543 0.8254 0.8439 0.8955 20240807-10:40:53 training model 2 20240807-10:57:37 train_perplexity 36 model 2 1.1671041727384182 20240807-10:57:53 test_perplexity 36 model 2 1.162696256429166 20240807-11:04:09 test_accuracy 36 model 2 val 1421 / 1598 20240807-11:04:12 wrote gpt_002.pth 20240807-11:05:13 wrote non_validated_0036_02.png 20240807-11:05:13 wrote state.pth 20240807-11:05:13 --- epoch 37 ---------------------------------------- 20240807-11:05:13 current_test_accuracies 0.8403 0.8543 0.8892 0.8439 0.8955 20240807-11:05:13 training model 0 20240807-11:21:57 train_perplexity 37 model 0 1.1661145737655436 20240807-11:22:13 test_perplexity 37 model 0 1.1639510892351002 20240807-11:28:22 test_accuracy 37 model 0 val 1389 / 1602 20240807-11:28:25 wrote gpt_000.pth 20240807-11:29:27 wrote non_validated_0037_00.png 20240807-11:29:27 wrote state.pth 20240807-11:29:27 --- epoch 38 ---------------------------------------- 20240807-11:29:27 current_test_accuracies 0.8670 0.8543 0.8892 0.8439 0.8955 20240807-11:29:27 training model 3 20240807-11:46:10 train_perplexity 38 model 3 1.1691781882839427 20240807-11:46:26 test_perplexity 38 model 3 1.1634226016971332 20240807-11:52:29 test_accuracy 38 model 3 val 1383 / 1612 20240807-11:52:32 wrote gpt_003.pth 20240807-11:53:34 wrote non_validated_0038_03.png 20240807-11:53:34 wrote state.pth 20240807-11:53:34 --- epoch 39 ---------------------------------------- 20240807-11:53:34 current_test_accuracies 0.8670 0.8543 0.8892 0.8579 0.8955 20240807-11:53:34 training model 1 20240807-12:10:18 train_perplexity 39 model 1 1.1665159923811266 20240807-12:10:33 test_perplexity 39 model 1 1.162907235570981 20240807-12:16:39 test_accuracy 39 model 1 val 1417 / 1601 20240807-12:16:42 wrote gpt_001.pth 20240807-12:17:43 wrote non_validated_0039_01.png 20240807-12:17:43 wrote state.pth 20240807-12:17:43 --- epoch 40 ---------------------------------------- 20240807-12:17:43 current_test_accuracies 0.8670 0.8851 0.8892 0.8579 0.8955 20240807-12:17:43 training model 3 20240807-12:34:28 train_perplexity 40 model 3 1.167399135854805 20240807-12:34:43 test_perplexity 40 model 3 1.1634363015231963 20240807-12:40:53 test_accuracy 40 model 3 val 1356 / 1592 20240807-12:40:56 wrote gpt_003.pth 20240807-12:41:58 wrote non_validated_0040_03.png 20240807-12:41:58 wrote state.pth 20240807-12:41:58 --- epoch 41 ---------------------------------------- 20240807-12:41:58 current_test_accuracies 0.8670 0.8851 0.8892 0.8518 0.8955 20240807-12:41:58 training model 3 20240807-12:58:41 train_perplexity 41 model 3 1.1655301783777123 20240807-12:58:56 test_perplexity 41 model 3 1.1620589432127466 20240807-13:05:08 test_accuracy 41 model 3 val 1374 / 1586 20240807-13:05:12 wrote gpt_003.pth 20240807-13:06:13 wrote non_validated_0041_03.png 20240807-13:06:13 wrote state.pth 20240807-13:06:13 --- epoch 42 ---------------------------------------- 20240807-13:06:13 current_test_accuracies 0.8670 0.8851 0.8892 0.8663 0.8955 20240807-13:06:13 training model 3 20240807-13:22:56 train_perplexity 42 model 3 1.1641135830075224 20240807-13:23:12 test_perplexity 42 model 3 1.1591699466311751 20240807-13:29:26 test_accuracy 42 model 3 val 1424 / 1581 20240807-13:29:29 wrote gpt_003.pth 20240807-13:30:31 wrote non_validated_0042_03.png 20240807-13:30:31 wrote state.pth 20240807-13:30:31 --- epoch 43 ---------------------------------------- 20240807-13:30:31 current_test_accuracies 0.8670 0.8851 0.8892 0.9007 0.8955 20240807-13:30:31 training model 0 20240807-13:47:14 train_perplexity 43 model 0 1.1648963867392126 20240807-13:47:30 test_perplexity 43 model 0 1.1635048466340276 20240807-13:53:27 test_accuracy 43 model 0 val 1468 / 1632 20240807-13:53:31 wrote gpt_000.pth 20240807-13:54:31 wrote non_validated_0043_00.png 20240807-13:54:31 wrote state.pth 20240807-13:54:31 --- epoch 44 ---------------------------------------- 20240807-13:54:31 current_test_accuracies 0.8995 0.8851 0.8892 0.9007 0.8955 20240807-13:54:31 training model 1 20240807-14:11:15 train_perplexity 44 model 1 1.1648586265049607 20240807-14:11:31 test_perplexity 44 model 1 1.1623639046585919 20240807-14:17:37 test_accuracy 44 model 1 val 1365 / 1612 20240807-14:17:41 wrote gpt_001.pth 20240807-14:18:43 wrote non_validated_0044_01.png 20240807-14:18:43 wrote state.pth 20240807-14:18:43 --- epoch 45 ---------------------------------------- 20240807-14:18:43 current_test_accuracies 0.8995 0.8468 0.8892 0.9007 0.8955 20240807-14:18:43 training model 1 20240807-14:35:26 train_perplexity 45 model 1 1.163864061127823 20240807-14:35:42 test_perplexity 45 model 1 1.1615995360681455 20240807-14:41:56 test_accuracy 45 model 1 val 1422 / 1598 20240807-14:41:59 wrote gpt_001.pth 20240807-14:43:01 wrote non_validated_0045_01.png 20240807-14:43:01 wrote state.pth 20240807-14:43:01 --- epoch 46 ---------------------------------------- 20240807-14:43:01 current_test_accuracies 0.8995 0.8899 0.8892 0.9007 0.8955 20240807-14:43:01 training model 2 20240807-14:59:45 train_perplexity 46 model 2 1.1651954997238918 20240807-15:00:01 test_perplexity 46 model 2 1.1625367873439596 20240807-15:06:10 test_accuracy 46 model 2 val 1456 / 1611 20240807-15:06:14 wrote gpt_002.pth 20240807-15:07:15 wrote non_validated_0046_02.png 20240807-15:07:15 wrote state.pth 20240807-15:07:15 --- epoch 47 ---------------------------------------- 20240807-15:07:15 current_test_accuracies 0.8995 0.8899 0.9038 0.9007 0.8955 20240807-15:07:15 training model 1 20240807-15:23:59 train_perplexity 47 model 1 1.1628237289015473 20240807-15:24:14 test_perplexity 47 model 1 1.157753273207181 20240807-15:30:23 test_accuracy 47 model 1 val 1441 / 1601 20240807-15:30:26 wrote gpt_001.pth 20240807-15:31:28 wrote non_validated_0047_01.png 20240807-15:31:28 wrote state.pth 20240807-15:31:28 --- epoch 48 ---------------------------------------- 20240807-15:31:28 current_test_accuracies 0.8995 0.9001 0.9038 0.9007 0.8955 20240807-15:31:28 training model 4 20240807-15:48:12 train_perplexity 48 model 4 1.1651960381737736 20240807-15:48:28 test_perplexity 48 model 4 1.1606992882320843 20240807-15:54:37 test_accuracy 48 model 4 val 1418 / 1606 20240807-15:54:41 wrote gpt_004.pth 20240807-15:55:42 wrote non_validated_0048_04.png 20240807-15:55:42 wrote state.pth 20240807-15:55:42 --- epoch 49 ---------------------------------------- 20240807-15:55:42 current_test_accuracies 0.8995 0.9001 0.9038 0.9007 0.8829 20240807-15:55:42 training model 4 20240807-16:12:26 train_perplexity 49 model 4 1.1637465821463662 20240807-16:12:42 test_perplexity 49 model 4 1.1604818795717036 20240807-16:18:46 test_accuracy 49 model 4 val 1473 / 1627 20240807-16:18:49 wrote gpt_004.pth 20240807-16:19:51 wrote non_validated_0049_04.png 20240807-16:19:51 wrote state.pth 20240807-16:19:51 --- epoch 50 ---------------------------------------- 20240807-16:19:51 current_test_accuracies 0.8995 0.9001 0.9038 0.9007 0.9053 20240807-16:19:51 training model 0 20240807-16:36:35 train_perplexity 50 model 0 1.1641129937790315 20240807-16:36:51 test_perplexity 50 model 0 1.1598396958216879 20240807-16:43:05 test_accuracy 50 model 0 val 1442 / 1595 20240807-16:43:08 wrote gpt_000.pth 20240807-16:44:13 wrote non_validated_0050_00.png 20240807-16:44:13 wrote state.pth 20240807-16:44:13 --- epoch 51 ---------------------------------------- 20240807-16:44:13 current_test_accuracies 0.9041 0.9001 0.9038 0.9007 0.9053 20240807-16:44:13 training model 1 20240807-17:00:55 train_perplexity 51 model 1 1.1618904933075307 20240807-17:01:11 test_perplexity 51 model 1 1.1606623146649533 20240807-17:07:24 test_accuracy 51 model 1 val 1465 / 1585 20240807-17:07:28 wrote gpt_001.pth 20240807-17:08:30 wrote non_validated_0051_01.png 20240807-17:08:30 wrote state.pth 20240807-17:08:30 --- epoch 52 ---------------------------------------- 20240807-17:08:30 current_test_accuracies 0.9041 0.9243 0.9038 0.9007 0.9053 20240807-17:08:30 training model 3 20240807-17:25:13 train_perplexity 52 model 3 1.1635486462861693 20240807-17:25:28 test_perplexity 52 model 3 1.1591207899002083 20240807-17:31:39 test_accuracy 52 model 3 val 1461 / 1595 20240807-17:31:43 wrote gpt_003.pth 20240807-17:32:45 wrote non_validated_0052_03.png 20240807-17:32:45 wrote state.pth 20240807-17:32:45 --- epoch 53 ---------------------------------------- 20240807-17:32:45 current_test_accuracies 0.9041 0.9243 0.9038 0.9160 0.9053 20240807-17:32:45 training model 2 20240807-17:49:28 train_perplexity 53 model 2 1.1641016071888566 20240807-17:49:44 test_perplexity 53 model 2 1.160834916546838 20240807-17:55:55 test_accuracy 53 model 2 val 1471 / 1590 20240807-17:55:58 wrote gpt_002.pth 20240807-17:57:00 wrote non_validated_0053_02.png 20240807-17:57:00 wrote state.pth 20240807-17:57:00 --- epoch 54 ---------------------------------------- 20240807-17:57:00 current_test_accuracies 0.9041 0.9243 0.9252 0.9160 0.9053 20240807-17:57:00 training model 0 20240807-18:13:42 train_perplexity 54 model 0 1.1627405995135323 20240807-18:13:58 test_perplexity 54 model 0 1.159022834281905 20240807-18:20:15 test_accuracy 54 model 0 val 1462 / 1577 20240807-18:20:18 wrote gpt_000.pth 20240807-18:21:20 wrote non_validated_0054_00.png 20240807-18:21:20 wrote state.pth 20240807-18:21:20 --- epoch 55 ---------------------------------------- 20240807-18:21:20 current_test_accuracies 0.9271 0.9243 0.9252 0.9160 0.9053 20240807-18:21:20 training model 4 20240807-18:38:03 train_perplexity 55 model 4 1.1627133850711322 20240807-18:38:18 test_perplexity 55 model 4 1.1590362448658187 20240807-18:44:35 test_accuracy 55 model 4 val 1463 / 1577 20240807-18:44:39 wrote gpt_004.pth 20240807-18:45:39 wrote non_validated_0055_04.png 20240807-18:45:39 wrote state.pth 20240807-18:45:39 --- epoch 56 ---------------------------------------- 20240807-18:45:39 current_test_accuracies 0.9271 0.9243 0.9252 0.9160 0.9277 20240807-18:45:39 training model 3 20240807-19:02:22 train_perplexity 56 model 3 1.1624764425329588 20240807-19:02:38 test_perplexity 56 model 3 1.1595731689648634 20240807-19:08:51 test_accuracy 56 model 3 val 1453 / 1587 20240807-19:08:55 wrote gpt_003.pth 20240807-19:09:56 wrote non_validated_0056_03.png 20240807-19:09:56 wrote state.pth 20240807-19:09:56 --- epoch 57 ---------------------------------------- 20240807-19:09:56 current_test_accuracies 0.9271 0.9243 0.9252 0.9156 0.9277 20240807-19:09:56 training model 3 20240807-19:17:29 argv ./main.py --result_dir=results_noise_L --model=122M --nb_train_samples=40000 --nb_test_samples=2000 --grids_world_tasks=replace_color,translate,grow,half_fill,frame,detect,corners,contact --accuracy_to_make_c_quizzes=0.95 --prompt_noise=0.05 --batch_size=10 --inference_batch_size=10 --nb_gpts=5 --max_fail_to_validate=3 --proba_understands=0.75 --seed=12345 --resume 20240807-19:17:29 args.log_filename train.log 20240807-19:17:29 args.result_dir results_noise_L 20240807-19:17:29 args.seed 12345 20240807-19:17:29 args.resume True 20240807-19:17:29 args.max_percents_of_test_in_train -1 20240807-19:17:29 args.log_command None 20240807-19:17:29 args.nb_epochs 10000 20240807-19:17:29 args.batch_size 10 20240807-19:17:29 args.physical_batch_size None 20240807-19:17:29 args.inference_batch_size 10 20240807-19:17:29 args.nb_train_samples 40000 20240807-19:17:29 args.nb_test_samples 2000 20240807-19:17:29 args.nb_new_c_quizzes_for_train None 20240807-19:17:29 args.nb_new_c_quizzes_for_test None 20240807-19:17:29 args.learning_rate 0.0005 20240807-19:17:29 args.model 122M 20240807-19:17:29 args.dim_model 768 20240807-19:17:29 args.dim_keys 64 20240807-19:17:29 args.dim_hidden 2048 20240807-19:17:29 args.nb_heads 8 20240807-19:17:29 args.nb_blocks 24 20240807-19:17:29 args.dropout 0.1 20240807-19:17:29 args.deterministic_synthesis False 20240807-19:17:29 args.problem grids 20240807-19:17:29 args.nb_threads 1 20240807-19:17:29 args.gpus all 20240807-19:17:29 args.nb_gpts 5 20240807-19:17:29 args.max_fail_to_validate 3 20240807-19:17:29 args.accuracy_to_make_c_quizzes 0.95 20240807-19:17:29 args.proba_understands 0.75 20240807-19:17:29 args.proba_not_understands 0.5 20240807-19:17:29 args.temperature_hot 1.5 20240807-19:17:29 args.temperature_cold 1 20240807-19:17:29 args.prompt_noise 0.05 20240807-19:17:29 args.nb_averaging_rounds 3 20240807-19:17:29 args.dirty_debug False 20240807-19:17:29 args.test None 20240807-19:17:29 args.grids_world_tasks replace_color,translate,grow,half_fill,frame,detect,corners,contact 20240807-19:17:29 args.grids_science_tasks None 20240807-19:17:29 args.sky_height 6 20240807-19:17:29 args.sky_width 8 20240807-19:17:29 args.sky_nb_birds 3 20240807-19:17:29 args.sky_nb_iterations 2 20240807-19:17:29 args.sky_speed 3 20240807-19:17:29 main_device cuda:0 gpus ['cuda:0'] 20240807-19:17:29 vocabulary_size 15 20240807-19:17:29 creating model 0 and its w_quizzes 20240807-19:17:42 creating model 1 and its w_quizzes 20240807-19:17:56 creating model 2 and its w_quizzes 20240807-19:18:10 creating model 3 and its w_quizzes 20240807-19:18:23 creating model 4 and its w_quizzes 20240807-19:18:38 successfully loaded gpt_000.pth 20240807-19:18:38 successfully loaded gpt_001.pth 20240807-19:18:39 successfully loaded gpt_002.pth 20240807-19:18:39 successfully loaded gpt_003.pth 20240807-19:18:40 successfully loaded gpt_004.pth 20240807-19:18:40 cannot find c_quizzes.pth 20240807-19:18:40 successfully loaded state.pth 20240807-19:18:40 nb_parameters 122847759 (122M) 20240807-19:18:40 nb_new_c_quizzes_for_train 400 nb_new_c_quizzes_for_test 20 20240807-19:18:40 wrote state.pth 20240807-19:18:40 --- epoch 57 ---------------------------------------- 20240807-19:18:40 current_test_accuracies 0.9271 0.9243 0.9252 0.9156 0.9277 20240807-19:18:40 training model 3 20240807-19:19:45 argv ./main.py --result_dir=results_noise_L --model=122M --nb_train_samples=40000 --nb_test_samples=2000 --grids_world_tasks=replace_color,translate,grow,half_fill,frame,detect,corners,contact --accuracy_to_make_c_quizzes=0.95 --prompt_noise=0.05 --batch_size=10 --inference_batch_size=10 --nb_gpts=5 --max_fail_to_validate=3 --proba_understands=0.75 --seed=12345 --resume 20240807-19:19:45 args.log_filename train.log 20240807-19:19:45 args.result_dir results_noise_L 20240807-19:19:45 args.seed 12345 20240807-19:19:45 args.resume True 20240807-19:19:45 args.max_percents_of_test_in_train -1 20240807-19:19:45 args.log_command None 20240807-19:19:45 args.nb_epochs 10000 20240807-19:19:45 args.batch_size 10 20240807-19:19:45 args.physical_batch_size None 20240807-19:19:45 args.inference_batch_size 10 20240807-19:19:45 args.nb_train_samples 40000 20240807-19:19:45 args.nb_test_samples 2000 20240807-19:19:45 args.nb_new_c_quizzes_for_train None 20240807-19:19:45 args.nb_new_c_quizzes_for_test None 20240807-19:19:45 args.learning_rate 0.0005 20240807-19:19:45 args.model 122M 20240807-19:19:45 args.dim_model 768 20240807-19:19:45 args.dim_keys 64 20240807-19:19:45 args.dim_hidden 2048 20240807-19:19:45 args.nb_heads 8 20240807-19:19:45 args.nb_blocks 24 20240807-19:19:45 args.dropout 0.1 20240807-19:19:45 args.deterministic_synthesis False 20240807-19:19:45 args.problem grids 20240807-19:19:45 args.nb_threads 1 20240807-19:19:45 args.gpus all 20240807-19:19:45 args.nb_gpts 5 20240807-19:19:45 args.max_fail_to_validate 3 20240807-19:19:45 args.accuracy_to_make_c_quizzes 0.95 20240807-19:19:45 args.proba_understands 0.75 20240807-19:19:45 args.proba_not_understands 0.5 20240807-19:19:45 args.temperature_hot 1.5 20240807-19:19:45 args.temperature_cold 1 20240807-19:19:45 args.prompt_noise 0.05 20240807-19:19:45 args.nb_averaging_rounds 3 20240807-19:19:45 args.dirty_debug False 20240807-19:19:45 args.test None 20240807-19:19:45 args.grids_world_tasks replace_color,translate,grow,half_fill,frame,detect,corners,contact 20240807-19:19:45 args.grids_science_tasks None 20240807-19:19:45 args.sky_height 6 20240807-19:19:45 args.sky_width 8 20240807-19:19:45 args.sky_nb_birds 3 20240807-19:19:45 args.sky_nb_iterations 2 20240807-19:19:45 args.sky_speed 3 20240807-19:19:45 main_device cuda:0 gpus ['cuda:0'] 20240807-19:19:45 vocabulary_size 15 20240807-19:19:45 creating model 0 and its w_quizzes 20240807-19:19:58 creating model 1 and its w_quizzes 20240807-19:20:12 creating model 2 and its w_quizzes 20240807-19:20:25 creating model 3 and its w_quizzes 20240807-19:20:38 creating model 4 and its w_quizzes 20240807-19:20:52 successfully loaded gpt_000.pth 20240807-19:20:53 successfully loaded gpt_001.pth 20240807-19:20:53 successfully loaded gpt_002.pth 20240807-19:20:54 successfully loaded gpt_003.pth 20240807-19:20:54 successfully loaded gpt_004.pth 20240807-19:20:54 cannot find c_quizzes.pth 20240807-19:20:54 successfully loaded state.pth 20240807-19:20:54 nb_parameters 122847759 (122M) 20240807-19:20:54 nb_new_c_quizzes_for_train 400 nb_new_c_quizzes_for_test 20 20240807-19:20:54 wrote state.pth 20240807-19:20:54 --- epoch 57 ---------------------------------------- 20240807-19:20:54 current_test_accuracies 0.9271 0.9243 0.9252 0.9156 0.9277 20240807-19:20:54 training model 3 20240807-19:37:36 train_perplexity 57 model 3 1.161359401467538 20240807-19:37:51 test_perplexity 57 model 3 1.1601606813082572 20240807-19:43:57 test_accuracy 57 model 3 val 1510 / 1620 20240807-19:44:01 wrote gpt_003.pth 20240807-19:45:05 wrote non_validated_0057_03.png 20240807-19:45:05 wrote state.pth 20240807-19:45:05 --- epoch 58 ---------------------------------------- 20240807-19:45:05 current_test_accuracies 0.9271 0.9243 0.9252 0.9321 0.9277 20240807-19:45:05 training model 1 20240807-20:01:45 train_perplexity 58 model 1 1.1619614593433862 20240807-20:02:01 test_perplexity 58 model 1 1.1603010800493796 20240807-20:08:09 test_accuracy 58 model 1 val 1485 / 1601 20240807-20:08:12 wrote gpt_001.pth 20240807-20:09:14 wrote non_validated_0058_01.png 20240807-20:09:14 wrote state.pth 20240807-20:09:14 --- epoch 59 ---------------------------------------- 20240807-20:09:14 current_test_accuracies 0.9271 0.9275 0.9252 0.9321 0.9277 20240807-20:09:14 training model 2 20240807-20:25:56 train_perplexity 59 model 2 1.1632406249223617 20240807-20:26:11 test_perplexity 59 model 2 1.1621496189255025 20240807-20:32:23 test_accuracy 59 model 2 val 1476 / 1589 20240807-20:32:26 wrote gpt_002.pth 20240807-20:33:28 wrote non_validated_0059_02.png 20240807-20:33:28 wrote state.pth 20240807-20:33:28 --- epoch 60 ---------------------------------------- 20240807-20:33:28 current_test_accuracies 0.9271 0.9275 0.9289 0.9321 0.9277 20240807-20:33:28 training model 0 20240807-20:50:09 train_perplexity 60 model 0 1.1619058241518772 20240807-20:50:25 test_perplexity 60 model 0 1.1605439007493488 20240807-20:56:33 test_accuracy 60 model 0 val 1501 / 1614 20240807-20:56:37 wrote gpt_000.pth 20240807-20:57:39 wrote non_validated_0060_00.png 20240807-20:57:39 wrote state.pth 20240807-20:57:39 --- epoch 61 ---------------------------------------- 20240807-20:57:39 current_test_accuracies 0.9300 0.9275 0.9289 0.9321 0.9277 20240807-20:57:39 training model 1 20240807-21:14:20 train_perplexity 61 model 1 1.1607720478077594 20240807-21:14:36 test_perplexity 61 model 1 1.1585623816063795 20240807-21:20:48 test_accuracy 61 model 1 val 1518 / 1599 20240807-21:20:51 wrote gpt_001.pth 20240807-21:21:53 wrote non_validated_0061_01.png 20240807-21:21:53 wrote state.pth 20240807-21:21:53 --- epoch 62 ---------------------------------------- 20240807-21:21:53 current_test_accuracies 0.9300 0.9493 0.9289 0.9321 0.9277 20240807-21:21:53 training model 4 20240807-21:38:34 train_perplexity 62 model 4 1.1617806513297513 20240807-21:38:50 test_perplexity 62 model 4 1.156354863227653 20240807-21:44:55 test_accuracy 62 model 4 val 1486 / 1620 20240807-21:44:59 wrote gpt_004.pth 20240807-21:46:00 wrote non_validated_0062_04.png 20240807-21:46:00 wrote state.pth 20240807-21:46:00 --- epoch 63 ---------------------------------------- 20240807-21:46:00 current_test_accuracies 0.9300 0.9493 0.9289 0.9321 0.9173 20240807-21:46:00 training model 4 20240807-22:02:41 train_perplexity 63 model 4 1.161545851383735 20240807-22:02:57 test_perplexity 63 model 4 1.161194381915986 20240807-22:09:04 test_accuracy 63 model 4 val 1454 / 1613 20240807-22:09:08 wrote gpt_004.pth 20240807-22:10:10 wrote non_validated_0063_04.png 20240807-22:10:10 wrote state.pth 20240807-22:10:10 --- epoch 64 ---------------------------------------- 20240807-22:10:10 current_test_accuracies 0.9300 0.9493 0.9289 0.9321 0.9014 20240807-22:10:10 training model 4 20240807-22:26:51 train_perplexity 64 model 4 1.1605655515227915 20240807-22:27:07 test_perplexity 64 model 4 1.15842678080896 20240807-22:33:18 test_accuracy 64 model 4 val 1494 / 1594 20240807-22:33:22 wrote gpt_004.pth 20240807-22:34:24 wrote non_validated_0064_04.png 20240807-22:34:24 wrote state.pth 20240807-22:34:24 --- epoch 65 ---------------------------------------- 20240807-22:34:24 current_test_accuracies 0.9300 0.9493 0.9289 0.9321 0.9373 20240807-22:34:24 training model 2 20240807-22:51:05 train_perplexity 65 model 2 1.1623008847689837 20240807-22:51:21 test_perplexity 65 model 2 1.1604737593750263 20240807-22:57:44 test_accuracy 65 model 2 val 1424 / 1575 20240807-22:57:47 wrote gpt_002.pth 20240807-22:58:49 wrote non_validated_0065_02.png 20240807-22:58:49 wrote state.pth 20240807-22:58:49 --- epoch 66 ---------------------------------------- 20240807-22:58:49 current_test_accuracies 0.9300 0.9493 0.9041 0.9321 0.9373 20240807-22:58:49 training model 2 20240807-23:15:32 train_perplexity 66 model 2 1.1609854579978742 20240807-23:15:48 test_perplexity 66 model 2 1.157688243340268 20240807-23:21:59 test_accuracy 66 model 2 val 1491 / 1602 20240807-23:22:02 wrote gpt_002.pth 20240807-23:23:04 wrote non_validated_0066_02.png 20240807-23:23:04 wrote state.pth 20240807-23:23:04 --- epoch 67 ---------------------------------------- 20240807-23:23:04 current_test_accuracies 0.9300 0.9493 0.9307 0.9321 0.9373 20240807-23:23:04 training model 0 20240807-23:39:45 train_perplexity 67 model 0 1.1616109680385143 20240807-23:40:01 test_perplexity 67 model 0 1.1581218339620285 20240807-23:46:09 test_accuracy 67 model 0 val 1535 / 1617 20240807-23:46:12 wrote gpt_000.pth 20240807-23:47:15 wrote non_validated_0067_00.png 20240807-23:47:15 wrote state.pth 20240807-23:47:15 --- epoch 68 ---------------------------------------- 20240807-23:47:15 current_test_accuracies 0.9493 0.9493 0.9307 0.9321 0.9373 20240807-23:47:15 training model 2 20240808-00:03:58 train_perplexity 68 model 2 1.1609973167347865 20240808-00:04:13 test_perplexity 68 model 2 1.1578852193900886 20240808-00:10:31 test_accuracy 68 model 2 val 1459 / 1573 20240808-00:10:34 wrote gpt_002.pth 20240808-00:11:36 wrote non_validated_0068_02.png 20240808-00:11:36 wrote state.pth 20240808-00:11:36 --- epoch 69 ---------------------------------------- 20240808-00:11:36 current_test_accuracies 0.9493 0.9493 0.9275 0.9321 0.9373 20240808-00:11:36 training model 2 20240808-00:28:18 train_perplexity 69 model 2 1.1605210591821307 20240808-00:28:34 test_perplexity 69 model 2 1.1568893865850094 20240808-00:34:51 test_accuracy 69 model 2 val 1494 / 1579 20240808-00:34:54 wrote gpt_002.pth 20240808-00:35:57 wrote non_validated_0069_02.png 20240808-00:35:57 wrote state.pth 20240808-00:35:57 --- epoch 70 ---------------------------------------- 20240808-00:35:57 current_test_accuracies 0.9493 0.9493 0.9462 0.9321 0.9373 20240808-00:35:57 training model 3 20240808-00:52:39 train_perplexity 70 model 3 1.1610148739318924 20240808-00:52:54 test_perplexity 70 model 3 1.1579514791152603 20240808-00:59:05 test_accuracy 70 model 3 val 1444 / 1598 20240808-00:59:08 wrote gpt_003.pth 20240808-01:00:10 wrote non_validated_0070_03.png 20240808-01:00:10 wrote state.pth 20240808-01:00:10 --- epoch 71 ---------------------------------------- 20240808-01:00:10 current_test_accuracies 0.9493 0.9493 0.9462 0.9036 0.9373 20240808-01:00:10 training model 3 20240808-01:16:52 train_perplexity 71 model 3 1.1606477315880515 20240808-01:17:07 test_perplexity 71 model 3 1.1594135623298762 20240808-01:23:19 test_accuracy 71 model 3 val 1507 / 1605 20240808-01:23:22 wrote gpt_003.pth 20240808-01:24:24 wrote non_validated_0071_03.png 20240808-01:24:24 wrote state.pth 20240808-01:24:24 --- epoch 72 ---------------------------------------- 20240808-01:24:24 current_test_accuracies 0.9493 0.9493 0.9462 0.9389 0.9373 20240808-01:24:24 training model 4 20240808-01:41:05 train_perplexity 72 model 4 1.1602787484301471 20240808-01:41:20 test_perplexity 72 model 4 1.1590983920296907 20240808-01:47:35 test_accuracy 72 model 4 val 1493 / 1592 20240808-01:47:39 wrote gpt_004.pth 20240808-01:48:42 wrote non_validated_0072_04.png 20240808-01:48:42 wrote state.pth 20240808-01:48:42 --- epoch 73 ---------------------------------------- 20240808-01:48:42 current_test_accuracies 0.9493 0.9493 0.9462 0.9389 0.9378 20240808-01:48:42 training model 4 20240808-02:05:22 train_perplexity 73 model 4 1.1600339706878529 20240808-02:05:38 test_perplexity 73 model 4 1.1584402944557877 20240808-02:11:49 test_accuracy 73 model 4 val 1499 / 1606 20240808-02:11:52 wrote gpt_004.pth 20240808-02:12:53 wrote non_validated_0073_04.png 20240808-02:12:53 wrote state.pth 20240808-02:12:53 --- epoch 74 ---------------------------------------- 20240808-02:12:53 current_test_accuracies 0.9493 0.9493 0.9462 0.9389 0.9334 20240808-02:12:53 training model 4 20240808-02:29:35 train_perplexity 74 model 4 1.1596955161977915 20240808-02:29:50 test_perplexity 74 model 4 1.1592670798526759 20240808-02:36:02 test_accuracy 74 model 4 val 1497 / 1605 20240808-02:36:05 wrote gpt_004.pth 20240808-02:37:07 wrote non_validated_0074_04.png 20240808-02:37:07 wrote state.pth 20240808-02:37:07 --- epoch 75 ---------------------------------------- 20240808-02:37:07 current_test_accuracies 0.9493 0.9493 0.9462 0.9389 0.9327 20240808-02:37:07 training model 4 20240808-02:53:48 train_perplexity 75 model 4 1.1594020259039495 20240808-02:54:03 test_perplexity 75 model 4 1.1554891914661631 20240808-03:00:21 test_accuracy 75 model 4 val 1482 / 1564 20240808-03:00:24 wrote gpt_004.pth 20240808-03:01:26 wrote non_validated_0075_04.png 20240808-03:01:26 wrote state.pth 20240808-03:01:26 --- epoch 76 ---------------------------------------- 20240808-03:01:26 current_test_accuracies 0.9493 0.9493 0.9462 0.9389 0.9476 20240808-03:01:26 training model 3 20240808-03:18:07 train_perplexity 76 model 3 1.160450836002672 20240808-03:18:22 test_perplexity 76 model 3 1.1589900501304864 20240808-03:24:36 test_accuracy 76 model 3 val 1493 / 1587 20240808-03:24:40 wrote gpt_003.pth 20240808-03:25:42 wrote non_validated_0076_03.png 20240808-03:25:42 wrote state.pth 20240808-03:25:42 --- epoch 77 ---------------------------------------- 20240808-03:25:42 current_test_accuracies 0.9493 0.9493 0.9462 0.9408 0.9476 20240808-03:25:42 training model 3 20240808-03:42:23 train_perplexity 77 model 3 1.1594588220596438 20240808-03:42:38 test_perplexity 77 model 3 1.1586674202587786 20240808-03:48:49 test_accuracy 77 model 3 val 1491 / 1591 20240808-03:48:52 wrote gpt_003.pth 20240808-03:49:52 wrote non_validated_0077_03.png 20240808-03:49:52 wrote state.pth 20240808-03:49:52 --- epoch 78 ---------------------------------------- 20240808-03:49:52 current_test_accuracies 0.9493 0.9493 0.9462 0.9371 0.9476 20240808-03:49:52 training model 3 20240808-04:06:32 train_perplexity 78 model 3 1.1598791853639094 20240808-04:06:48 test_perplexity 78 model 3 1.156245629040479 20240808-04:13:11 test_accuracy 78 model 3 val 1499 / 1577 20240808-04:13:14 wrote gpt_003.pth 20240808-04:14:14 wrote non_validated_0078_03.png 20240808-04:14:14 wrote state.pth 20240808-04:14:14 --- epoch 79 ---------------------------------------- 20240808-04:14:14 current_test_accuracies 0.9493 0.9493 0.9462 0.9505 0.9476 20240808-04:14:14 training model 2 20240808-04:30:54 train_perplexity 79 model 2 1.1600452686468692 20240808-04:31:10 test_perplexity 79 model 2 1.1571939856786582 20240808-04:37:18 test_accuracy 79 model 2 val 1541 / 1615 20240808-04:37:22 wrote gpt_002.pth 20240808-04:38:22 wrote non_validated_0079_02.png 20240808-04:38:22 wrote state.pth 20240808-04:38:22 --- epoch 80 ---------------------------------------- 20240808-04:38:22 current_test_accuracies 0.9493 0.9493 0.9542 0.9505 0.9476 20240808-04:38:22 training model 4 20240808-04:55:02 train_perplexity 80 model 4 1.1594601112674472 20240808-04:55:17 test_perplexity 80 model 4 1.1580643649272526 20240808-05:01:29 test_accuracy 80 model 4 val 1511 / 1605 20240808-05:01:32 wrote gpt_004.pth 20240808-05:02:33 wrote non_validated_0080_04.png 20240808-05:02:33 wrote state.pth 20240808-05:02:33 --- epoch 81 ---------------------------------------- 20240808-05:02:33 current_test_accuracies 0.9493 0.9493 0.9542 0.9505 0.9414 20240808-05:02:33 training model 4 20240808-05:19:13 train_perplexity 81 model 4 1.1586797401340883 20240808-05:19:29 test_perplexity 81 model 4 1.1579084472092795 20240808-05:25:39 test_accuracy 81 model 4 val 1527 / 1603 20240808-05:25:42 wrote gpt_004.pth 20240808-05:26:44 wrote non_validated_0081_04.png 20240808-05:26:44 wrote state.pth 20240808-05:26:44 --- epoch 82 ---------------------------------------- 20240808-05:26:44 current_test_accuracies 0.9493 0.9493 0.9542 0.9505 0.9526 20240808-05:26:44 training model 0 20240808-05:43:25 train_perplexity 82 model 0 1.1606957425641482 20240808-05:43:40 test_perplexity 82 model 0 1.156736047826147 20240808-05:49:44 test_accuracy 82 model 0 val 1539 / 1631 20240808-05:49:48 wrote gpt_000.pth 20240808-05:50:50 wrote non_validated_0082_00.png 20240808-05:50:50 wrote state.pth 20240808-05:50:50 --- epoch 83 ---------------------------------------- 20240808-05:50:50 current_test_accuracies 0.9436 0.9493 0.9542 0.9505 0.9526 20240808-05:50:50 training model 0 20240808-06:07:30 train_perplexity 83 model 0 1.1606393291480566 20240808-06:07:45 test_perplexity 83 model 0 1.1591902034774724 20240808-06:13:53 test_accuracy 83 model 0 val 1536 / 1600 20240808-06:13:56 wrote gpt_000.pth 20240808-06:14:58 wrote non_validated_0083_00.png 20240808-06:14:58 wrote state.pth 20240808-06:14:58 --- epoch 84 ---------------------------------------- 20240808-06:14:58 current_test_accuracies 0.9600 0.9493 0.9542 0.9505 0.9526 20240808-06:14:58 training model 1 20240808-06:31:39 train_perplexity 84 model 1 1.1602200699365661 20240808-06:31:54 test_perplexity 84 model 1 1.1591893586416517 20240808-06:38:01 test_accuracy 84 model 1 val 1489 / 1613 20240808-06:38:04 wrote gpt_001.pth 20240808-06:39:04 wrote non_validated_0084_01.png 20240808-06:39:04 wrote state.pth 20240808-06:39:04 --- epoch 85 ---------------------------------------- 20240808-06:39:04 current_test_accuracies 0.9600 0.9231 0.9542 0.9505 0.9526 20240808-06:39:04 training model 1 20240808-06:55:44 train_perplexity 85 model 1 1.1602137666761434 20240808-06:56:00 test_perplexity 85 model 1 1.1585177824099009 20240808-07:02:08 test_accuracy 85 model 1 val 1523 / 1618 20240808-07:02:11 wrote gpt_001.pth 20240808-07:03:13 wrote non_validated_0085_01.png 20240808-07:03:13 wrote state.pth 20240808-07:03:13 --- epoch 86 ---------------------------------------- 20240808-07:03:13 current_test_accuracies 0.9600 0.9413 0.9542 0.9505 0.9526 20240808-07:03:13 training model 1 20240808-07:19:53 train_perplexity 86 model 1 1.1603552183202588 20240808-07:20:08 test_perplexity 86 model 1 1.156298161045216 20240808-07:26:24 test_accuracy 86 model 1 val 1529 / 1595 20240808-07:26:27 wrote gpt_001.pth 20240808-07:27:29 wrote non_validated_0086_01.png 20240808-07:27:29 wrote state.pth 20240808-07:27:29 --- epoch 87 ---------------------------------------- 20240808-07:27:29 current_test_accuracies 0.9600 0.9586 0.9542 0.9505 0.9526 20240808-07:34:37 keep c_quizzes model 0 validated 42 / 420 (10.00%) nb_accumulated 42 / 420 (finishes Thu 08:38 -- 353/h) 20240808-07:41:31 keep c_quizzes model 4 validated 33 / 420 (7.86%) nb_accumulated 75 / 420 (finishes Thu 08:46 -- 320/h) 20240808-07:48:32 keep c_quizzes model 0 validated 24 / 420 (5.71%) nb_accumulated 99 / 420 (finishes Thu 08:56 -- 282/h) 20240808-07:55:33 keep c_quizzes model 0 validated 39 / 420 (9.29%) nb_accumulated 138 / 420 (finishes Thu 08:52 -- 295/h) 20240808-08:02:32 keep c_quizzes model 1 validated 17 / 420 (4.05%) nb_accumulated 155 / 420 (finishes Thu 09:02 -- 265/h) 20240808-08:09:33 keep c_quizzes model 1 validated 20 / 420 (4.76%) nb_accumulated 175 / 420 (finishes Thu 09:08 -- 249/h) 20240808-08:16:34 keep c_quizzes model 1 validated 22 / 420 (5.24%) nb_accumulated 197 / 420 (finishes Thu 09:12 -- 240/h) 20240808-08:23:35 keep c_quizzes model 3 validated 23 / 420 (5.48%) nb_accumulated 220 / 420 (finishes Thu 09:14 -- 235/h) 20240808-08:30:38 keep c_quizzes model 1 validated 31 / 420 (7.38%) nb_accumulated 251 / 420 (finishes Thu 09:13 -- 238/h) 20240808-08:37:40 keep c_quizzes model 4 validated 18 / 420 (4.29%) nb_accumulated 269 / 420 (finishes Thu 09:17 -- 229/h) 20240808-08:44:40 keep c_quizzes model 4 validated 17 / 420 (4.05%) nb_accumulated 286 / 420 (finishes Thu 09:20 -- 222/h) 20240808-08:51:39 keep c_quizzes model 4 validated 13 / 420 (3.10%) nb_accumulated 299 / 420 (finishes Thu 09:25 -- 213/h) 20240808-08:58:36 keep c_quizzes model 4 validated 12 / 420 (2.86%) nb_accumulated 311 / 420 (finishes Thu 09:30 -- 204/h) 20240808-09:05:35 keep c_quizzes model 3 validated 17 / 420 (4.05%) nb_accumulated 328 / 420 (finishes Thu 09:33 -- 200/h) 20240808-09:12:34 keep c_quizzes model 2 validated 14 / 420 (3.33%) nb_accumulated 342 / 420 (finishes Thu 09:36 -- 195/h) 20240808-09:19:32 keep c_quizzes model 4 validated 20 / 420 (4.76%) nb_accumulated 362 / 420 (finishes Thu 09:37 -- 193/h) 20240808-09:26:28 keep c_quizzes model 3 validated 20 / 420 (4.76%) nb_accumulated 382 / 420 (finishes Thu 09:38 -- 192/h) 20240808-09:33:26 keep c_quizzes model 0 validated 21 / 420 (5.00%) nb_accumulated 403 / 420 (finishes Thu 09:38 -- 191/h) 20240808-09:40:23 keep c_quizzes model 1 validated 20 / 420 (4.76%) nb_accumulated 423 / 420 (finishes now! -- 190/h) 20240808-09:40:54 wrote c_quizzes.pth 20240808-09:40:54 training model 0 20240808-09:57:36 train_perplexity 87 model 0 1.1604459925767128 20240808-09:57:52 test_perplexity 87 model 0 1.1593087353736538 20240808-10:04:00 test_accuracy 87 model 0 val 1521 / 1607 20240808-10:04:03 wrote gpt_000.pth 20240808-10:05:05 wrote non_validated_0087_00.png 20240808-10:05:05 wrote state.pth 20240808-10:05:05 --- epoch 88 ---------------------------------------- 20240808-10:05:05 current_test_accuracies 0.9465 0.0000 0.0000 0.0000 0.0000 20240808-10:05:05 training model 1 20240808-10:21:47 train_perplexity 88 model 1 1.159767928358701 20240808-10:22:02 test_perplexity 88 model 1 1.159143199085257 20240808-10:28:07 test_accuracy 88 model 1 val 1521 / 1619 20240808-10:28:11 wrote gpt_001.pth 20240808-10:29:12 wrote non_validated_0088_01.png 20240808-10:29:12 wrote state.pth 20240808-10:29:12 --- epoch 89 ---------------------------------------- 20240808-10:29:12 current_test_accuracies 0.9465 0.9395 0.0000 0.0000 0.0000 20240808-10:29:12 training model 2 20240808-10:45:54 train_perplexity 89 model 2 1.1600255739076264 20240808-10:46:10 test_perplexity 89 model 2 1.1566386399521502 20240808-10:52:15 test_accuracy 89 model 2 val 1549 / 1620 20240808-10:52:19 wrote gpt_002.pth 20240808-10:53:21 wrote non_validated_0089_02.png 20240808-10:53:21 wrote state.pth 20240808-10:53:21 --- epoch 90 ---------------------------------------- 20240808-10:53:21 current_test_accuracies 0.9465 0.9395 0.9562 0.0000 0.0000 20240808-10:53:21 training model 3 20240808-11:10:02 train_perplexity 90 model 3 1.1601497927446331 20240808-11:10:17 test_perplexity 90 model 3 1.156145001476056 20240808-11:16:26 test_accuracy 90 model 3 val 1518 / 1615 20240808-11:16:29 wrote gpt_003.pth 20240808-11:17:31 wrote non_validated_0090_03.png 20240808-11:17:31 wrote state.pth 20240808-11:17:31 --- epoch 91 ---------------------------------------- 20240808-11:17:31 current_test_accuracies 0.9465 0.9395 0.9562 0.9399 0.0000 20240808-11:17:31 training model 4 20240808-11:34:13 train_perplexity 91 model 4 1.1595560524621653 20240808-11:34:29 test_perplexity 91 model 4 1.1586162636549302 20240808-11:40:35 test_accuracy 91 model 4 val 1529 / 1611 20240808-11:40:38 wrote gpt_004.pth 20240808-11:41:40 wrote non_validated_0091_04.png 20240808-11:41:40 wrote state.pth 20240808-11:41:40 --- epoch 92 ---------------------------------------- 20240808-11:41:40 current_test_accuracies 0.9465 0.9395 0.9562 0.9399 0.9491 20240808-11:41:40 training model 1 20240808-11:58:20 train_perplexity 92 model 1 1.1595753904053594 20240808-11:58:36 test_perplexity 92 model 1 1.1584929632002217 20240808-12:04:43 test_accuracy 92 model 1 val 1508 / 1600 20240808-12:04:47 wrote gpt_001.pth 20240808-12:05:48 wrote non_validated_0092_01.png 20240808-12:05:48 wrote state.pth 20240808-12:05:48 --- epoch 93 ---------------------------------------- 20240808-12:05:48 current_test_accuracies 0.9465 0.9425 0.9562 0.9399 0.9491 20240808-12:05:48 training model 3 20240808-12:22:29 train_perplexity 93 model 3 1.1595270818338976 20240808-12:22:44 test_perplexity 93 model 3 1.158142571540779 20240808-12:28:53 test_accuracy 93 model 3 val 1510 / 1600 20240808-12:28:56 wrote gpt_003.pth 20240808-12:29:56 wrote non_validated_0093_03.png 20240808-12:29:56 wrote state.pth 20240808-12:29:56 --- epoch 94 ---------------------------------------- 20240808-12:29:56 current_test_accuracies 0.9465 0.9425 0.9562 0.9438 0.9491 20240808-12:29:56 training model 1 20240808-12:46:37 train_perplexity 94 model 1 1.1595845391511086 20240808-12:46:53 test_perplexity 94 model 1 1.1548403205314952 20240808-12:52:54 test_accuracy 94 model 1 val 1515 / 1597 20240808-12:52:57 wrote gpt_001.pth 20240808-12:53:58 wrote non_validated_0094_01.png 20240808-12:53:58 wrote state.pth 20240808-12:53:58 --- epoch 95 ---------------------------------------- 20240808-12:53:58 current_test_accuracies 0.9465 0.9487 0.9562 0.9438 0.9491 20240808-12:53:58 training model 3 20240808-13:10:39 train_perplexity 95 model 3 1.1590920748839606 20240808-13:10:55 test_perplexity 95 model 3 1.1578500361645407 20240808-13:17:04 test_accuracy 95 model 3 val 1537 / 1600 20240808-13:17:07 wrote gpt_003.pth 20240808-13:18:08 wrote non_validated_0095_03.png 20240808-13:18:08 wrote state.pth 20240808-13:18:08 --- epoch 96 ---------------------------------------- 20240808-13:18:08 current_test_accuracies 0.9465 0.9487 0.9562 0.9606 0.9491 20240808-13:18:08 training model 0 20240808-13:34:50 train_perplexity 96 model 0 1.1603575613318833 20240808-13:35:05 test_perplexity 96 model 0 1.1582032079917655 20240808-13:41:13 test_accuracy 96 model 0 val 1519 / 1612 20240808-13:41:17 wrote gpt_000.pth 20240808-13:42:18 wrote non_validated_0096_00.png 20240808-13:42:18 wrote state.pth 20240808-13:42:18 --- epoch 97 ---------------------------------------- 20240808-13:42:18 current_test_accuracies 0.9423 0.9487 0.9562 0.9606 0.9491 20240808-13:42:18 training model 0 20240808-13:58:59 train_perplexity 97 model 0 1.1596328069210051 20240808-13:59:15 test_perplexity 97 model 0 1.1597927991169037 20240808-14:05:22 test_accuracy 97 model 0 val 1524 / 1613 20240808-14:05:25 wrote gpt_000.pth 20240808-14:06:27 wrote non_validated_0097_00.png 20240808-14:06:27 wrote state.pth 20240808-14:06:27 --- epoch 98 ---------------------------------------- 20240808-14:06:27 current_test_accuracies 0.9448 0.9487 0.9562 0.9606 0.9491 20240808-14:06:27 training model 0 20240808-14:23:09 train_perplexity 98 model 0 1.1593916227435601 20240808-14:23:25 test_perplexity 98 model 0 1.1584818317967212 20240808-14:29:36 test_accuracy 98 model 0 val 1501 / 1593 20240808-14:29:39 wrote gpt_000.pth 20240808-14:30:41 wrote non_validated_0098_00.png 20240808-14:30:41 wrote state.pth 20240808-14:30:41 --- epoch 99 ---------------------------------------- 20240808-14:30:41 current_test_accuracies 0.9422 0.9487 0.9562 0.9606 0.9491 20240808-14:30:41 training model 0 20240808-14:47:23 train_perplexity 99 model 0 1.1596355437670574 20240808-14:47:39 test_perplexity 99 model 0 1.1583951181192114 20240808-14:53:54 test_accuracy 99 model 0 val 1527 / 1590 20240808-14:53:58 wrote gpt_000.pth 20240808-14:54:59 wrote non_validated_0099_00.png 20240808-14:54:59 wrote state.pth 20240808-14:54:59 --- epoch 100 ---------------------------------------- 20240808-14:54:59 current_test_accuracies 0.9604 0.9487 0.9562 0.9606 0.9491 20240808-14:54:59 training model 1 20240808-15:11:41 train_perplexity 100 model 1 1.1592132027952042 20240808-15:11:57 test_perplexity 100 model 1 1.1547439061043874 20240808-15:18:12 test_accuracy 100 model 1 val 1507 / 1571 20240808-15:18:15 wrote gpt_001.pth 20240808-15:19:15 wrote non_validated_0100_01.png 20240808-15:19:15 wrote state.pth 20240808-15:19:15 --- epoch 101 ---------------------------------------- 20240808-15:19:15 current_test_accuracies 0.9604 0.9593 0.9562 0.9606 0.9491 20240808-15:19:15 training model 4 20240808-15:35:57 train_perplexity 101 model 4 1.1589384148331874 20240808-15:36:12 test_perplexity 101 model 4 1.1583344093967776 20240808-15:42:22 test_accuracy 101 model 4 val 1525 / 1594 20240808-15:42:26 wrote gpt_004.pth 20240808-15:43:28 wrote non_validated_0101_04.png 20240808-15:43:28 wrote state.pth 20240808-15:43:28 --- epoch 102 ---------------------------------------- 20240808-15:43:28 current_test_accuracies 0.9604 0.9593 0.9562 0.9606 0.9567 20240808-15:50:28 keep c_quizzes model 2 validated 23 / 420 (5.48%) nb_accumulated 23 / 420 (finishes Thu 17:51 -- 197/h) 20240808-15:57:21 keep c_quizzes model 2 validated 39 / 420 (9.29%) nb_accumulated 62 / 420 (finishes Thu 17:17 -- 267/h) 20240808-16:04:14 keep c_quizzes model 0 validated 23 / 420 (5.48%) nb_accumulated 85 / 420 (finishes Thu 17:26 -- 245/h) 20240808-16:11:04 keep c_quizzes model 4 validated 31 / 420 (7.38%) nb_accumulated 116 / 420 (finishes Thu 17:23 -- 252/h) 20240808-16:17:56 keep c_quizzes model 4 validated 29 / 420 (6.90%) nb_accumulated 145 / 420 (finishes Thu 17:23 -- 252/h) 20240808-16:24:51 keep c_quizzes model 4 validated 17 / 420 (4.05%) nb_accumulated 162 / 420 (finishes Thu 17:30 -- 234/h) 20240808-16:31:45 keep c_quizzes model 0 validated 18 / 420 (4.29%) nb_accumulated 180 / 420 (finishes Thu 17:36 -- 223/h) 20240808-16:38:43 keep c_quizzes model 1 validated 20 / 420 (4.76%) nb_accumulated 200 / 420 (finishes Thu 17:39 -- 217/h) 20240808-16:45:38 keep c_quizzes model 2 validated 34 / 420 (8.10%) nb_accumulated 234 / 420 (finishes Thu 17:35 -- 225/h) 20240808-16:52:34 keep c_quizzes model 1 validated 25 / 420 (5.95%) nb_accumulated 259 / 420 (finishes Thu 17:35 -- 224/h) 20240808-16:59:29 keep c_quizzes model 0 validated 26 / 420 (6.19%) nb_accumulated 285 / 420 (finishes Thu 17:35 -- 224/h) 20240808-17:06:28 keep c_quizzes model 0 validated 18 / 420 (4.29%) nb_accumulated 303 / 420 (finishes Thu 17:38 -- 219/h) 20240808-17:13:24 keep c_quizzes model 3 validated 18 / 420 (4.29%) nb_accumulated 321 / 420 (finishes Thu 17:41 -- 214/h) 20240808-17:20:19 keep c_quizzes model 1 validated 17 / 420 (4.05%) nb_accumulated 338 / 420 (finishes Thu 17:43 -- 209/h) 20240808-17:27:19 keep c_quizzes model 1 validated 25 / 420 (5.95%) nb_accumulated 363 / 420 (finishes Thu 17:43 -- 209/h) 20240808-17:34:14 keep c_quizzes model 3 validated 23 / 420 (5.48%) nb_accumulated 386 / 420 (finishes Thu 17:43 -- 209/h) 20240808-17:41:10 keep c_quizzes model 2 validated 13 / 420 (3.10%) nb_accumulated 399 / 420 (finishes Thu 17:47 -- 203/h) 20240808-17:48:07 keep c_quizzes model 1 validated 21 / 420 (5.00%) nb_accumulated 420 / 420 (finishes now! -- 202/h) 20240808-17:48:38 wrote c_quizzes.pth 20240808-17:48:38 training model 0 20240808-18:05:19 train_perplexity 102 model 0 1.1594676538947049 20240808-18:05:35 test_perplexity 102 model 0 1.1583163181316516 20240808-18:11:33 test_accuracy 102 model 0 val 1515 / 1606 20240808-18:11:37 wrote gpt_000.pth 20240808-18:12:39 wrote non_validated_0102_00.png 20240808-18:12:39 wrote state.pth 20240808-18:12:39 --- epoch 103 ---------------------------------------- 20240808-18:12:39 current_test_accuracies 0.9433 0.0000 0.0000 0.0000 0.0000 20240808-18:12:39 training model 1 20240808-18:29:20 train_perplexity 103 model 1 1.1597105869072428 20240808-18:29:36 test_perplexity 103 model 1 1.1578670977434653 20240808-18:35:55 test_accuracy 103 model 1 val 1491 / 1572 20240808-18:35:59 wrote gpt_001.pth 20240808-18:37:00 wrote non_validated_0103_01.png 20240808-18:37:00 wrote state.pth 20240808-18:37:00 --- epoch 104 ---------------------------------------- 20240808-18:37:00 current_test_accuracies 0.9433 0.9485 0.0000 0.0000 0.0000 20240808-18:37:00 training model 2 20240808-18:53:42 train_perplexity 104 model 2 1.1603086626948589 20240808-18:53:57 test_perplexity 104 model 2 1.1598608815950417 20240808-19:00:09 test_accuracy 104 model 2 val 1540 / 1605 20240808-19:00:12 wrote gpt_002.pth 20240808-19:01:15 wrote non_validated_0104_02.png 20240808-19:01:15 wrote state.pth 20240808-19:01:15 --- epoch 105 ---------------------------------------- 20240808-19:01:15 current_test_accuracies 0.9433 0.9485 0.9595 0.0000 0.0000 20240808-19:01:15 training model 3 20240808-19:17:56 train_perplexity 105 model 3 1.1593274926649015 20240808-19:18:12 test_perplexity 105 model 3 1.1586473881903387 20240808-19:24:16 test_accuracy 105 model 3 val 1548 / 1624 20240808-19:24:20 wrote gpt_003.pth 20240808-19:25:21 wrote non_validated_0105_03.png 20240808-19:25:21 wrote state.pth 20240808-19:25:21 --- epoch 106 ---------------------------------------- 20240808-19:25:21 current_test_accuracies 0.9433 0.9485 0.9595 0.9532 0.0000 20240808-19:25:21 training model 4 20240808-19:42:03 train_perplexity 106 model 4 1.1592599627161568 20240808-19:42:19 test_perplexity 106 model 4 1.1578272911768048 20240808-19:48:36 test_accuracy 106 model 4 val 1504 / 1577 20240808-19:48:39 wrote gpt_004.pth 20240808-19:49:41 wrote non_validated_0106_04.png 20240808-19:49:41 wrote state.pth 20240808-19:49:41 --- epoch 107 ---------------------------------------- 20240808-19:49:41 current_test_accuracies 0.9433 0.9485 0.9595 0.9532 0.9537 20240808-19:49:41 training model 0 20240808-20:06:23 train_perplexity 107 model 0 1.159474730103816 20240808-20:06:39 test_perplexity 107 model 0 1.1568266364048343 20240808-20:12:51 test_accuracy 107 model 0 val 1544 / 1591 20240808-20:12:54 wrote gpt_000.pth 20240808-20:13:57 wrote non_validated_0107_00.png 20240808-20:13:57 wrote state.pth 20240808-20:13:57 --- epoch 108 ---------------------------------------- 20240808-20:13:57 current_test_accuracies 0.9705 0.9485 0.9595 0.9532 0.9537 20240808-20:13:57 training model 1 20240808-20:30:39 train_perplexity 108 model 1 1.1599859234975631 20240808-20:30:55 test_perplexity 108 model 1 1.1581418331284958 20240808-20:37:07 test_accuracy 108 model 1 val 1539 / 1609 20240808-20:37:10 wrote gpt_001.pth 20240808-20:38:12 wrote non_validated_0108_01.png 20240808-20:38:12 wrote state.pth 20240808-20:38:12 --- epoch 109 ---------------------------------------- 20240808-20:38:12 current_test_accuracies 0.9705 0.9565 0.9595 0.9532 0.9537 20240808-20:45:18 keep c_quizzes model 1 validated 41 / 420 (9.76%) nb_accumulated 41 / 420 (finishes Thu 21:50 -- 346/h) 20240808-20:52:16 keep c_quizzes model 3 validated 29 / 420 (6.90%) nb_accumulated 70 / 420 (finishes Thu 22:02 -- 298/h) 20240808-20:59:15 keep c_quizzes model 0 validated 26 / 420 (6.19%) nb_accumulated 96 / 420 (finishes Thu 22:10 -- 273/h) 20240808-21:06:13 keep c_quizzes model 1 validated 20 / 420 (4.76%) nb_accumulated 116 / 420 (finishes Thu 22:19 -- 248/h) 20240808-21:13:13 keep c_quizzes model 1 validated 22 / 420 (5.24%) nb_accumulated 138 / 420 (finishes Thu 22:24 -- 236/h) 20240808-21:20:14 keep c_quizzes model 3 validated 34 / 420 (8.10%) nb_accumulated 172 / 420 (finishes Thu 22:20 -- 245/h) 20240808-21:27:15 keep c_quizzes model 4 validated 24 / 420 (5.71%) nb_accumulated 196 / 420 (finishes Thu 22:23 -- 239/h) 20240808-21:34:16 keep c_quizzes model 4 validated 29 / 420 (6.90%) nb_accumulated 225 / 420 (finishes Thu 22:22 -- 240/h) 20240808-21:41:17 keep c_quizzes model 4 validated 22 / 420 (5.24%) nb_accumulated 247 / 420 (finishes Thu 22:25 -- 234/h) 20240808-21:48:18 keep c_quizzes model 4 validated 33 / 420 (7.86%) nb_accumulated 280 / 420 (finishes Thu 22:23 -- 239/h) 20240808-21:55:19 keep c_quizzes model 0 validated 24 / 420 (5.71%) nb_accumulated 304 / 420 (finishes Thu 22:24 -- 236/h) 20240808-22:02:21 keep c_quizzes model 1 validated 27 / 420 (6.43%) nb_accumulated 331 / 420 (finishes Thu 22:24 -- 236/h) 20240808-22:09:21 keep c_quizzes model 1 validated 17 / 420 (4.05%) nb_accumulated 348 / 420 (finishes Thu 22:28 -- 229/h) 20240808-22:16:20 keep c_quizzes model 3 validated 23 / 420 (5.48%) nb_accumulated 371 / 420 (finishes Thu 22:29 -- 226/h) 20240808-22:23:23 keep c_quizzes model 2 validated 10 / 420 (2.38%) nb_accumulated 381 / 420 (finishes Thu 22:34 -- 217/h) 20240808-22:30:30 keep c_quizzes model 0 validated 24 / 420 (5.71%) nb_accumulated 405 / 420 (finishes Thu 22:34 -- 216/h) 20240808-22:37:35 keep c_quizzes model 4 validated 19 / 420 (4.52%) nb_accumulated 424 / 420 (finishes now! -- 213/h) 20240808-22:38:06 wrote c_quizzes.pth 20240808-22:38:06 training model 0 20240808-22:54:48 train_perplexity 109 model 0 1.1596523542535835 20240808-22:55:04 test_perplexity 109 model 0 1.1566756856659979 20240808-23:01:15 test_accuracy 109 model 0 val 1538 / 1600 20240808-23:01:19 wrote gpt_000.pth 20240808-23:02:22 wrote non_validated_0109_00.png 20240808-23:02:22 wrote state.pth 20240808-23:02:22 --- epoch 110 ---------------------------------------- 20240808-23:02:22 current_test_accuracies 0.9613 0.0000 0.0000 0.0000 0.0000 20240808-23:02:22 training model 1 20240808-23:19:03 train_perplexity 110 model 1 1.1595138730051822 20240808-23:19:19 test_perplexity 110 model 1 1.1573734435679752 20240808-23:25:26 test_accuracy 110 model 1 val 1551 / 1618 20240808-23:25:30 wrote gpt_001.pth 20240808-23:26:32 wrote non_validated_0110_01.png 20240808-23:26:32 wrote state.pth 20240808-23:26:32 --- epoch 111 ---------------------------------------- 20240808-23:26:32 current_test_accuracies 0.9613 0.9586 0.0000 0.0000 0.0000 20240808-23:26:32 training model 2 20240808-23:43:14 train_perplexity 111 model 2 1.1602840014051254 20240808-23:43:30 test_perplexity 111 model 2 1.158575186071203 20240808-23:49:46 test_accuracy 111 model 2 val 1529 / 1598 20240808-23:49:49 wrote gpt_002.pth 20240808-23:50:51 wrote non_validated_0111_02.png 20240808-23:50:51 wrote state.pth 20240808-23:50:51 --- epoch 112 ---------------------------------------- 20240808-23:50:51 current_test_accuracies 0.9613 0.9586 0.9568 0.0000 0.0000 20240808-23:50:51 training model 3 20240809-00:07:32 train_perplexity 112 model 3 1.1595905256770778 20240809-00:07:48 test_perplexity 112 model 3 1.1575557700527748 20240809-00:14:08 test_accuracy 112 model 3 val 1494 / 1577 20240809-00:14:12 wrote gpt_003.pth 20240809-00:15:13 wrote non_validated_0112_03.png 20240809-00:15:13 wrote state.pth 20240809-00:15:13 --- epoch 113 ---------------------------------------- 20240809-00:15:13 current_test_accuracies 0.9613 0.9586 0.9568 0.9474 0.0000 20240809-00:15:13 training model 4 20240809-00:31:55 train_perplexity 113 model 4 1.1593194240253053 20240809-00:32:11 test_perplexity 113 model 4 1.157806436404483 20240809-00:38:22 test_accuracy 113 model 4 val 1531 / 1603 20240809-00:38:25 wrote gpt_004.pth 20240809-00:39:26 wrote non_validated_0113_04.png 20240809-00:39:26 wrote state.pth 20240809-00:39:26 --- epoch 114 ---------------------------------------- 20240809-00:39:26 current_test_accuracies 0.9613 0.9586 0.9568 0.9474 0.9551 20240809-00:39:26 training model 3 20240809-00:56:08 train_perplexity 114 model 3 1.1595124328286661 20240809-00:56:23 test_perplexity 114 model 3 1.1581992287326408 20240809-01:02:30 test_accuracy 114 model 3 val 1537 / 1605 20240809-01:02:34 wrote gpt_003.pth 20240809-01:03:35 wrote non_validated_0114_03.png 20240809-01:03:35 wrote state.pth 20240809-01:03:35 --- epoch 115 ---------------------------------------- 20240809-01:03:35 current_test_accuracies 0.9613 0.9586 0.9568 0.9576 0.9551 20240809-01:10:38 keep c_quizzes model 2 validated 37 / 420 (8.81%) nb_accumulated 37 / 420 (finishes Fri 02:23 -- 315/h) 20240809-01:17:32 keep c_quizzes model 1 validated 34 / 420 (8.10%) nb_accumulated 71 / 420 (finishes Fri 02:26 -- 305/h) 20240809-01:24:32 keep c_quizzes model 4 validated 39 / 420 (9.29%) nb_accumulated 110 / 420 (finishes Fri 02:23 -- 315/h) 20240809-01:31:31 keep c_quizzes model 2 validated 30 / 420 (7.14%) nb_accumulated 140 / 420 (finishes Fri 02:27 -- 300/h) 20240809-01:38:27 keep c_quizzes model 1 validated 35 / 420 (8.33%) nb_accumulated 175 / 420 (finishes Fri 02:27 -- 301/h) 20240809-01:45:26 keep c_quizzes model 0 validated 32 / 420 (7.62%) nb_accumulated 207 / 420 (finishes Fri 02:28 -- 296/h) 20240809-01:52:28 keep c_quizzes model 4 validated 24 / 420 (5.71%) nb_accumulated 231 / 420 (finishes Fri 02:32 -- 283/h) 20240809-01:59:25 keep c_quizzes model 1 validated 24 / 420 (5.71%) nb_accumulated 255 / 420 (finishes Fri 02:35 -- 273/h) 20240809-02:06:23 keep c_quizzes model 2 validated 28 / 420 (6.67%) nb_accumulated 283 / 420 (finishes Fri 02:36 -- 270/h) 20240809-02:13:21 keep c_quizzes model 2 validated 30 / 420 (7.14%) nb_accumulated 313 / 420 (finishes Fri 02:37 -- 269/h) 20240809-02:20:30 keep c_quizzes model 4 validated 24 / 420 (5.71%) nb_accumulated 337 / 420 (finishes Fri 02:39 -- 262/h) 20240809-02:27:26 keep c_quizzes model 0 validated 20 / 420 (4.76%) nb_accumulated 357 / 420 (finishes Fri 02:42 -- 255/h) 20240809-02:34:25 keep c_quizzes model 0 validated 17 / 420 (4.05%) nb_accumulated 374 / 420 (finishes Fri 02:45 -- 247/h) 20240809-02:41:21 keep c_quizzes model 4 validated 23 / 420 (5.48%) nb_accumulated 397 / 420 (finishes Fri 02:47 -- 243/h) 20240809-02:48:19 keep c_quizzes model 0 validated 19 / 420 (4.52%) nb_accumulated 416 / 420 (finishes Fri 02:49 -- 238/h) 20240809-02:55:21 keep c_quizzes model 4 validated 25 / 420 (5.95%) nb_accumulated 441 / 420 (finishes now! -- 236/h) 20240809-02:55:52 wrote c_quizzes.pth 20240809-02:55:52 training model 0 20240809-03:12:33 train_perplexity 115 model 0 1.159791259447912 20240809-03:12:48 test_perplexity 115 model 0 1.156010974264626 20240809-03:19:02 test_accuracy 115 model 0 val 1514 / 1597 20240809-03:19:05 wrote gpt_000.pth 20240809-03:20:07 wrote non_validated_0115_00.png 20240809-03:20:07 wrote state.pth 20240809-03:20:07 --- epoch 116 ---------------------------------------- 20240809-03:20:07 current_test_accuracies 0.9480 0.0000 0.0000 0.0000 0.0000 20240809-03:20:07 training model 1 20240809-03:36:47 train_perplexity 116 model 1 1.160057838537235 20240809-03:37:03 test_perplexity 116 model 1 1.1576474251548725 20240809-03:43:08 test_accuracy 116 model 1 val 1537 / 1610 20240809-03:43:12 wrote gpt_001.pth 20240809-03:44:14 wrote non_validated_0116_01.png 20240809-03:44:14 wrote state.pth 20240809-03:44:14 --- epoch 117 ---------------------------------------- 20240809-03:44:14 current_test_accuracies 0.9480 0.9547 0.0000 0.0000 0.0000 20240809-03:44:14 training model 2 20240809-04:00:54 train_perplexity 117 model 2 1.1608333406588593 20240809-04:01:10 test_perplexity 117 model 2 1.1607350977679012 20240809-04:07:11 test_accuracy 117 model 2 val 1533 / 1622 20240809-04:07:15 wrote gpt_002.pth 20240809-04:08:16 wrote non_validated_0117_02.png 20240809-04:08:16 wrote state.pth 20240809-04:08:16 --- epoch 118 ---------------------------------------- 20240809-04:08:16 current_test_accuracies 0.9480 0.9547 0.9451 0.0000 0.0000 20240809-04:08:16 training model 3 20240809-04:24:56 train_perplexity 118 model 3 1.160161388569419 20240809-04:25:12 test_perplexity 118 model 3 1.160315442716451 20240809-04:31:24 test_accuracy 118 model 3 val 1517 / 1591 20240809-04:31:27 wrote gpt_003.pth 20240809-04:32:29 wrote non_validated_0118_03.png 20240809-04:32:29 wrote state.pth 20240809-04:32:29 --- epoch 119 ---------------------------------------- 20240809-04:32:29 current_test_accuracies 0.9480 0.9547 0.9451 0.9535 0.0000 20240809-04:32:29 training model 4 20240809-04:49:09 train_perplexity 119 model 4 1.1594884310657956 20240809-04:49:25 test_perplexity 119 model 4 1.1588972835039584 20240809-04:55:30 test_accuracy 119 model 4 val 1535 / 1612 20240809-04:55:33 wrote gpt_004.pth 20240809-04:56:34 wrote non_validated_0119_04.png 20240809-04:56:34 wrote state.pth 20240809-04:56:34 --- epoch 120 ---------------------------------------- 20240809-04:56:34 current_test_accuracies 0.9480 0.9547 0.9451 0.9535 0.9522 20240809-04:56:34 training model 2 20240809-05:13:15 train_perplexity 120 model 2 1.1601981144555202 20240809-05:13:31 test_perplexity 120 model 2 1.159537019987457 20240809-05:19:35 test_accuracy 120 model 2 val 1560 / 1626 20240809-05:19:38 wrote gpt_002.pth 20240809-05:20:40 wrote non_validated_0120_02.png 20240809-05:20:40 wrote state.pth 20240809-05:20:40 --- epoch 121 ---------------------------------------- 20240809-05:20:40 current_test_accuracies 0.9480 0.9547 0.9594 0.9535 0.9522 20240809-05:20:40 training model 0 20240809-05:37:21 train_perplexity 121 model 0 1.1591624048394324 20240809-05:37:36 test_perplexity 121 model 0 1.1593215337055658 20240809-05:43:43 test_accuracy 121 model 0 val 1543 / 1614 20240809-05:43:46 wrote gpt_000.pth 20240809-05:44:47 wrote non_validated_0121_00.png 20240809-05:44:47 wrote state.pth 20240809-05:44:47 --- epoch 122 ---------------------------------------- 20240809-05:44:47 current_test_accuracies 0.9560 0.9547 0.9594 0.9535 0.9522 20240809-05:51:50 keep c_quizzes model 1 validated 34 / 420 (8.10%) nb_accumulated 34 / 420 (finishes Fri 07:11 -- 289/h) 20240809-05:58:47 keep c_quizzes model 1 validated 44 / 420 (10.48%) nb_accumulated 78 / 420 (finishes Fri 07:00 -- 334/h) 20240809-06:05:44 keep c_quizzes model 4 validated 35 / 420 (8.33%) nb_accumulated 113 / 420 (finishes Fri 07:02 -- 323/h) 20240809-06:12:40 keep c_quizzes model 0 validated 26 / 420 (6.19%) nb_accumulated 139 / 420 (finishes Fri 07:09 -- 299/h) 20240809-06:19:37 keep c_quizzes model 4 validated 26 / 420 (6.19%) nb_accumulated 165 / 420 (finishes Fri 07:13 -- 284/h) 20240809-06:26:37 keep c_quizzes model 0 validated 19 / 420 (4.52%) nb_accumulated 184 / 420 (finishes Fri 07:20 -- 263/h) 20240809-06:33:36 keep c_quizzes model 4 validated 29 / 420 (6.90%) nb_accumulated 213 / 420 (finishes Fri 07:21 -- 261/h) 20240809-06:40:34 keep c_quizzes model 3 validated 19 / 420 (4.52%) nb_accumulated 232 / 420 (finishes Fri 07:25 -- 249/h) 20240809-06:47:30 keep c_quizzes model 2 validated 22 / 420 (5.24%) nb_accumulated 254 / 420 (finishes Fri 07:28 -- 243/h) 20240809-06:54:26 keep c_quizzes model 0 validated 29 / 420 (6.90%) nb_accumulated 283 / 420 (finishes Fri 07:28 -- 243/h) 20240809-07:01:21 keep c_quizzes model 2 validated 28 / 420 (6.67%) nb_accumulated 311 / 420 (finishes Fri 07:28 -- 243/h) 20240809-07:08:17 keep c_quizzes model 0 validated 26 / 420 (6.19%) nb_accumulated 337 / 420 (finishes Fri 07:28 -- 242/h) 20240809-07:15:14 keep c_quizzes model 0 validated 17 / 420 (4.05%) nb_accumulated 354 / 420 (finishes Fri 07:32 -- 234/h) 20240809-07:22:15 keep c_quizzes model 3 validated 26 / 420 (6.19%) nb_accumulated 380 / 420 (finishes Fri 07:32 -- 233/h) 20240809-07:29:14 keep c_quizzes model 1 validated 22 / 420 (5.24%) nb_accumulated 402 / 420 (finishes Fri 07:33 -- 230/h) 20240809-07:36:19 keep c_quizzes model 1 validated 26 / 420 (6.19%) nb_accumulated 428 / 420 (finishes now! -- 230/h) 20240809-07:36:50 wrote c_quizzes.pth 20240809-07:36:50 training model 0 20240809-07:53:31 train_perplexity 122 model 0 1.1600257123836022 20240809-07:53:46 test_perplexity 122 model 0 1.1599618709512836 20240809-07:59:49 test_accuracy 122 model 0 val 1572 / 1636 20240809-07:59:52 wrote gpt_000.pth 20240809-08:00:54 wrote non_validated_0122_00.png 20240809-08:00:54 wrote state.pth 20240809-08:00:54 --- epoch 123 ---------------------------------------- 20240809-08:00:54 current_test_accuracies 0.9609 0.0000 0.0000 0.0000 0.0000 20240809-08:00:54 training model 1 20240809-08:17:33 train_perplexity 123 model 1 1.1602265839829131 20240809-08:17:49 test_perplexity 123 model 1 1.1595837744666257 20240809-08:24:09 test_accuracy 123 model 1 val 1478 / 1554 20240809-08:24:12 wrote gpt_001.pth 20240809-08:25:14 wrote non_validated_0123_01.png 20240809-08:25:14 wrote state.pth 20240809-08:25:14 --- epoch 124 ---------------------------------------- 20240809-08:25:14 current_test_accuracies 0.9609 0.9511 0.0000 0.0000 0.0000 20240809-08:25:14 training model 2 20240809-08:41:55 train_perplexity 124 model 2 1.1611744017033492 20240809-08:42:11 test_perplexity 124 model 2 1.1570542659206233 20240809-08:48:25 test_accuracy 124 model 2 val 1510 / 1586 20240809-08:48:28 wrote gpt_002.pth 20240809-08:49:28 wrote non_validated_0124_02.png 20240809-08:49:28 wrote state.pth 20240809-08:49:28 --- epoch 125 ---------------------------------------- 20240809-08:49:28 current_test_accuracies 0.9609 0.9511 0.9521 0.0000 0.0000 20240809-08:49:28 training model 3 20240809-09:06:09 train_perplexity 125 model 3 1.1599866325016994 20240809-09:06:24 test_perplexity 125 model 3 1.1584417050296023 20240809-09:12:33 test_accuracy 125 model 3 val 1543 / 1618 20240809-09:12:36 wrote gpt_003.pth 20240809-09:13:38 wrote non_validated_0125_03.png 20240809-09:13:38 wrote state.pth 20240809-09:13:38 --- epoch 126 ---------------------------------------- 20240809-09:13:38 current_test_accuracies 0.9609 0.9511 0.9521 0.9536 0.0000 20240809-09:13:38 training model 4 20240809-09:30:19 train_perplexity 126 model 4 1.1600811552225034 20240809-09:30:35 test_perplexity 126 model 4 1.1602059129287565 20240809-09:36:46 test_accuracy 126 model 4 val 1527 / 1596 20240809-09:36:50 wrote gpt_004.pth 20240809-09:37:52 wrote non_validated_0126_04.png 20240809-09:37:52 wrote state.pth 20240809-09:37:52 --- epoch 127 ---------------------------------------- 20240809-09:37:52 current_test_accuracies 0.9609 0.9511 0.9521 0.9536 0.9568 20240809-09:45:02 keep c_quizzes model 3 validated 28 / 420 (6.67%) nb_accumulated 28 / 420 (finishes Fri 11:25 -- 234/h) 20240809-09:51:56 keep c_quizzes model 1 validated 27 / 420 (6.43%) nb_accumulated 55 / 420 (finishes Fri 11:25 -- 234/h) 20240809-09:59:03 keep c_quizzes model 1 validated 27 / 420 (6.43%) nb_accumulated 82 / 420 (finishes Fri 11:26 -- 232/h) 20240809-10:05:59 keep c_quizzes model 0 validated 23 / 420 (5.48%) nb_accumulated 105 / 420 (finishes Fri 11:30 -- 224/h) 20240809-10:12:55 keep c_quizzes model 0 validated 29 / 420 (6.90%) nb_accumulated 134 / 420 (finishes Fri 11:27 -- 229/h) 20240809-10:19:56 keep c_quizzes model 3 validated 22 / 420 (5.24%) nb_accumulated 156 / 420 (finishes Fri 11:31 -- 222/h) 20240809-10:26:53 keep c_quizzes model 3 validated 28 / 420 (6.67%) nb_accumulated 184 / 420 (finishes Fri 11:29 -- 225/h) 20240809-10:33:51 keep c_quizzes model 1 validated 26 / 420 (6.19%) nb_accumulated 210 / 420 (finishes Fri 11:29 -- 225/h) 20240809-10:40:47 keep c_quizzes model 2 validated 19 / 420 (4.52%) nb_accumulated 229 / 420 (finishes Fri 11:33 -- 218/h) 20240809-10:47:48 keep c_quizzes model 0 validated 24 / 420 (5.71%) nb_accumulated 253 / 420 (finishes Fri 11:33 -- 217/h) 20240809-10:54:45 keep c_quizzes model 1 validated 33 / 420 (7.86%) nb_accumulated 286 / 420 (finishes Fri 11:30 -- 223/h) 20240809-11:01:46 keep c_quizzes model 0 validated 22 / 420 (5.24%) nb_accumulated 308 / 420 (finishes Fri 11:32 -- 220/h) 20240809-11:08:42 keep c_quizzes model 0 validated 29 / 420 (6.90%) nb_accumulated 337 / 420 (finishes Fri 11:31 -- 222/h) 20240809-11:15:35 keep c_quizzes model 2 validated 25 / 420 (5.95%) nb_accumulated 362 / 420 (finishes Fri 11:31 -- 222/h) 20240809-11:22:31 keep c_quizzes model 2 validated 23 / 420 (5.48%) nb_accumulated 385 / 420 (finishes Fri 11:32 -- 220/h) 20240809-11:29:26 keep c_quizzes model 3 validated 20 / 420 (4.76%) nb_accumulated 405 / 420 (finishes Fri 11:33 -- 217/h) 20240809-11:36:22 keep c_quizzes model 2 validated 20 / 420 (4.76%) nb_accumulated 425 / 420 (finishes now! -- 215/h) 20240809-11:36:54 wrote c_quizzes.pth 20240809-11:36:54 training model 0 20240809-11:53:35 train_perplexity 127 model 0 1.160185053443801 20240809-11:53:50 test_perplexity 127 model 0 1.1600390539887304 20240809-11:59:54 test_accuracy 127 model 0 val 1550 / 1626 20240809-11:59:58 wrote gpt_000.pth 20240809-12:01:00 wrote non_validated_0127_00.png 20240809-12:01:00 wrote state.pth 20240809-12:01:00 --- epoch 128 ---------------------------------------- 20240809-12:01:00 current_test_accuracies 0.9533 0.0000 0.0000 0.0000 0.0000 20240809-12:01:00 training model 1 20240809-12:17:41 train_perplexity 128 model 1 1.1605717357886558 20240809-12:17:57 test_perplexity 128 model 1 1.1566106840538788 20240809-12:24:12 test_accuracy 128 model 1 val 1515 / 1597 20240809-12:24:15 wrote gpt_001.pth 20240809-12:25:17 wrote non_validated_0128_01.png 20240809-12:25:17 wrote state.pth 20240809-12:25:17 --- epoch 129 ---------------------------------------- 20240809-12:25:17 current_test_accuracies 0.9533 0.9487 0.0000 0.0000 0.0000 20240809-12:25:17 training model 2 20240809-12:41:59 train_perplexity 129 model 2 1.1605836757862757 20240809-12:42:14 test_perplexity 129 model 2 1.158809421126251 20240809-12:48:33 test_accuracy 129 model 2 val 1510 / 1583 20240809-12:48:36 wrote gpt_002.pth 20240809-12:49:36 wrote non_validated_0129_02.png 20240809-12:49:36 wrote state.pth 20240809-12:49:36 --- epoch 130 ---------------------------------------- 20240809-12:49:36 current_test_accuracies 0.9533 0.9487 0.9539 0.0000 0.0000 20240809-12:49:36 training model 3 20240809-13:06:18 train_perplexity 130 model 3 1.1603709819019508 20240809-13:06:34 test_perplexity 130 model 3 1.159139076516457 20240809-13:12:35 test_accuracy 130 model 3 val 1527 / 1628 20240809-13:12:39 wrote gpt_003.pth 20240809-13:13:41 wrote non_validated_0130_03.png 20240809-13:13:41 wrote state.pth 20240809-13:13:41 --- epoch 131 ---------------------------------------- 20240809-13:13:41 current_test_accuracies 0.9533 0.9487 0.9539 0.9380 0.0000 20240809-13:13:41 training model 4 20240809-13:30:22 train_perplexity 131 model 4 1.1604243519662396 20240809-13:30:37 test_perplexity 131 model 4 1.1581261901162083 20240809-13:36:52 test_accuracy 131 model 4 val 1517 / 1585 20240809-13:36:55 wrote gpt_004.pth 20240809-13:37:57 wrote non_validated_0131_04.png 20240809-13:37:57 wrote state.pth 20240809-13:37:57 --- epoch 132 ---------------------------------------- 20240809-13:37:57 current_test_accuracies 0.9533 0.9487 0.9539 0.9380 0.9571 20240809-13:37:57 training model 3 20240809-13:54:38 train_perplexity 132 model 3 1.1601406835227932 20240809-13:54:54 test_perplexity 132 model 3 1.1611006011018177 20240809-14:01:04 test_accuracy 132 model 3 val 1517 / 1614 20240809-14:01:07 wrote gpt_003.pth 20240809-14:02:09 wrote non_validated_0132_03.png 20240809-14:02:09 wrote state.pth 20240809-14:02:09 --- epoch 133 ---------------------------------------- 20240809-14:02:09 current_test_accuracies 0.9533 0.9487 0.9539 0.9399 0.9571 20240809-14:02:09 training model 3 20240809-14:18:50 train_perplexity 133 model 3 1.159928706889082 20240809-14:19:06 test_perplexity 133 model 3 1.1573182521024197 20240809-14:25:23 test_accuracy 133 model 3 val 1488 / 1574 20240809-14:25:26 wrote gpt_003.pth 20240809-14:26:28 wrote non_validated_0133_03.png 20240809-14:26:28 wrote state.pth 20240809-14:26:28 --- epoch 134 ---------------------------------------- 20240809-14:26:28 current_test_accuracies 0.9533 0.9487 0.9539 0.9454 0.9571 20240809-14:26:28 training model 3 20240809-14:43:09 train_perplexity 134 model 3 1.1596387692018144 20240809-14:43:25 test_perplexity 134 model 3 1.1597536487631726 20240809-14:49:36 test_accuracy 134 model 3 val 1541 / 1601 20240809-14:49:40 wrote gpt_003.pth 20240809-14:50:41 wrote non_validated_0134_03.png 20240809-14:50:41 wrote state.pth 20240809-14:50:41 --- epoch 135 ---------------------------------------- 20240809-14:50:41 current_test_accuracies 0.9533 0.9487 0.9539 0.9625 0.9571 20240809-14:50:41 training model 1 20240809-15:07:23 train_perplexity 135 model 1 1.16014647863186 20240809-15:07:39 test_perplexity 135 model 1 1.1586085877824184 20240809-15:13:58 test_accuracy 135 model 1 val 1487 / 1574 20240809-15:14:02 wrote gpt_001.pth 20240809-15:15:04 wrote non_validated_0135_01.png 20240809-15:15:04 wrote state.pth 20240809-15:15:04 --- epoch 136 ---------------------------------------- 20240809-15:15:04 current_test_accuracies 0.9533 0.9447 0.9539 0.9625 0.9571 20240809-15:15:04 training model 1 20240809-15:31:46 train_perplexity 136 model 1 1.1600658516583267 20240809-15:32:01 test_perplexity 136 model 1 1.1585514268346884 20240809-15:38:14 test_accuracy 136 model 1 val 1533 / 1608 20240809-15:38:17 wrote gpt_001.pth 20240809-15:39:19 wrote non_validated_0136_01.png 20240809-15:39:19 wrote state.pth 20240809-15:39:19 --- epoch 137 ---------------------------------------- 20240809-15:39:19 current_test_accuracies 0.9533 0.9534 0.9539 0.9625 0.9571 20240809-15:46:23 keep c_quizzes model 2 validated 31 / 420 (7.38%) nb_accumulated 31 / 420 (finishes Fri 17:15 -- 262/h) 20240809-15:53:15 keep c_quizzes model 4 validated 46 / 420 (10.95%) nb_accumulated 77 / 420 (finishes Fri 16:55 -- 331/h) 20240809-16:00:09 keep c_quizzes model 2 validated 39 / 420 (9.29%) nb_accumulated 116 / 420 (finishes Fri 16:54 -- 334/h) 20240809-16:06:59 keep c_quizzes model 3 validated 29 / 420 (6.90%) nb_accumulated 145 / 420 (finishes Fri 16:59 -- 314/h) 20240809-16:13:53 keep c_quizzes model 2 validated 29 / 420 (6.90%) nb_accumulated 174 / 420 (finishes Fri 17:02 -- 302/h) 20240809-16:20:46 keep c_quizzes model 1 validated 36 / 420 (8.57%) nb_accumulated 210 / 420 (finishes Fri 17:02 -- 303/h) 20240809-16:27:46 keep c_quizzes model 3 validated 31 / 420 (7.38%) nb_accumulated 241 / 420 (finishes Fri 17:03 -- 298/h) 20240809-16:34:41 keep c_quizzes model 0 validated 28 / 420 (6.67%) nb_accumulated 269 / 420 (finishes Fri 17:05 -- 291/h) 20240809-16:41:35 keep c_quizzes model 4 validated 16 / 420 (3.81%) nb_accumulated 285 / 420 (finishes Fri 17:11 -- 274/h) 20240809-16:48:32 keep c_quizzes model 4 validated 35 / 420 (8.33%) nb_accumulated 320 / 420 (finishes Fri 17:10 -- 277/h) 20240809-16:55:31 keep c_quizzes model 3 validated 30 / 420 (7.14%) nb_accumulated 350 / 420 (finishes Fri 17:10 -- 275/h) 20240809-17:02:27 keep c_quizzes model 1 validated 41 / 420 (9.76%) nb_accumulated 391 / 420 (finishes Fri 17:08 -- 282/h) 20240809-17:09:25 keep c_quizzes model 0 validated 27 / 420 (6.43%) nb_accumulated 418 / 420 (finishes Fri 17:09 -- 278/h) 20240809-17:16:24 keep c_quizzes model 2 validated 31 / 420 (7.38%) nb_accumulated 449 / 420 (finishes now! -- 277/h) 20240809-17:16:56 wrote c_quizzes.pth 20240809-17:16:56 training model 0 20240809-17:33:36 train_perplexity 137 model 0 1.1605115435028712 20240809-17:33:52 test_perplexity 137 model 0 1.1594175003234872 20240809-17:40:06 test_accuracy 137 model 0 val 1515 / 1596 20240809-17:40:09 wrote gpt_000.pth 20240809-17:41:09 wrote non_validated_0137_00.png 20240809-17:41:09 wrote state.pth 20240809-17:41:09 --- epoch 138 ---------------------------------------- 20240809-17:41:09 current_test_accuracies 0.9492 0.0000 0.0000 0.0000 0.0000 20240809-17:41:09 training model 1 20240809-17:57:50 train_perplexity 138 model 1 1.1606569938622437 20240809-17:58:06 test_perplexity 138 model 1 1.1602043961306292 20240809-18:04:12 test_accuracy 138 model 1 val 1535 / 1612 20240809-18:04:15 wrote gpt_001.pth 20240809-18:05:17 wrote non_validated_0138_01.png 20240809-18:05:17 wrote state.pth 20240809-18:05:17 --- epoch 139 ---------------------------------------- 20240809-18:05:17 current_test_accuracies 0.9492 0.9522 0.0000 0.0000 0.0000 20240809-18:05:17 training model 2 20240809-18:22:00 train_perplexity 139 model 2 1.161446829619873 20240809-18:22:15 test_perplexity 139 model 2 1.1620252777496216 20240809-18:28:23 test_accuracy 139 model 2 val 1563 / 1623 20240809-18:28:27 wrote gpt_002.pth 20240809-18:29:28 wrote non_validated_0139_02.png 20240809-18:29:28 wrote state.pth 20240809-18:29:28 --- epoch 140 ---------------------------------------- 20240809-18:29:28 current_test_accuracies 0.9492 0.9522 0.9630 0.0000 0.0000 20240809-18:29:28 training model 3 20240809-18:46:11 train_perplexity 140 model 3 1.1597469572699797 20240809-18:46:27 test_perplexity 140 model 3 1.1597228978108374 20240809-18:52:33 test_accuracy 140 model 3 val 1529 / 1613 20240809-18:52:36 wrote gpt_003.pth 20240809-18:53:38 wrote non_validated_0140_03.png 20240809-18:53:38 wrote state.pth 20240809-18:53:38 --- epoch 141 ---------------------------------------- 20240809-18:53:38 current_test_accuracies 0.9492 0.9522 0.9630 0.9479 0.0000 20240809-18:53:38 training model 4 20240809-19:10:19 train_perplexity 141 model 4 1.1611579587145335 20240809-19:10:35 test_perplexity 141 model 4 1.1600909633254 20240809-19:16:47 test_accuracy 141 model 4 val 1534 / 1600 20240809-19:16:50 wrote gpt_004.pth 20240809-19:17:53 wrote non_validated_0141_04.png 20240809-19:17:53 wrote state.pth 20240809-19:17:53 --- epoch 142 ---------------------------------------- 20240809-19:17:53 current_test_accuracies 0.9492 0.9522 0.9630 0.9479 0.9588 20240809-19:17:53 training model 3 20240809-19:34:35 train_perplexity 142 model 3 1.1600612288906518 20240809-19:34:51 test_perplexity 142 model 3 1.159485506821173 20240809-19:41:13 test_accuracy 142 model 3 val 1510 / 1563 20240809-19:41:17 wrote gpt_003.pth 20240809-19:42:19 wrote non_validated_0142_03.png 20240809-19:42:19 wrote state.pth 20240809-19:42:19 --- epoch 143 ---------------------------------------- 20240809-19:42:19 current_test_accuracies 0.9492 0.9522 0.9630 0.9661 0.9588 20240809-19:42:19 training model 0 20240809-19:59:01 train_perplexity 143 model 0 1.1602665968340664 20240809-19:59:16 test_perplexity 143 model 0 1.157665227333193 20240809-20:05:29 test_accuracy 143 model 0 val 1544 / 1620 20240809-20:05:32 wrote gpt_000.pth 20240809-20:06:33 wrote non_validated_0143_00.png 20240809-20:06:33 wrote state.pth 20240809-20:06:33 --- epoch 144 ---------------------------------------- 20240809-20:06:33 current_test_accuracies 0.9531 0.9522 0.9630 0.9661 0.9588 20240809-20:13:25 keep c_quizzes model 1 validated 23 / 420 (5.48%) nb_accumulated 23 / 420 (finishes Fri 22:12 -- 200/h) 20240809-20:20:10 keep c_quizzes model 3 validated 26 / 420 (6.19%) nb_accumulated 49 / 420 (finishes Fri 22:03 -- 215/h) 20240809-20:27:11 keep c_quizzes model 0 validated 32 / 420 (7.62%) nb_accumulated 81 / 420 (finishes Fri 21:53 -- 235/h) 20240809-20:34:07 keep c_quizzes model 3 validated 30 / 420 (7.14%) nb_accumulated 111 / 420 (finishes Fri 21:50 -- 241/h) 20240809-20:41:01 keep c_quizzes model 0 validated 31 / 420 (7.38%) nb_accumulated 142 / 420 (finishes Fri 21:48 -- 247/h) 20240809-20:48:07 keep c_quizzes model 2 validated 32 / 420 (7.62%) nb_accumulated 174 / 420 (finishes Fri 21:46 -- 251/h) 20240809-20:55:09 keep c_quizzes model 1 validated 23 / 420 (5.48%) nb_accumulated 197 / 420 (finishes Fri 21:50 -- 243/h) 20240809-21:02:06 keep c_quizzes model 3 validated 36 / 420 (8.57%) nb_accumulated 233 / 420 (finishes Fri 21:46 -- 251/h) 20240809-21:09:02 keep c_quizzes model 0 validated 35 / 420 (8.33%) nb_accumulated 268 / 420 (finishes Fri 21:44 -- 257/h) 20240809-21:16:05 keep c_quizzes model 0 validated 29 / 420 (6.90%) nb_accumulated 297 / 420 (finishes Fri 21:44 -- 256/h) 20240809-21:23:03 keep c_quizzes model 3 validated 27 / 420 (6.43%) nb_accumulated 324 / 420 (finishes Fri 21:45 -- 254/h) 20240809-21:30:04 keep c_quizzes model 0 validated 26 / 420 (6.19%) nb_accumulated 350 / 420 (finishes Fri 21:46 -- 251/h) 20240809-21:37:04 keep c_quizzes model 1 validated 24 / 420 (5.71%) nb_accumulated 374 / 420 (finishes Fri 21:48 -- 247/h) 20240809-21:43:59 keep c_quizzes model 2 validated 31 / 420 (7.38%) nb_accumulated 405 / 420 (finishes Fri 21:47 -- 249/h) 20240809-21:50:54 keep c_quizzes model 4 validated 32 / 420 (7.62%) nb_accumulated 437 / 420 (finishes now! -- 251/h) 20240809-21:51:25 wrote c_quizzes.pth 20240809-21:51:25 training model 0 20240809-22:08:05 train_perplexity 144 model 0 1.1607981953719761 20240809-22:08:21 test_perplexity 144 model 0 1.157587671516922 20240809-22:14:28 test_accuracy 144 model 0 val 1558 / 1618 20240809-22:14:32 wrote gpt_000.pth 20240809-22:15:33 wrote non_validated_0144_00.png 20240809-22:15:33 wrote state.pth 20240809-22:15:33 --- epoch 145 ---------------------------------------- 20240809-22:15:33 current_test_accuracies 0.9629 0.0000 0.0000 0.0000 0.0000 20240809-22:15:33 training model 1 20240809-22:32:13 train_perplexity 145 model 1 1.1607806961381089 20240809-22:32:29 test_perplexity 145 model 1 1.159937972441255 20240809-22:38:34 test_accuracy 145 model 1 val 1543 / 1622 20240809-22:38:37 wrote gpt_001.pth 20240809-22:39:38 wrote non_validated_0145_01.png 20240809-22:39:38 wrote state.pth 20240809-22:39:38 --- epoch 146 ---------------------------------------- 20240809-22:39:38 current_test_accuracies 0.9629 0.9513 0.0000 0.0000 0.0000 20240809-22:39:38 training model 2 20240809-22:56:20 train_perplexity 146 model 2 1.1618442942731695 20240809-22:56:36 test_perplexity 146 model 2 1.1616951512073577 20240809-23:02:43 test_accuracy 146 model 2 val 1505 / 1605 20240809-23:02:47 wrote gpt_002.pth 20240809-23:03:48 wrote non_validated_0146_02.png 20240809-23:03:48 wrote state.pth 20240809-23:03:48 --- epoch 147 ---------------------------------------- 20240809-23:03:48 current_test_accuracies 0.9629 0.9513 0.9377 0.0000 0.0000 20240809-23:03:48 training model 3 20240809-23:20:29 train_perplexity 147 model 3 1.1603258213948116 20240809-23:20:45 test_perplexity 147 model 3 1.1599965664288652 20240809-23:26:59 test_accuracy 147 model 3 val 1530 / 1586 20240809-23:27:02 wrote gpt_003.pth 20240809-23:28:02 wrote non_validated_0147_03.png 20240809-23:28:02 wrote state.pth 20240809-23:28:02 --- epoch 148 ---------------------------------------- 20240809-23:28:02 current_test_accuracies 0.9629 0.9513 0.9377 0.9647 0.0000 20240809-23:28:02 training model 4 20240809-23:44:43 train_perplexity 148 model 4 1.1613287909599417 20240809-23:44:58 test_perplexity 148 model 4 1.1590186404676852 20240809-23:51:14 test_accuracy 148 model 4 val 1528 / 1573 20240809-23:51:18 wrote gpt_004.pth 20240809-23:52:20 wrote non_validated_0148_04.png 20240809-23:52:20 wrote state.pth 20240809-23:52:20 --- epoch 149 ---------------------------------------- 20240809-23:52:20 current_test_accuracies 0.9629 0.9513 0.9377 0.9647 0.9714 20240809-23:52:20 training model 2 20240810-00:09:01 train_perplexity 149 model 2 1.1613318549420186 20240810-00:09:16 test_perplexity 149 model 2 1.1592781192996777 20240810-00:15:26 test_accuracy 149 model 2 val 1504 / 1601 20240810-00:15:29 wrote gpt_002.pth 20240810-00:16:31 wrote non_validated_0149_02.png 20240810-00:16:31 wrote state.pth 20240810-00:16:31 --- epoch 150 ---------------------------------------- 20240810-00:16:31 current_test_accuracies 0.9629 0.9513 0.9394 0.9647 0.9714 20240810-00:16:31 training model 2 20240810-00:33:12 train_perplexity 150 model 2 1.1613506474803954 20240810-00:33:28 test_perplexity 150 model 2 1.1615939619191171 20240810-00:39:43 test_accuracy 150 model 2 val 1501 / 1588 20240810-00:39:47 wrote gpt_002.pth 20240810-00:40:48 wrote non_validated_0150_02.png 20240810-00:40:48 wrote state.pth 20240810-00:40:48 --- epoch 151 ---------------------------------------- 20240810-00:40:48 current_test_accuracies 0.9629 0.9513 0.9452 0.9647 0.9714 20240810-00:40:48 training model 2 20240810-00:57:29 train_perplexity 151 model 2 1.160746728129077 20240810-00:57:45 test_perplexity 151 model 2 1.1610809039650076 20240810-01:03:53 test_accuracy 151 model 2 val 1539 / 1612 20240810-01:03:57 wrote gpt_002.pth 20240810-01:04:59 wrote non_validated_0151_02.png 20240810-01:04:59 wrote state.pth 20240810-01:04:59 --- epoch 152 ---------------------------------------- 20240810-01:04:59 current_test_accuracies 0.9629 0.9513 0.9547 0.9647 0.9714 20240810-01:12:03 keep c_quizzes model 2 validated 31 / 420 (7.38%) nb_accumulated 31 / 420 (finishes Sat 02:40 -- 262/h) 20240810-01:18:57 keep c_quizzes model 1 validated 36 / 420 (8.57%) nb_accumulated 67 / 420 (finishes Sat 02:32 -- 287/h) 20240810-01:25:52 keep c_quizzes model 2 validated 35 / 420 (8.33%) nb_accumulated 102 / 420 (finishes Sat 02:30 -- 293/h) 20240810-01:32:50 keep c_quizzes model 1 validated 24 / 420 (5.71%) nb_accumulated 126 / 420 (finishes Sat 02:37 -- 271/h) 20240810-01:39:49 keep c_quizzes model 2 validated 28 / 420 (6.67%) nb_accumulated 154 / 420 (finishes Sat 02:40 -- 265/h) 20240810-01:46:48 keep c_quizzes model 0 validated 19 / 420 (4.52%) nb_accumulated 173 / 420 (finishes Sat 02:46 -- 248/h) 20240810-01:53:44 keep c_quizzes model 0 validated 25 / 420 (5.95%) nb_accumulated 198 / 420 (finishes Sat 02:48 -- 243/h) 20240810-02:00:41 keep c_quizzes model 2 validated 17 / 420 (4.05%) nb_accumulated 215 / 420 (finishes Sat 02:53 -- 231/h) 20240810-02:07:39 keep c_quizzes model 2 validated 26 / 420 (6.19%) nb_accumulated 241 / 420 (finishes Sat 02:54 -- 230/h) 20240810-02:14:34 keep c_quizzes model 4 validated 30 / 420 (7.14%) nb_accumulated 271 / 420 (finishes Sat 02:52 -- 233/h) 20240810-02:21:29 keep c_quizzes model 0 validated 35 / 420 (8.33%) nb_accumulated 306 / 420 (finishes Sat 02:49 -- 239/h) 20240810-02:28:24 keep c_quizzes model 2 validated 27 / 420 (6.43%) nb_accumulated 333 / 420 (finishes Sat 02:50 -- 239/h) 20240810-02:35:23 keep c_quizzes model 2 validated 21 / 420 (5.00%) nb_accumulated 354 / 420 (finishes Sat 02:52 -- 234/h) 20240810-02:42:20 keep c_quizzes model 3 validated 31 / 420 (7.38%) nb_accumulated 385 / 420 (finishes Sat 02:51 -- 237/h) 20240810-02:49:16 keep c_quizzes model 2 validated 22 / 420 (5.24%) nb_accumulated 407 / 420 (finishes Sat 02:52 -- 234/h) 20240810-02:56:15 keep c_quizzes model 4 validated 21 / 420 (5.00%) nb_accumulated 428 / 420 (finishes now! -- 230/h) 20240810-02:56:46 wrote c_quizzes.pth 20240810-02:56:46 training model 0 20240810-03:13:27 train_perplexity 152 model 0 1.1613598832642962 20240810-03:13:43 test_perplexity 152 model 0 1.1596225385862808 20240810-03:20:03 test_accuracy 152 model 0 val 1509 / 1579 20240810-03:20:07 wrote gpt_000.pth 20240810-03:21:08 wrote non_validated_0152_00.png 20240810-03:21:08 wrote state.pth 20240810-03:21:08 --- epoch 153 ---------------------------------------- 20240810-03:21:08 current_test_accuracies 0.9557 0.0000 0.0000 0.0000 0.0000 20240810-03:21:08 training model 1 20240810-03:37:49 train_perplexity 153 model 1 1.1610307626060814 20240810-03:38:05 test_perplexity 153 model 1 1.1609874366236201 20240810-03:44:10 test_accuracy 153 model 1 val 1535 / 1627 20240810-03:44:14 wrote gpt_001.pth 20240810-03:45:15 wrote non_validated_0153_01.png 20240810-03:45:15 wrote state.pth 20240810-03:45:15 --- epoch 154 ---------------------------------------- 20240810-03:45:15 current_test_accuracies 0.9557 0.9435 0.0000 0.0000 0.0000 20240810-03:45:15 training model 2 20240810-04:01:58 train_perplexity 154 model 2 1.1613630755611635 20240810-04:02:14 test_perplexity 154 model 2 1.1599333861151804 20240810-04:08:32 test_accuracy 154 model 2 val 1515 / 1593 20240810-04:08:35 wrote gpt_002.pth 20240810-04:09:38 wrote non_validated_0154_02.png 20240810-04:09:38 wrote state.pth 20240810-04:09:38 --- epoch 155 ---------------------------------------- 20240810-04:09:38 current_test_accuracies 0.9557 0.9435 0.9510 0.0000 0.0000 20240810-04:09:38 training model 3 20240810-04:26:18 train_perplexity 155 model 3 1.1612222008306679 20240810-04:26:34 test_perplexity 155 model 3 1.1583069985358954 20240810-04:32:48 test_accuracy 155 model 3 val 1498 / 1584 20240810-04:32:51 wrote gpt_003.pth 20240810-04:33:53 wrote non_validated_0155_03.png 20240810-04:33:53 wrote state.pth 20240810-04:33:53 --- epoch 156 ---------------------------------------- 20240810-04:33:53 current_test_accuracies 0.9557 0.9435 0.9510 0.9457 0.0000 20240810-04:33:53 training model 4 20240810-04:50:34 train_perplexity 156 model 4 1.161598213829145 20240810-04:50:49 test_perplexity 156 model 4 1.161809169478178 20240810-04:56:59 test_accuracy 156 model 4 val 1529 / 1608 20240810-04:57:02 wrote gpt_004.pth 20240810-04:58:04 wrote non_validated_0156_04.png 20240810-04:58:04 wrote state.pth 20240810-04:58:04 --- epoch 157 ---------------------------------------- 20240810-04:58:04 current_test_accuracies 0.9557 0.9435 0.9510 0.9457 0.9509 20240810-04:58:04 training model 1 20240810-05:14:44 train_perplexity 157 model 1 1.160869511678723 20240810-05:15:00 test_perplexity 157 model 1 1.1598622592024188 20240810-05:21:15 test_accuracy 157 model 1 val 1518 / 1596 20240810-05:21:18 wrote gpt_001.pth 20240810-05:22:20 wrote non_validated_0157_01.png 20240810-05:22:20 wrote state.pth 20240810-05:22:20 --- epoch 158 ---------------------------------------- 20240810-05:22:20 current_test_accuracies 0.9557 0.9511 0.9510 0.9457 0.9509 20240810-05:22:20 training model 3 20240810-05:38:59 train_perplexity 158 model 3 1.160610463755267 20240810-05:39:15 test_perplexity 158 model 3 1.1605378854106858 20240810-05:45:26 test_accuracy 158 model 3 val 1555 / 1607 20240810-05:45:29 wrote gpt_003.pth 20240810-05:46:31 wrote non_validated_0158_03.png 20240810-05:46:31 wrote state.pth 20240810-05:46:31 --- epoch 159 ---------------------------------------- 20240810-05:46:31 current_test_accuracies 0.9557 0.9511 0.9510 0.9676 0.9509 20240810-05:53:36 keep c_quizzes model 2 validated 31 / 420 (7.38%) nb_accumulated 31 / 420 (finishes Sat 07:22 -- 262/h) 20240810-06:00:29 keep c_quizzes model 2 validated 47 / 420 (11.19%) nb_accumulated 78 / 420 (finishes Sat 07:01 -- 335/h) 20240810-06:07:32 keep c_quizzes model 3 validated 42 / 420 (10.00%) nb_accumulated 120 / 420 (finishes Sat 07:00 -- 342/h) 20240810-06:14:29 keep c_quizzes model 3 validated 32 / 420 (7.62%) nb_accumulated 152 / 420 (finishes Sat 07:03 -- 326/h) 20240810-06:21:23 keep c_quizzes model 2 validated 29 / 420 (6.90%) nb_accumulated 181 / 420 (finishes Sat 07:07 -- 311/h) 20240810-06:28:27 keep c_quizzes model 3 validated 20 / 420 (4.76%) nb_accumulated 201 / 420 (finishes Sat 07:14 -- 287/h) 20240810-06:35:25 keep c_quizzes model 1 validated 32 / 420 (7.62%) nb_accumulated 233 / 420 (finishes Sat 07:14 -- 285/h) 20240810-06:42:24 keep c_quizzes model 4 validated 34 / 420 (8.10%) nb_accumulated 267 / 420 (finishes Sat 07:14 -- 286/h) 20240810-06:49:21 keep c_quizzes model 4 validated 24 / 420 (5.71%) nb_accumulated 291 / 420 (finishes Sat 07:17 -- 277/h) 20240810-06:56:25 keep c_quizzes model 2 validated 33 / 420 (7.86%) nb_accumulated 324 / 420 (finishes Sat 07:17 -- 278/h) 20240810-07:03:17 keep c_quizzes model 2 validated 31 / 420 (7.38%) nb_accumulated 355 / 420 (finishes Sat 07:17 -- 277/h) 20240810-07:10:10 keep c_quizzes model 4 validated 27 / 420 (6.43%) nb_accumulated 382 / 420 (finishes Sat 07:18 -- 273/h) 20240810-07:17:08 keep c_quizzes model 3 validated 35 / 420 (8.33%) nb_accumulated 417 / 420 (finishes Sat 07:17 -- 276/h) 20240810-07:24:02 keep c_quizzes model 3 validated 23 / 420 (5.48%) nb_accumulated 440 / 420 (finishes now! -- 270/h) 20240810-07:24:34 wrote c_quizzes.pth 20240810-07:24:34 training model 0 20240810-07:41:14 train_perplexity 159 model 0 1.1614087739007595 20240810-07:41:30 test_perplexity 159 model 0 1.1622215380602496 20240810-07:47:38 test_accuracy 159 model 0 val 1521 / 1607 20240810-07:47:41 wrote gpt_000.pth 20240810-07:48:43 wrote non_validated_0159_00.png 20240810-07:48:43 wrote state.pth 20240810-07:48:43 --- epoch 160 ---------------------------------------- 20240810-07:48:43 current_test_accuracies 0.9465 0.0000 0.0000 0.0000 0.0000 20240810-07:48:43 training model 1 20240810-08:05:23 train_perplexity 160 model 1 1.161678328148036 20240810-08:05:38 test_perplexity 160 model 1 1.162418139752189 20240810-08:11:47 test_accuracy 160 model 1 val 1534 / 1611 20240810-08:11:51 wrote gpt_001.pth 20240810-08:12:53 wrote non_validated_0160_01.png 20240810-08:12:53 wrote state.pth 20240810-08:12:53 --- epoch 161 ---------------------------------------- 20240810-08:12:53 current_test_accuracies 0.9465 0.9522 0.0000 0.0000 0.0000 20240810-08:12:53 training model 2 20240810-08:29:33 train_perplexity 161 model 2 1.161749386673403 20240810-08:29:48 test_perplexity 161 model 2 1.1596363448642475 20240810-08:35:58 test_accuracy 161 model 2 val 1529 / 1599 20240810-08:36:02 wrote gpt_002.pth 20240810-08:37:03 wrote non_validated_0161_02.png 20240810-08:37:03 wrote state.pth 20240810-08:37:03 --- epoch 162 ---------------------------------------- 20240810-08:37:03 current_test_accuracies 0.9465 0.9522 0.9562 0.0000 0.0000 20240810-08:37:03 training model 3 20240810-08:53:43 train_perplexity 162 model 3 1.161424951849663 20240810-08:53:59 test_perplexity 162 model 3 1.1618840528072332 20240810-09:00:09 test_accuracy 162 model 3 val 1498 / 1597 20240810-09:00:12 wrote gpt_003.pth 20240810-09:01:14 wrote non_validated_0162_03.png 20240810-09:01:14 wrote state.pth 20240810-09:01:14 --- epoch 163 ---------------------------------------- 20240810-09:01:14 current_test_accuracies 0.9465 0.9522 0.9562 0.9380 0.0000 20240810-09:01:14 training model 4 20240810-09:17:55 train_perplexity 163 model 4 1.1615607561619457 20240810-09:18:10 test_perplexity 163 model 4 1.1626866693897997 20240810-09:24:07 test_accuracy 163 model 4 val 1546 / 1644 20240810-09:24:11 wrote gpt_004.pth 20240810-09:25:12 wrote non_validated_0163_04.png 20240810-09:25:13 wrote state.pth 20240810-09:25:13 --- epoch 164 ---------------------------------------- 20240810-09:25:13 current_test_accuracies 0.9465 0.9522 0.9562 0.9380 0.9404 20240810-09:25:13 training model 3 20240810-09:41:56 train_perplexity 164 model 3 1.1608802020947808 20240810-09:42:12 test_perplexity 164 model 3 1.1611446848008768 20240810-09:48:23 test_accuracy 164 model 3 val 1505 / 1593 20240810-09:48:26 wrote gpt_003.pth 20240810-09:49:29 wrote non_validated_0164_03.png 20240810-09:49:29 wrote state.pth 20240810-09:49:29 --- epoch 165 ---------------------------------------- 20240810-09:49:29 current_test_accuracies 0.9465 0.9522 0.9562 0.9448 0.9404 20240810-09:49:29 training model 4 20240810-10:06:12 train_perplexity 165 model 4 1.1614380887802034 20240810-10:06:28 test_perplexity 165 model 4 1.1611231772898853 20240810-10:12:38 test_accuracy 165 model 4 val 1504 / 1594 20240810-10:12:42 wrote gpt_004.pth 20240810-10:13:43 wrote non_validated_0165_04.png 20240810-10:13:43 wrote state.pth 20240810-10:13:43 --- epoch 166 ---------------------------------------- 20240810-10:13:43 current_test_accuracies 0.9465 0.9522 0.9562 0.9448 0.9435 20240810-10:13:43 training model 4 20240810-10:30:26 train_perplexity 166 model 4 1.1612841616992995 20240810-10:30:42 test_perplexity 166 model 4 1.1617451578227063 20240810-10:37:06 test_accuracy 166 model 4 val 1488 / 1558 20240810-10:37:09 wrote gpt_004.pth 20240810-10:38:10 wrote non_validated_0166_04.png 20240810-10:38:10 wrote state.pth 20240810-10:38:10 --- epoch 167 ---------------------------------------- 20240810-10:38:10 current_test_accuracies 0.9465 0.9522 0.9562 0.9448 0.9551 20240810-10:38:10 training model 3 20240810-10:54:52 train_perplexity 167 model 3 1.1610602875325133 20240810-10:55:08 test_perplexity 167 model 3 1.1593986426924048 20240810-11:01:28 test_accuracy 167 model 3 val 1503 / 1569 20240810-11:01:32 wrote gpt_003.pth 20240810-11:02:34 wrote non_validated_0167_03.png 20240810-11:02:34 wrote state.pth 20240810-11:02:34 --- epoch 168 ---------------------------------------- 20240810-11:02:34 current_test_accuracies 0.9465 0.9522 0.9562 0.9579 0.9551 20240810-11:02:34 training model 0 20240810-11:19:14 train_perplexity 168 model 0 1.1615370154905127 20240810-11:19:30 test_perplexity 168 model 0 1.1604689539548887 20240810-11:25:41 test_accuracy 168 model 0 val 1543 / 1607 20240810-11:25:44 wrote gpt_000.pth 20240810-11:26:46 wrote non_validated_0168_00.png 20240810-11:26:46 wrote state.pth 20240810-11:26:46 --- epoch 169 ---------------------------------------- 20240810-11:26:46 current_test_accuracies 0.9602 0.9522 0.9562 0.9579 0.9551 20240810-11:33:51 keep c_quizzes model 3 validated 36 / 420 (8.57%) nb_accumulated 36 / 420 (finishes Sat 12:49 -- 305/h) 20240810-11:40:43 keep c_quizzes model 0 validated 38 / 420 (9.05%) nb_accumulated 74 / 420 (finishes Sat 12:45 -- 318/h) 20240810-11:47:39 keep c_quizzes model 0 validated 32 / 420 (7.62%) nb_accumulated 106 / 420 (finishes Sat 12:49 -- 304/h) 20240810-11:54:36 keep c_quizzes model 0 validated 31 / 420 (7.38%) nb_accumulated 137 / 420 (finishes Sat 12:52 -- 295/h) 20240810-12:01:31 keep c_quizzes model 2 validated 32 / 420 (7.62%) nb_accumulated 169 / 420 (finishes Sat 12:53 -- 291/h) 20240810-12:08:27 keep c_quizzes model 3 validated 29 / 420 (6.90%) nb_accumulated 198 / 420 (finishes Sat 12:55 -- 285/h) 20240810-12:15:27 keep c_quizzes model 1 validated 27 / 420 (6.43%) nb_accumulated 225 / 420 (finishes Sat 12:57 -- 277/h) 20240810-12:22:22 keep c_quizzes model 1 validated 30 / 420 (7.14%) nb_accumulated 255 / 420 (finishes Sat 12:58 -- 275/h) 20240810-12:29:22 keep c_quizzes model 1 validated 39 / 420 (9.29%) nb_accumulated 294 / 420 (finishes Sat 12:56 -- 281/h) 20240810-12:36:19 keep c_quizzes model 2 validated 23 / 420 (5.48%) nb_accumulated 317 / 420 (finishes Sat 12:58 -- 273/h) 20240810-12:43:20 keep c_quizzes model 1 validated 30 / 420 (7.14%) nb_accumulated 347 / 420 (finishes Sat 12:59 -- 271/h) 20240810-12:50:21 keep c_quizzes model 2 validated 20 / 420 (4.76%) nb_accumulated 367 / 420 (finishes Sat 13:02 -- 263/h) 20240810-12:57:22 keep c_quizzes model 2 validated 22 / 420 (5.24%) nb_accumulated 389 / 420 (finishes Sat 13:04 -- 257/h) 20240810-13:04:27 keep c_quizzes model 1 validated 28 / 420 (6.67%) nb_accumulated 417 / 420 (finishes Sat 13:05 -- 256/h) 20240810-13:11:24 keep c_quizzes model 2 validated 18 / 420 (4.29%) nb_accumulated 435 / 420 (finishes now! -- 249/h) 20240810-13:11:55 wrote c_quizzes.pth 20240810-13:11:55 training model 0 20240810-13:28:36 train_perplexity 169 model 0 1.1615642071392416 20240810-13:28:52 test_perplexity 169 model 0 1.1617326247312327 20240810-13:35:06 test_accuracy 169 model 0 val 1495 / 1583 20240810-13:35:10 wrote gpt_000.pth 20240810-13:36:11 wrote non_validated_0169_00.png 20240810-13:36:11 wrote state.pth 20240810-13:36:11 --- epoch 170 ---------------------------------------- 20240810-13:36:11 current_test_accuracies 0.9444 0.0000 0.0000 0.0000 0.0000 20240810-13:36:11 training model 1 20240810-13:52:53 train_perplexity 170 model 1 1.1615450823444526 20240810-13:53:08 test_perplexity 170 model 1 1.1617412302298347 20240810-13:59:12 test_accuracy 170 model 1 val 1542 / 1634 20240810-13:59:16 wrote gpt_001.pth 20240810-14:00:16 wrote non_validated_0170_01.png 20240810-14:00:16 wrote state.pth 20240810-14:00:16 --- epoch 171 ---------------------------------------- 20240810-14:00:16 current_test_accuracies 0.9444 0.9437 0.0000 0.0000 0.0000 20240810-14:00:16 training model 2 20240810-14:16:57 train_perplexity 171 model 2 1.1621262859443364 20240810-14:17:13 test_perplexity 171 model 2 1.159788713209116 20240810-14:23:28 test_accuracy 171 model 2 val 1518 / 1585 20240810-14:23:31 wrote gpt_002.pth 20240810-14:24:33 wrote non_validated_0171_02.png 20240810-14:24:33 wrote state.pth 20240810-14:24:33 --- epoch 172 ---------------------------------------- 20240810-14:24:33 current_test_accuracies 0.9444 0.9437 0.9577 0.0000 0.0000 20240810-14:24:33 training model 3 20240810-14:41:13 train_perplexity 172 model 3 1.1613917159201554 20240810-14:41:29 test_perplexity 172 model 3 1.1596001142161905 20240810-14:47:43 test_accuracy 172 model 3 val 1514 / 1599 20240810-14:47:46 wrote gpt_003.pth 20240810-14:48:48 wrote non_validated_0172_03.png 20240810-14:48:48 wrote state.pth 20240810-14:48:48 --- epoch 173 ---------------------------------------- 20240810-14:48:48 current_test_accuracies 0.9444 0.9437 0.9577 0.9468 0.0000 20240810-14:48:48 training model 4 20240810-15:05:28 train_perplexity 173 model 4 1.1619312674259932 20240810-15:05:44 test_perplexity 173 model 4 1.1608536221365173 20240810-15:12:05 test_accuracy 173 model 4 val 1484 / 1568 20240810-15:12:08 wrote gpt_004.pth 20240810-15:13:10 wrote non_validated_0173_04.png 20240810-15:13:10 wrote state.pth 20240810-15:13:10 --- epoch 174 ---------------------------------------- 20240810-15:13:10 current_test_accuracies 0.9444 0.9437 0.9577 0.9468 0.9464 20240810-15:13:10 training model 1 20240810-15:29:50 train_perplexity 174 model 1 1.161441066373758 20240810-15:30:06 test_perplexity 174 model 1 1.1607030315610138 20240810-15:36:27 test_accuracy 174 model 1 val 1472 / 1569 20240810-15:36:30 wrote gpt_001.pth 20240810-15:37:32 wrote non_validated_0174_01.png 20240810-15:37:32 wrote state.pth 20240810-15:37:32 --- epoch 175 ---------------------------------------- 20240810-15:37:32 current_test_accuracies 0.9444 0.9382 0.9577 0.9468 0.9464 20240810-15:37:32 training model 1 20240810-15:54:13 train_perplexity 175 model 1 1.161640984892183 20240810-15:54:28 test_perplexity 175 model 1 1.1615844149085055 20240810-16:00:40 test_accuracy 175 model 1 val 1503 / 1591 20240810-16:00:43 wrote gpt_001.pth 20240810-16:01:45 wrote non_validated_0175_01.png 20240810-16:01:45 wrote state.pth 20240810-16:01:45 --- epoch 176 ---------------------------------------- 20240810-16:01:45 current_test_accuracies 0.9444 0.9447 0.9577 0.9468 0.9464 20240810-16:01:45 training model 0 20240810-16:18:26 train_perplexity 176 model 0 1.1615389743966629 20240810-16:18:42 test_perplexity 176 model 0 1.161865647184409 20240810-16:24:56 test_accuracy 176 model 0 val 1518 / 1590 20240810-16:24:59 wrote gpt_000.pth 20240810-16:26:00 wrote non_validated_0176_00.png 20240810-16:26:00 wrote state.pth 20240810-16:26:00 --- epoch 177 ---------------------------------------- 20240810-16:26:00 current_test_accuracies 0.9547 0.9447 0.9577 0.9468 0.9464 20240810-16:26:00 training model 1 20240810-16:42:42 train_perplexity 177 model 1 1.1608759935939386 20240810-16:42:57 test_perplexity 177 model 1 1.1607107856070291 20240810-16:49:11 test_accuracy 177 model 1 val 1514 / 1598 20240810-16:49:15 wrote gpt_001.pth 20240810-16:50:15 wrote non_validated_0177_01.png 20240810-16:50:15 wrote state.pth 20240810-16:50:15 --- epoch 178 ---------------------------------------- 20240810-16:50:15 current_test_accuracies 0.9547 0.9474 0.9577 0.9468 0.9464 20240810-16:50:15 training model 4 20240810-17:06:58 train_perplexity 178 model 4 1.1619765971615346 20240810-17:07:14 test_perplexity 178 model 4 1.1643563898150961 20240810-17:13:30 test_accuracy 178 model 4 val 1523 / 1588 20240810-17:13:33 wrote gpt_004.pth 20240810-17:14:35 wrote non_validated_0178_04.png 20240810-17:14:35 wrote state.pth 20240810-17:14:35 --- epoch 179 ---------------------------------------- 20240810-17:14:35 current_test_accuracies 0.9547 0.9474 0.9577 0.9468 0.9591 20240810-17:14:35 training model 3 20240810-17:31:18 train_perplexity 179 model 3 1.1612783156282174 20240810-17:31:34 test_perplexity 179 model 3 1.1604294765283256 20240810-17:37:44 test_accuracy 179 model 3 val 1531 / 1610 20240810-17:37:47 wrote gpt_003.pth 20240810-17:38:49 wrote non_validated_0179_03.png 20240810-17:38:49 wrote state.pth 20240810-17:38:49 --- epoch 180 ---------------------------------------- 20240810-17:38:49 current_test_accuracies 0.9547 0.9474 0.9577 0.9509 0.9591 20240810-17:38:49 training model 1 20240810-17:55:31 train_perplexity 180 model 1 1.1610284234404582 20240810-17:55:47 test_perplexity 180 model 1 1.1612466186459005 20240810-18:01:55 test_accuracy 180 model 1 val 1538 / 1612 20240810-18:01:58 wrote gpt_001.pth 20240810-18:03:00 wrote non_validated_0180_01.png 20240810-18:03:00 wrote state.pth 20240810-18:03:00 --- epoch 181 ---------------------------------------- 20240810-18:03:00 current_test_accuracies 0.9547 0.9541 0.9577 0.9509 0.9591 20240810-18:09:59 keep c_quizzes model 0 validated 36 / 420 (8.57%) nb_accumulated 36 / 420 (finishes Sat 19:24 -- 309/h) 20240810-18:16:52 keep c_quizzes model 2 validated 38 / 420 (9.05%) nb_accumulated 74 / 420 (finishes Sat 19:21 -- 320/h) 20240810-18:23:42 keep c_quizzes model 4 validated 48 / 420 (11.43%) nb_accumulated 122 / 420 (finishes Sat 19:14 -- 353/h) 20240810-18:30:35 keep c_quizzes model 2 validated 35 / 420 (8.33%) nb_accumulated 157 / 420 (finishes Sat 19:16 -- 341/h) 20240810-18:37:29 keep c_quizzes model 1 validated 24 / 420 (5.71%) nb_accumulated 181 / 420 (finishes Sat 19:23 -- 314/h) 20240810-18:44:24 keep c_quizzes model 3 validated 25 / 420 (5.95%) nb_accumulated 206 / 420 (finishes Sat 19:27 -- 298/h) 20240810-18:51:22 keep c_quizzes model 4 validated 28 / 420 (6.67%) nb_accumulated 234 / 420 (finishes Sat 19:29 -- 290/h) 20240810-18:58:16 keep c_quizzes model 4 validated 18 / 420 (4.29%) nb_accumulated 252 / 420 (finishes Sat 19:35 -- 273/h) 20240810-19:05:08 keep c_quizzes model 0 validated 29 / 420 (6.90%) nb_accumulated 281 / 420 (finishes Sat 19:35 -- 271/h) 20240810-19:12:03 keep c_quizzes model 1 validated 30 / 420 (7.14%) nb_accumulated 311 / 420 (finishes Sat 19:36 -- 270/h) 20240810-19:18:59 keep c_quizzes model 1 validated 17 / 420 (4.05%) nb_accumulated 328 / 420 (finishes Sat 19:40 -- 258/h) 20240810-19:25:53 keep c_quizzes model 3 validated 24 / 420 (5.71%) nb_accumulated 352 / 420 (finishes Sat 19:41 -- 254/h) 20240810-19:32:46 keep c_quizzes model 0 validated 32 / 420 (7.62%) nb_accumulated 384 / 420 (finishes Sat 19:41 -- 256/h) 20240810-19:39:43 keep c_quizzes model 1 validated 28 / 420 (6.67%) nb_accumulated 412 / 420 (finishes Sat 19:41 -- 255/h) 20240810-19:46:42 keep c_quizzes model 2 validated 26 / 420 (6.19%) nb_accumulated 438 / 420 (finishes now! -- 253/h) 20240810-19:47:13 wrote c_quizzes.pth 20240810-19:47:13 training model 0 20240810-20:03:55 train_perplexity 181 model 0 1.1620506343002737 20240810-20:04:11 test_perplexity 181 model 0 1.1615565332402018 20240810-20:10:18 test_accuracy 181 model 0 val 1546 / 1610 20240810-20:10:21 wrote gpt_000.pth 20240810-20:11:23 wrote non_validated_0181_00.png 20240810-20:11:23 wrote state.pth 20240810-20:11:23 --- epoch 182 ---------------------------------------- 20240810-20:11:23 current_test_accuracies 0.9602 0.0000 0.0000 0.0000 0.0000 20240810-20:11:23 training model 1 20240810-20:28:05 train_perplexity 182 model 1 1.161918444887264 20240810-20:28:21 test_perplexity 182 model 1 1.163167898693214 20240810-20:34:29 test_accuracy 182 model 1 val 1515 / 1604 20240810-20:34:32 wrote gpt_001.pth 20240810-20:35:34 wrote non_validated_0182_01.png 20240810-20:35:34 wrote state.pth 20240810-20:35:34 --- epoch 183 ---------------------------------------- 20240810-20:35:34 current_test_accuracies 0.9602 0.9445 0.0000 0.0000 0.0000 20240810-20:35:34 training model 2 20240810-20:52:17 train_perplexity 183 model 2 1.1627248043341176 20240810-20:52:33 test_perplexity 183 model 2 1.162865177574449 20240810-20:58:36 test_accuracy 183 model 2 val 1515 / 1632 20240810-20:58:39 wrote gpt_002.pth 20240810-20:59:41 wrote non_validated_0183_02.png 20240810-20:59:41 wrote state.pth 20240810-20:59:41 --- epoch 184 ---------------------------------------- 20240810-20:59:41 current_test_accuracies 0.9602 0.9445 0.9283 0.0000 0.0000 20240810-20:59:41 training model 3 20240810-21:16:23 train_perplexity 184 model 3 1.1619177262336975 20240810-21:16:39 test_perplexity 184 model 3 1.161135480823485 20240810-21:23:00 test_accuracy 184 model 3 val 1514 / 1578 20240810-21:23:03 wrote gpt_003.pth 20240810-21:24:04 wrote non_validated_0184_03.png 20240810-21:24:04 wrote state.pth 20240810-21:24:04 --- epoch 185 ---------------------------------------- 20240810-21:24:04 current_test_accuracies 0.9602 0.9445 0.9283 0.9594 0.0000 20240810-21:24:04 training model 4 20240810-21:40:46 train_perplexity 185 model 4 1.1618228756551914 20240810-21:41:02 test_perplexity 185 model 4 1.1632373728876118 20240810-21:47:11 test_accuracy 185 model 4 val 1529 / 1608 20240810-21:47:15 wrote gpt_004.pth 20240810-21:48:17 wrote non_validated_0185_04.png 20240810-21:48:17 wrote state.pth 20240810-21:48:17 --- epoch 186 ---------------------------------------- 20240810-21:48:17 current_test_accuracies 0.9602 0.9445 0.9283 0.9594 0.9509 20240810-21:48:17 training model 2 20240810-22:04:58 train_perplexity 186 model 2 1.162707576089605 20240810-22:05:14 test_perplexity 186 model 2 1.16240966573091 20240810-22:11:21 test_accuracy 186 model 2 val 1540 / 1603 20240810-22:11:24 wrote gpt_002.pth 20240810-22:12:25 wrote non_validated_0186_02.png 20240810-22:12:25 wrote state.pth 20240810-22:12:25 --- epoch 187 ---------------------------------------- 20240810-22:12:25 current_test_accuracies 0.9602 0.9445 0.9607 0.9594 0.9509 20240810-22:12:25 training model 1 20240810-22:29:07 train_perplexity 187 model 1 1.1616340479068612 20240810-22:29:22 test_perplexity 187 model 1 1.1630783184993623 20240810-22:35:39 test_accuracy 187 model 1 val 1517 / 1595 20240810-22:35:43 wrote gpt_001.pth 20240810-22:36:47 wrote non_validated_0187_01.png 20240810-22:36:47 wrote state.pth 20240810-22:36:47 --- epoch 188 ---------------------------------------- 20240810-22:36:47 current_test_accuracies 0.9602 0.9511 0.9607 0.9594 0.9509 20240810-22:43:52 keep c_quizzes model 1 validated 38 / 420 (9.05%) nb_accumulated 38 / 420 (finishes Sat 23:55 -- 321/h) 20240810-22:50:50 keep c_quizzes model 1 validated 29 / 420 (6.90%) nb_accumulated 67 / 420 (finishes Sun 00:04 -- 285/h) 20240810-22:57:54 keep c_quizzes model 0 validated 34 / 420 (8.10%) nb_accumulated 101 / 420 (finishes Sun 00:04 -- 286/h) 20240810-23:04:55 keep c_quizzes model 3 validated 31 / 420 (7.38%) nb_accumulated 132 / 420 (finishes Sun 00:06 -- 281/h) 20240810-23:11:55 keep c_quizzes model 2 validated 23 / 420 (5.48%) nb_accumulated 155 / 420 (finishes Sun 00:12 -- 264/h) 20240810-23:18:55 keep c_quizzes model 1 validated 22 / 420 (5.24%) nb_accumulated 177 / 420 (finishes Sun 00:16 -- 252/h) 20240810-23:25:55 keep c_quizzes model 0 validated 24 / 420 (5.71%) nb_accumulated 201 / 420 (finishes Sun 00:19 -- 245/h) 20240810-23:32:57 keep c_quizzes model 1 validated 27 / 420 (6.43%) nb_accumulated 228 / 420 (finishes Sun 00:20 -- 243/h) 20240810-23:39:59 keep c_quizzes model 0 validated 22 / 420 (5.24%) nb_accumulated 250 / 420 (finishes Sun 00:22 -- 237/h) 20240810-23:47:01 keep c_quizzes model 0 validated 30 / 420 (7.14%) nb_accumulated 280 / 420 (finishes Sun 00:22 -- 239/h) 20240810-23:54:04 keep c_quizzes model 3 validated 25 / 420 (5.95%) nb_accumulated 305 / 420 (finishes Sun 00:23 -- 236/h) 20240811-00:01:02 keep c_quizzes model 1 validated 26 / 420 (6.19%) nb_accumulated 331 / 420 (finishes Sun 00:23 -- 235/h) 20240811-00:08:05 keep c_quizzes model 4 validated 28 / 420 (6.67%) nb_accumulated 359 / 420 (finishes Sun 00:23 -- 235/h) 20240811-00:15:07 keep c_quizzes model 0 validated 20 / 420 (4.76%) nb_accumulated 379 / 420 (finishes Sun 00:25 -- 231/h) 20240811-00:22:13 keep c_quizzes model 3 validated 34 / 420 (8.10%) nb_accumulated 413 / 420 (finishes Sun 00:24 -- 235/h) 20240811-00:29:10 keep c_quizzes model 0 validated 15 / 420 (3.57%) nb_accumulated 428 / 420 (finishes now! -- 228/h) 20240811-00:29:42 wrote c_quizzes.pth 20240811-00:29:42 training model 0 20240811-00:46:24 train_perplexity 188 model 0 1.1630001517913742 20240811-00:46:40 test_perplexity 188 model 0 1.1604052501362967 20240811-00:52:52 test_accuracy 188 model 0 val 1503 / 1599 20240811-00:52:55 wrote gpt_000.pth 20240811-00:53:57 wrote non_validated_0188_00.png 20240811-00:53:57 wrote state.pth 20240811-00:53:57 --- epoch 189 ---------------------------------------- 20240811-00:53:57 current_test_accuracies 0.9400 0.0000 0.0000 0.0000 0.0000 20240811-00:53:57 training model 1 20240811-01:10:37 train_perplexity 189 model 1 1.1621658052696537 20240811-01:10:53 test_perplexity 189 model 1 1.1625998274259086 20240811-01:17:07 test_accuracy 189 model 1 val 1527 / 1606 20240811-01:17:11 wrote gpt_001.pth 20240811-01:18:12 wrote non_validated_0189_01.png 20240811-01:18:12 wrote state.pth 20240811-01:18:12 --- epoch 190 ---------------------------------------- 20240811-01:18:12 current_test_accuracies 0.9400 0.9508 0.0000 0.0000 0.0000 20240811-01:18:12 training model 2 20240811-01:34:53 train_perplexity 190 model 2 1.1630906124678067 20240811-01:35:09 test_perplexity 190 model 2 1.1615359230180662 20240811-01:41:16 test_accuracy 190 model 2 val 1504 / 1611 20240811-01:41:19 wrote gpt_002.pth 20240811-01:42:21 wrote non_validated_0190_02.png 20240811-01:42:21 wrote state.pth 20240811-01:42:21 --- epoch 191 ---------------------------------------- 20240811-01:42:21 current_test_accuracies 0.9400 0.9508 0.9336 0.0000 0.0000 20240811-01:42:21 training model 3 20240811-01:59:02 train_perplexity 191 model 3 1.1621767202184892 20240811-01:59:17 test_perplexity 191 model 3 1.1621798116914164 20240811-02:05:28 test_accuracy 191 model 3 val 1519 / 1596 20240811-02:05:31 wrote gpt_003.pth 20240811-02:06:31 wrote non_validated_0191_03.png 20240811-02:06:31 wrote state.pth 20240811-02:06:31 --- epoch 192 ---------------------------------------- 20240811-02:06:31 current_test_accuracies 0.9400 0.9508 0.9336 0.9518 0.0000 20240811-02:06:31 training model 4 20240811-02:23:12 train_perplexity 192 model 4 1.1629885959031756 20240811-02:23:27 test_perplexity 192 model 4 1.1633385761909274 20240811-02:29:39 test_accuracy 192 model 4 val 1520 / 1603 20240811-02:29:42 wrote gpt_004.pth 20240811-02:30:44 wrote non_validated_0192_04.png 20240811-02:30:44 wrote state.pth 20240811-02:30:44 --- epoch 193 ---------------------------------------- 20240811-02:30:44 current_test_accuracies 0.9400 0.9508 0.9336 0.9518 0.9482 20240811-02:30:44 training model 2 20240811-02:47:25 train_perplexity 193 model 2 1.1624803790228413 20240811-02:47:41 test_perplexity 193 model 2 1.1620510973904041 20240811-02:53:56 test_accuracy 193 model 2 val 1528 / 1605 20240811-02:53:59 wrote gpt_002.pth 20240811-02:54:59 wrote non_validated_0193_02.png 20240811-02:54:59 wrote state.pth 20240811-02:54:59 --- epoch 194 ---------------------------------------- 20240811-02:54:59 current_test_accuracies 0.9400 0.9508 0.9520 0.9518 0.9482 20240811-02:54:59 training model 0 20240811-03:11:40 train_perplexity 194 model 0 1.162461459568486 20240811-03:11:56 test_perplexity 194 model 0 1.1626147698072036 20240811-03:18:11 test_accuracy 194 model 0 val 1497 / 1583 20240811-03:18:14 wrote gpt_000.pth 20240811-03:19:14 wrote non_validated_0194_00.png 20240811-03:19:14 wrote state.pth 20240811-03:19:14 --- epoch 195 ---------------------------------------- 20240811-03:19:14 current_test_accuracies 0.9457 0.9508 0.9520 0.9518 0.9482 20240811-03:19:14 training model 0 20240811-03:35:56 train_perplexity 195 model 0 1.1623052221478833 20240811-03:36:12 test_perplexity 195 model 0 1.161980520704134 20240811-03:42:22 test_accuracy 195 model 0 val 1516 / 1593 20240811-03:42:26 wrote gpt_000.pth 20240811-03:43:26 wrote non_validated_0195_00.png 20240811-03:43:26 wrote state.pth 20240811-03:43:26 --- epoch 196 ---------------------------------------- 20240811-03:43:26 current_test_accuracies 0.9517 0.9508 0.9520 0.9518 0.9482 20240811-03:43:26 training model 4 20240811-04:00:07 train_perplexity 196 model 4 1.1629022866785041 20240811-04:00:23 test_perplexity 196 model 4 1.1630808183135337 20240811-04:06:37 test_accuracy 196 model 4 val 1523 / 1597 20240811-04:06:40 wrote gpt_004.pth 20240811-04:07:40 wrote non_validated_0196_04.png 20240811-04:07:40 wrote state.pth 20240811-04:07:40 --- epoch 197 ---------------------------------------- 20240811-04:07:40 current_test_accuracies 0.9517 0.9508 0.9520 0.9518 0.9537 20240811-04:14:42 keep c_quizzes model 3 validated 27 / 420 (6.43%) nb_accumulated 27 / 420 (finishes Sun 05:57 -- 230/h) 20240811-04:21:36 keep c_quizzes model 3 validated 29 / 420 (6.90%) nb_accumulated 56 / 420 (finishes Sun 05:52 -- 241/h) 20240811-04:28:27 keep c_quizzes model 4 validated 24 / 420 (5.71%) nb_accumulated 80 / 420 (finishes Sun 05:56 -- 230/h) 20240811-04:35:20 keep c_quizzes model 2 validated 25 / 420 (5.95%) nb_accumulated 105 / 420 (finishes Sun 05:58 -- 227/h) 20240811-04:42:13 keep c_quizzes model 4 validated 33 / 420 (7.86%) nb_accumulated 138 / 420 (finishes Sun 05:52 -- 239/h) 20240811-04:49:04 keep c_quizzes model 2 validated 26 / 420 (6.19%) nb_accumulated 164 / 420 (finishes Sun 05:53 -- 237/h) 20240811-04:56:00 keep c_quizzes model 2 validated 25 / 420 (5.95%) nb_accumulated 189 / 420 (finishes Sun 05:55 -- 234/h) 20240811-05:02:54 keep c_quizzes model 3 validated 29 / 420 (6.90%) nb_accumulated 218 / 420 (finishes Sun 05:54 -- 236/h) 20240811-05:09:48 keep c_quizzes model 1 validated 28 / 420 (6.67%) nb_accumulated 246 / 420 (finishes Sun 05:53 -- 237/h) 20240811-05:16:43 keep c_quizzes model 1 validated 29 / 420 (6.90%) nb_accumulated 275 / 420 (finishes Sun 05:53 -- 238/h) 20240811-05:23:38 keep c_quizzes model 4 validated 26 / 420 (6.19%) nb_accumulated 301 / 420 (finishes Sun 05:53 -- 237/h) 20240811-05:30:32 keep c_quizzes model 4 validated 29 / 420 (6.90%) nb_accumulated 330 / 420 (finishes Sun 05:53 -- 238/h) 20240811-05:37:25 keep c_quizzes model 0 validated 31 / 420 (7.38%) nb_accumulated 361 / 420 (finishes Sun 05:52 -- 241/h) 20240811-05:44:21 keep c_quizzes model 4 validated 20 / 420 (4.76%) nb_accumulated 381 / 420 (finishes Sun 05:54 -- 236/h) 20240811-05:51:15 keep c_quizzes model 2 validated 28 / 420 (6.67%) nb_accumulated 409 / 420 (finishes Sun 05:54 -- 236/h) 20240811-05:58:10 keep c_quizzes model 3 validated 25 / 420 (5.95%) nb_accumulated 434 / 420 (finishes now! -- 235/h) 20240811-05:58:41 wrote c_quizzes.pth 20240811-05:58:41 training model 0 20240811-06:15:23 train_perplexity 197 model 0 1.1629441970703696 20240811-06:15:38 test_perplexity 197 model 0 1.1629372618184908 20240811-06:21:47 test_accuracy 197 model 0 val 1517 / 1601 20240811-06:21:50 wrote gpt_000.pth 20240811-06:22:52 wrote non_validated_0197_00.png 20240811-06:22:52 wrote state.pth 20240811-06:22:52 --- epoch 198 ---------------------------------------- 20240811-06:22:52 current_test_accuracies 0.9475 0.0000 0.0000 0.0000 0.0000 20240811-06:22:52 training model 1 20240811-06:39:33 train_perplexity 198 model 1 1.1631822192719414 20240811-06:39:49 test_perplexity 198 model 1 1.16305843967611 20240811-06:46:03 test_accuracy 198 model 1 val 1509 / 1598 20240811-06:46:06 wrote gpt_001.pth 20240811-06:47:08 wrote non_validated_0198_01.png 20240811-06:47:08 wrote state.pth 20240811-06:47:08 --- epoch 199 ---------------------------------------- 20240811-06:47:08 current_test_accuracies 0.9475 0.9443 0.0000 0.0000 0.0000 20240811-06:47:08 training model 2 20240811-07:03:48 train_perplexity 199 model 2 1.1633820654065554 20240811-07:04:03 test_perplexity 199 model 2 1.163653660250256 20240811-07:10:20 test_accuracy 199 model 2 val 1497 / 1580 20240811-07:10:23 wrote gpt_002.pth 20240811-07:11:25 wrote non_validated_0199_02.png 20240811-07:11:25 wrote state.pth 20240811-07:11:25 --- epoch 200 ---------------------------------------- 20240811-07:11:25 current_test_accuracies 0.9475 0.9443 0.9475 0.0000 0.0000 20240811-07:11:25 training model 3 20240811-07:28:05 train_perplexity 200 model 3 1.1629287960131398 20240811-07:28:21 test_perplexity 200 model 3 1.1628466861886702 20240811-07:34:41 test_accuracy 200 model 3 val 1493 / 1576 20240811-07:34:44 wrote gpt_003.pth 20240811-07:35:45 wrote non_validated_0200_03.png 20240811-07:35:45 wrote state.pth 20240811-07:35:45 --- epoch 201 ---------------------------------------- 20240811-07:35:45 current_test_accuracies 0.9475 0.9443 0.9475 0.9473 0.0000 20240811-07:35:45 training model 4 20240811-07:52:25 train_perplexity 201 model 4 1.1638837382376237 20240811-07:52:40 test_perplexity 201 model 4 1.1641074354858216 20240811-07:58:49 test_accuracy 201 model 4 val 1503 / 1604 20240811-07:58:53 wrote gpt_004.pth 20240811-07:59:54 wrote non_validated_0201_04.png 20240811-07:59:54 wrote state.pth 20240811-07:59:54 --- epoch 202 ---------------------------------------- 20240811-07:59:54 current_test_accuracies 0.9475 0.9443 0.9475 0.9473 0.9370 20240811-07:59:54 training model 4 20240811-08:16:35 train_perplexity 202 model 4 1.1633379095071756 20240811-08:16:50 test_perplexity 202 model 4 1.1623887573468816 20240811-08:23:04 test_accuracy 202 model 4 val 1524 / 1597 20240811-08:23:08 wrote gpt_004.pth 20240811-08:24:10 wrote non_validated_0202_04.png 20240811-08:24:10 wrote state.pth 20240811-08:24:10 --- epoch 203 ---------------------------------------- 20240811-08:24:10 current_test_accuracies 0.9475 0.9443 0.9475 0.9473 0.9543 20240811-08:24:10 training model 1 20240811-08:40:51 train_perplexity 203 model 1 1.163091080569536 20240811-08:41:06 test_perplexity 203 model 1 1.1634312296393199 20240811-08:47:12 test_accuracy 203 model 1 val 1532 / 1626 20240811-08:47:15 wrote gpt_001.pth 20240811-08:48:17 wrote non_validated_0203_01.png 20240811-08:48:17 wrote state.pth 20240811-08:48:17 --- epoch 204 ---------------------------------------- 20240811-08:48:17 current_test_accuracies 0.9475 0.9422 0.9475 0.9473 0.9543 20240811-08:48:17 training model 1 20240811-09:04:57 train_perplexity 204 model 1 1.162704111103434 20240811-09:05:13 test_perplexity 204 model 1 1.1640946896123556 20240811-09:11:23 test_accuracy 204 model 1 val 1528 / 1618 20240811-09:11:26 wrote gpt_001.pth 20240811-09:12:28 wrote non_validated_0204_01.png 20240811-09:12:28 wrote state.pth 20240811-09:12:28 --- epoch 205 ---------------------------------------- 20240811-09:12:28 current_test_accuracies 0.9475 0.9444 0.9475 0.9473 0.9543 20240811-09:12:28 training model 1 20240811-09:29:10 train_perplexity 205 model 1 1.1624421293793068 20240811-09:29:26 test_perplexity 205 model 1 1.1621194385896143 20240811-09:35:37 test_accuracy 205 model 1 val 1527 / 1602 20240811-09:35:40 wrote gpt_001.pth 20240811-09:36:43 wrote non_validated_0205_01.png 20240811-09:36:43 wrote state.pth 20240811-09:36:43 --- epoch 206 ---------------------------------------- 20240811-09:36:43 current_test_accuracies 0.9475 0.9532 0.9475 0.9473 0.9543 20240811-09:36:43 training model 3 20240811-09:53:26 train_perplexity 206 model 3 1.1628559830141045 20240811-09:53:41 test_perplexity 206 model 3 1.1626162101079258 20240811-09:59:57 test_accuracy 206 model 3 val 1509 / 1596 20240811-10:00:00 wrote gpt_003.pth 20240811-10:01:02 wrote non_validated_0206_03.png 20240811-10:01:02 wrote state.pth 20240811-10:01:02 --- epoch 207 ---------------------------------------- 20240811-10:01:02 current_test_accuracies 0.9475 0.9532 0.9475 0.9455 0.9543 20240811-10:01:02 training model 3 20240811-10:17:45 train_perplexity 207 model 3 1.1630428993724486 20240811-10:18:01 test_perplexity 207 model 3 1.1622454221374228 20240811-10:24:15 test_accuracy 207 model 3 val 1507 / 1581 20240811-10:24:19 wrote gpt_003.pth 20240811-10:25:19 wrote non_validated_0207_03.png 20240811-10:25:19 wrote state.pth 20240811-10:25:19 --- epoch 208 ---------------------------------------- 20240811-10:25:19 current_test_accuracies 0.9475 0.9532 0.9475 0.9532 0.9543 20240811-10:25:19 training model 2 20240811-10:42:02 train_perplexity 208 model 2 1.1636007094274066 20240811-10:42:18 test_perplexity 208 model 2 1.1631277343597801 20240811-10:48:31 test_accuracy 208 model 2 val 1517 / 1607 20240811-10:48:34 wrote gpt_002.pth 20240811-10:49:36 wrote non_validated_0208_02.png 20240811-10:49:36 wrote state.pth 20240811-10:49:36 --- epoch 209 ---------------------------------------- 20240811-10:49:36 current_test_accuracies 0.9475 0.9532 0.9440 0.9532 0.9543 20240811-10:49:36 training model 2 20240811-11:06:19 train_perplexity 209 model 2 1.1632649232201584 20240811-11:06:35 test_perplexity 209 model 2 1.163158009560844 20240811-11:12:50 test_accuracy 209 model 2 val 1491 / 1589 20240811-11:12:53 wrote gpt_002.pth 20240811-11:13:53 wrote non_validated_0209_02.png 20240811-11:13:53 wrote state.pth 20240811-11:13:53 --- epoch 210 ---------------------------------------- 20240811-11:13:53 current_test_accuracies 0.9475 0.9532 0.9383 0.9532 0.9543 20240811-11:13:53 training model 2 20240811-11:30:36 train_perplexity 210 model 2 1.1627308171320998 20240811-11:30:52 test_perplexity 210 model 2 1.1636815678944017 20240811-11:37:05 test_accuracy 210 model 2 val 1516 / 1598 20240811-11:37:08 wrote gpt_002.pth 20240811-11:38:10 wrote non_validated_0210_02.png 20240811-11:38:10 wrote state.pth 20240811-11:38:10 --- epoch 211 ---------------------------------------- 20240811-11:38:10 current_test_accuracies 0.9475 0.9532 0.9487 0.9532 0.9543 20240811-11:38:10 training model 0 20240811-11:54:53 train_perplexity 211 model 0 1.1630813987374002 20240811-11:55:08 test_perplexity 211 model 0 1.1617969331616584 20240811-12:01:29 test_accuracy 211 model 0 val 1482 / 1570 20240811-12:01:32 wrote gpt_000.pth 20240811-12:02:34 wrote non_validated_0211_00.png 20240811-12:02:34 wrote state.pth 20240811-12:02:34 --- epoch 212 ---------------------------------------- 20240811-12:02:34 current_test_accuracies 0.9439 0.9532 0.9487 0.9532 0.9543 20240811-12:02:34 training model 0 20240811-12:19:16 train_perplexity 212 model 0 1.162701306598749 20240811-12:19:32 test_perplexity 212 model 0 1.1617014825790137 20240811-12:25:43 test_accuracy 212 model 0 val 1514 / 1590 20240811-12:25:46 wrote gpt_000.pth 20240811-12:26:48 wrote non_validated_0212_00.png 20240811-12:26:48 wrote state.pth 20240811-12:26:48 --- epoch 213 ---------------------------------------- 20240811-12:26:48 current_test_accuracies 0.9522 0.9532 0.9487 0.9532 0.9543 20240811-12:26:48 training model 2 20240811-12:43:31 train_perplexity 213 model 2 1.1630365819762611 20240811-12:43:46 test_perplexity 213 model 2 1.1626250047514808 20240811-12:49:56 test_accuracy 213 model 2 val 1554 / 1616 20240811-12:49:59 wrote gpt_002.pth 20240811-12:51:00 wrote non_validated_0213_02.png 20240811-12:51:00 wrote state.pth 20240811-12:51:00 --- epoch 214 ---------------------------------------- 20240811-12:51:00 current_test_accuracies 0.9522 0.9532 0.9616 0.9532 0.9543 20240811-12:58:06 keep c_quizzes model 4 validated 39 / 420 (9.29%) nb_accumulated 39 / 420 (finishes Sun 14:07 -- 329/h) 20240811-13:04:59 keep c_quizzes model 2 validated 37 / 420 (8.81%) nb_accumulated 76 / 420 (finishes Sun 14:08 -- 326/h) 20240811-13:11:51 keep c_quizzes model 4 validated 32 / 420 (7.62%) nb_accumulated 108 / 420 (finishes Sun 14:12 -- 310/h) 20240811-13:18:45 keep c_quizzes model 2 validated 18 / 420 (4.29%) nb_accumulated 126 / 420 (finishes Sun 14:23 -- 272/h) 20240811-13:25:40 keep c_quizzes model 4 validated 29 / 420 (6.90%) nb_accumulated 155 / 420 (finishes Sun 14:24 -- 268/h) 20240811-13:32:36 keep c_quizzes model 2 validated 16 / 420 (3.81%) nb_accumulated 171 / 420 (finishes Sun 14:33 -- 246/h) 20240811-13:39:30 keep c_quizzes model 1 validated 21 / 420 (5.00%) nb_accumulated 192 / 420 (finishes Sun 14:37 -- 237/h) 20240811-13:46:26 keep c_quizzes model 3 validated 26 / 420 (6.19%) nb_accumulated 218 / 420 (finishes Sun 14:37 -- 235/h) 20240811-13:53:23 keep c_quizzes model 2 validated 31 / 420 (7.38%) nb_accumulated 249 / 420 (finishes Sun 14:36 -- 239/h) 20240811-14:00:17 keep c_quizzes model 4 validated 25 / 420 (5.95%) nb_accumulated 274 / 420 (finishes Sun 14:37 -- 237/h) 20240811-14:07:13 keep c_quizzes model 3 validated 22 / 420 (5.24%) nb_accumulated 296 / 420 (finishes Sun 14:39 -- 232/h) 20240811-14:14:12 keep c_quizzes model 0 validated 25 / 420 (5.95%) nb_accumulated 321 / 420 (finishes Sun 14:39 -- 231/h) 20240811-14:21:07 keep c_quizzes model 0 validated 16 / 420 (3.81%) nb_accumulated 337 / 420 (finishes Sun 14:43 -- 224/h) 20240811-14:28:01 keep c_quizzes model 2 validated 28 / 420 (6.67%) nb_accumulated 365 / 420 (finishes Sun 14:42 -- 225/h) 20240811-14:34:55 keep c_quizzes model 0 validated 20 / 420 (4.76%) nb_accumulated 385 / 420 (finishes Sun 14:44 -- 222/h) 20240811-14:41:50 keep c_quizzes model 0 validated 25 / 420 (5.95%) nb_accumulated 410 / 420 (finishes Sun 14:44 -- 221/h) 20240811-14:48:50 keep c_quizzes model 1 validated 27 / 420 (6.43%) nb_accumulated 437 / 420 (finishes now! -- 222/h) 20240811-14:49:21 wrote c_quizzes.pth 20240811-14:49:21 training model 0 20240811-15:06:04 train_perplexity 214 model 0 1.1636451180049199 20240811-15:06:20 test_perplexity 214 model 0 1.1623886588773293 20240811-15:12:25 test_accuracy 214 model 0 val 1532 / 1613 20240811-15:12:29 wrote gpt_000.pth 20240811-15:13:31 wrote non_validated_0214_00.png 20240811-15:13:31 wrote state.pth 20240811-15:13:31 --- epoch 215 ---------------------------------------- 20240811-15:13:31 current_test_accuracies 0.9498 0.0000 0.0000 0.0000 0.0000 20240811-15:13:31 training model 1 20240811-15:30:13 train_perplexity 215 model 1 1.1632667087945097 20240811-15:30:29 test_perplexity 215 model 1 1.1633923125014283 20240811-15:36:41 test_accuracy 215 model 1 val 1497 / 1599 20240811-15:36:44 wrote gpt_001.pth 20240811-15:37:46 wrote non_validated_0215_01.png 20240811-15:37:46 wrote state.pth 20240811-15:37:46 --- epoch 216 ---------------------------------------- 20240811-15:37:46 current_test_accuracies 0.9498 0.9362 0.0000 0.0000 0.0000 20240811-15:37:46 training model 2 20240811-15:54:28 train_perplexity 216 model 2 1.1631303971813907 20240811-15:54:44 test_perplexity 216 model 2 1.1648511408122155 20240811-16:00:58 test_accuracy 216 model 2 val 1489 / 1589 20240811-16:01:02 wrote gpt_002.pth 20240811-16:02:03 wrote non_validated_0216_02.png 20240811-16:02:03 wrote state.pth 20240811-16:02:03 --- epoch 217 ---------------------------------------- 20240811-16:02:03 current_test_accuracies 0.9498 0.9362 0.9371 0.0000 0.0000 20240811-16:02:03 training model 3 20240811-16:18:46 train_perplexity 217 model 3 1.1632409397987367 20240811-16:19:02 test_perplexity 217 model 3 1.1624364167232744 20240811-16:25:12 test_accuracy 217 model 3 val 1520 / 1596 20240811-16:25:15 wrote gpt_003.pth 20240811-16:26:17 wrote non_validated_0217_03.png 20240811-16:26:17 wrote state.pth 20240811-16:26:17 --- epoch 218 ---------------------------------------- 20240811-16:26:17 current_test_accuracies 0.9498 0.9362 0.9371 0.9524 0.0000 20240811-16:26:17 training model 4 20240811-16:42:59 train_perplexity 218 model 4 1.1637657220972124 20240811-16:43:15 test_perplexity 218 model 4 1.1651089079412278 20240811-16:49:25 test_accuracy 218 model 4 val 1485 / 1592 20240811-16:49:29 wrote gpt_004.pth 20240811-16:50:31 wrote non_validated_0218_04.png 20240811-16:50:31 wrote state.pth 20240811-16:50:31 --- epoch 219 ---------------------------------------- 20240811-16:50:31 current_test_accuracies 0.9498 0.9362 0.9371 0.9524 0.9328 20240811-16:50:31 training model 4 20240811-17:07:14 train_perplexity 219 model 4 1.163653751169285 20240811-17:07:30 test_perplexity 219 model 4 1.1636095732802278 20240811-17:13:39 test_accuracy 219 model 4 val 1513 / 1600 20240811-17:13:43 wrote gpt_004.pth 20240811-17:14:44 wrote non_validated_0219_04.png 20240811-17:14:44 wrote state.pth 20240811-17:14:44 --- epoch 220 ---------------------------------------- 20240811-17:14:44 current_test_accuracies 0.9498 0.9362 0.9371 0.9524 0.9456 20240811-17:14:44 training model 1 20240811-17:31:28 train_perplexity 220 model 1 1.163162484131289 20240811-17:31:43 test_perplexity 220 model 1 1.1623738032776816 20240811-17:37:59 test_accuracy 220 model 1 val 1500 / 1597 20240811-17:38:02 wrote gpt_001.pth 20240811-17:39:03 wrote non_validated_0220_01.png 20240811-17:39:03 wrote state.pth 20240811-17:39:03 --- epoch 221 ---------------------------------------- 20240811-17:39:03 current_test_accuracies 0.9498 0.9393 0.9371 0.9524 0.9456 20240811-17:39:03 training model 2 20240811-17:55:46 train_perplexity 221 model 2 1.163551994735871 20240811-17:56:02 test_perplexity 221 model 2 1.165634705076801 20240811-18:02:18 test_accuracy 221 model 2 val 1516 / 1584 20240811-18:02:22 wrote gpt_002.pth 20240811-18:03:23 wrote non_validated_0221_02.png 20240811-18:03:23 wrote state.pth 20240811-18:03:23 --- epoch 222 ---------------------------------------- 20240811-18:03:23 current_test_accuracies 0.9498 0.9393 0.9571 0.9524 0.9456 20240811-18:03:23 training model 1 20240811-18:20:05 train_perplexity 222 model 1 1.1631915779826272 20240811-18:20:21 test_perplexity 222 model 1 1.1638458170351387 20240811-18:26:25 test_accuracy 222 model 1 val 1543 / 1620 20240811-18:26:28 wrote gpt_001.pth 20240811-18:27:29 wrote non_validated_0222_01.png 20240811-18:27:29 wrote state.pth 20240811-18:27:29 --- epoch 223 ---------------------------------------- 20240811-18:27:29 current_test_accuracies 0.9498 0.9525 0.9571 0.9524 0.9456 20240811-18:27:29 training model 4 20240811-18:44:11 train_perplexity 223 model 4 1.163815608641672 20240811-18:44:27 test_perplexity 223 model 4 1.163867521309012 20240811-18:50:29 test_accuracy 223 model 4 val 1546 / 1640 20240811-18:50:32 wrote gpt_004.pth 20240811-18:51:34 wrote non_validated_0223_04.png 20240811-18:51:34 wrote state.pth 20240811-18:51:34 --- epoch 224 ---------------------------------------- 20240811-18:51:34 current_test_accuracies 0.9498 0.9525 0.9571 0.9524 0.9427 20240811-18:51:34 training model 4 20240811-19:08:16 train_perplexity 224 model 4 1.1632626476541108 20240811-19:08:31 test_perplexity 224 model 4 1.1636736257600677 20240811-19:14:43 test_accuracy 224 model 4 val 1508 / 1593 20240811-19:14:47 wrote gpt_004.pth 20240811-19:15:48 wrote non_validated_0224_04.png 20240811-19:15:48 wrote state.pth 20240811-19:15:48 --- epoch 225 ---------------------------------------- 20240811-19:15:48 current_test_accuracies 0.9498 0.9525 0.9571 0.9524 0.9466 20240811-19:15:48 training model 4 20240811-19:32:32 train_perplexity 225 model 4 1.1628889332168415 20240811-19:32:48 test_perplexity 225 model 4 1.1653717099332257 20240811-19:38:58 test_accuracy 225 model 4 val 1518 / 1619 20240811-19:39:02 wrote gpt_004.pth 20240811-19:40:04 wrote non_validated_0225_04.png 20240811-19:40:04 wrote state.pth 20240811-19:40:04 --- epoch 226 ---------------------------------------- 20240811-19:40:04 current_test_accuracies 0.9498 0.9525 0.9571 0.9524 0.9376 20240811-19:40:04 training model 4 20240811-19:56:47 train_perplexity 226 model 4 1.1635242161977448 20240811-19:57:02 test_perplexity 226 model 4 1.1633804785153303 20240811-20:03:21 test_accuracy 226 model 4 val 1482 / 1576 20240811-20:03:24 wrote gpt_004.pth 20240811-20:04:26 wrote non_validated_0226_04.png 20240811-20:04:26 wrote state.pth 20240811-20:04:26 --- epoch 227 ---------------------------------------- 20240811-20:04:26 current_test_accuracies 0.9498 0.9525 0.9571 0.9524 0.9404 20240811-20:04:26 training model 4 20240811-20:21:08 train_perplexity 227 model 4 1.163148247579671 20240811-20:21:24 test_perplexity 227 model 4 1.163131033631866 20240811-20:27:37 test_accuracy 227 model 4 val 1527 / 1609 20240811-20:27:40 wrote gpt_004.pth 20240811-20:28:42 wrote non_validated_0227_04.png 20240811-20:28:42 wrote state.pth 20240811-20:28:42 --- epoch 228 ---------------------------------------- 20240811-20:28:42 current_test_accuracies 0.9498 0.9525 0.9571 0.9524 0.9490 20240811-20:28:42 training model 4 20240811-20:45:25 train_perplexity 228 model 4 1.163062082450829 20240811-20:45:41 test_perplexity 228 model 4 1.1622850826209277 20240811-20:51:54 test_accuracy 228 model 4 val 1508 / 1594 20240811-20:51:57 wrote gpt_004.pth 20240811-20:53:00 wrote non_validated_0228_04.png 20240811-20:53:00 wrote state.pth 20240811-20:53:00 --- epoch 229 ---------------------------------------- 20240811-20:53:00 current_test_accuracies 0.9498 0.9525 0.9571 0.9524 0.9460 20240811-20:53:00 training model 4 20240811-21:09:43 train_perplexity 229 model 4 1.1628625567566178 20240811-21:09:58 test_perplexity 229 model 4 1.165338804022458 20240811-21:16:03 test_accuracy 229 model 4 val 1539 / 1620 20240811-21:16:07 wrote gpt_004.pth 20240811-21:17:09 wrote non_validated_0229_04.png 20240811-21:17:09 wrote state.pth 20240811-21:17:09 --- epoch 230 ---------------------------------------- 20240811-21:17:09 current_test_accuracies 0.9498 0.9525 0.9571 0.9524 0.9500 20240811-21:17:09 training model 0 20240811-21:33:53 train_perplexity 230 model 0 1.1633952419703937 20240811-21:34:08 test_perplexity 230 model 0 1.1625255751636767 20240811-21:40:19 test_accuracy 230 model 0 val 1523 / 1604 20240811-21:40:23 wrote gpt_000.pth 20240811-21:41:25 wrote non_validated_0230_00.png 20240811-21:41:25 wrote state.pth 20240811-21:41:25 --- epoch 231 ---------------------------------------- 20240811-21:41:25 current_test_accuracies 0.9495 0.9525 0.9571 0.9524 0.9500 20240811-21:41:25 training model 0 20240811-21:58:08 train_perplexity 231 model 0 1.1633439062574997 20240811-21:58:23 test_perplexity 231 model 0 1.1624503147908738 20240811-22:04:27 test_accuracy 231 model 0 val 1553 / 1636 20240811-22:04:31 wrote gpt_000.pth 20240811-22:05:33 wrote non_validated_0231_00.png 20240811-22:05:33 wrote state.pth 20240811-22:05:33 --- epoch 232 ---------------------------------------- 20240811-22:05:33 current_test_accuracies 0.9493 0.9525 0.9571 0.9524 0.9500 20240811-22:05:33 training model 0 20240811-22:22:16 train_perplexity 232 model 0 1.162724759178362 20240811-22:22:31 test_perplexity 232 model 0 1.1636878122929621 20240811-22:28:38 test_accuracy 232 model 0 val 1522 / 1623 20240811-22:28:42 wrote gpt_000.pth 20240811-22:29:45 wrote non_validated_0232_00.png 20240811-22:29:45 wrote state.pth 20240811-22:29:45 --- epoch 233 ---------------------------------------- 20240811-22:29:45 current_test_accuracies 0.9378 0.9525 0.9571 0.9524 0.9500 20240811-22:29:45 training model 0 20240811-22:46:28 train_perplexity 233 model 0 1.1629603211864032 20240811-22:46:43 test_perplexity 233 model 0 1.1651222126368235 20240811-22:52:50 test_accuracy 233 model 0 val 1545 / 1623 20240811-22:52:53 wrote gpt_000.pth 20240811-22:53:55 wrote non_validated_0233_00.png 20240811-22:53:55 wrote state.pth 20240811-22:53:55 --- epoch 234 ---------------------------------------- 20240811-22:53:55 current_test_accuracies 0.9519 0.9525 0.9571 0.9524 0.9500 20240811-23:00:58 keep c_quizzes model 4 validated 27 / 420 (6.43%) nb_accumulated 27 / 420 (finishes Mon 00:43 -- 229/h) 20240811-23:07:52 keep c_quizzes model 0 validated 36 / 420 (8.57%) nb_accumulated 63 / 420 (finishes Mon 00:26 -- 270/h) 20240811-23:14:48 keep c_quizzes model 1 validated 25 / 420 (5.95%) nb_accumulated 88 / 420 (finishes Mon 00:33 -- 252/h) 20240811-23:21:44 keep c_quizzes model 2 validated 44 / 420 (10.48%) nb_accumulated 132 / 420 (finishes Mon 00:22 -- 284/h) 20240811-23:28:34 keep c_quizzes model 4 validated 34 / 420 (8.10%) nb_accumulated 166 / 420 (finishes Mon 00:21 -- 287/h) 20240811-23:35:27 keep c_quizzes model 3 validated 30 / 420 (7.14%) nb_accumulated 196 / 420 (finishes Mon 00:22 -- 283/h) 20240811-23:42:19 keep c_quizzes model 0 validated 31 / 420 (7.38%) nb_accumulated 227 / 420 (finishes Mon 00:23 -- 281/h) 20240811-23:49:14 keep c_quizzes model 1 validated 30 / 420 (7.14%) nb_accumulated 257 / 420 (finishes Mon 00:24 -- 278/h) 20240811-23:56:06 keep c_quizzes model 1 validated 28 / 420 (6.67%) nb_accumulated 285 / 420 (finishes Mon 00:25 -- 274/h) 20240812-00:03:06 keep c_quizzes model 4 validated 25 / 420 (5.95%) nb_accumulated 310 / 420 (finishes Mon 00:27 -- 268/h) 20240812-00:10:09 keep c_quizzes model 4 validated 31 / 420 (7.38%) nb_accumulated 341 / 420 (finishes Mon 00:27 -- 268/h) 20240812-00:17:03 keep c_quizzes model 3 validated 32 / 420 (7.62%) nb_accumulated 373 / 420 (finishes Mon 00:27 -- 269/h) 20240812-00:23:57 keep c_quizzes model 1 validated 26 / 420 (6.19%) nb_accumulated 399 / 420 (finishes Mon 00:28 -- 265/h) 20240812-00:30:54 keep c_quizzes model 2 validated 32 / 420 (7.62%) nb_accumulated 431 / 420 (finishes now! -- 266/h) 20240812-00:31:26 wrote c_quizzes.pth 20240812-00:31:26 training model 0 20240812-00:48:08 train_perplexity 234 model 0 1.1636217821578414 20240812-00:48:23 test_perplexity 234 model 0 1.1635974291009699 20240812-00:54:28 test_accuracy 234 model 0 val 1562 / 1639 20240812-00:54:32 wrote gpt_000.pth 20240812-00:55:33 wrote non_validated_0234_00.png 20240812-00:55:33 wrote state.pth 20240812-00:55:33 --- epoch 235 ---------------------------------------- 20240812-00:55:33 current_test_accuracies 0.9530 0.0000 0.0000 0.0000 0.0000 20240812-00:55:33 training model 1 20240812-01:12:14 train_perplexity 235 model 1 1.1632241162700212 20240812-01:12:30 test_perplexity 235 model 1 1.1620196814283914 20240812-01:18:49 test_accuracy 235 model 1 val 1490 / 1572 20240812-01:18:52 wrote gpt_001.pth 20240812-01:19:54 wrote non_validated_0235_01.png 20240812-01:19:54 wrote state.pth 20240812-01:19:54 --- epoch 236 ---------------------------------------- 20240812-01:19:54 current_test_accuracies 0.9530 0.9478 0.0000 0.0000 0.0000 20240812-01:19:54 training model 2 20240812-01:36:36 train_perplexity 236 model 2 1.1639926702674126 20240812-01:36:51 test_perplexity 236 model 2 1.1641194919660351 20240812-01:43:05 test_accuracy 236 model 2 val 1507 / 1607 20240812-01:43:08 wrote gpt_002.pth 20240812-01:44:11 wrote non_validated_0236_02.png 20240812-01:44:11 wrote state.pth 20240812-01:44:11 --- epoch 237 ---------------------------------------- 20240812-01:44:11 current_test_accuracies 0.9530 0.9478 0.9378 0.0000 0.0000 20240812-01:44:11 training model 3 20240812-02:00:54 train_perplexity 237 model 3 1.1644733099696074 20240812-02:01:09 test_perplexity 237 model 3 1.1646708536376713 20240812-02:07:08 test_accuracy 237 model 3 val 1567 / 1641 20240812-02:07:11 wrote gpt_003.pth 20240812-02:08:12 wrote non_validated_0237_03.png 20240812-02:08:12 wrote state.pth 20240812-02:08:12 --- epoch 238 ---------------------------------------- 20240812-02:08:12 current_test_accuracies 0.9530 0.9478 0.9378 0.9549 0.0000 20240812-02:08:12 training model 4 20240812-02:24:55 train_perplexity 238 model 4 1.1631499203627385 20240812-02:25:10 test_perplexity 238 model 4 1.1631419388926296 20240812-02:31:22 test_accuracy 238 model 4 val 1511 / 1598 20240812-02:31:25 wrote gpt_004.pth 20240812-02:32:26 wrote non_validated_0238_04.png 20240812-02:32:26 wrote state.pth 20240812-02:32:26 --- epoch 239 ---------------------------------------- 20240812-02:32:26 current_test_accuracies 0.9530 0.9478 0.9378 0.9549 0.9456 20240812-02:32:26 training model 2 20240812-02:49:08 train_perplexity 239 model 2 1.1638482007442004 20240812-02:49:24 test_perplexity 239 model 2 1.164916934571159 20240812-02:55:39 test_accuracy 239 model 2 val 1517 / 1601 20240812-02:55:42 wrote gpt_002.pth 20240812-02:56:43 wrote non_validated_0239_02.png 20240812-02:56:43 wrote state.pth 20240812-02:56:43 --- epoch 240 ---------------------------------------- 20240812-02:56:43 current_test_accuracies 0.9530 0.9478 0.9475 0.9549 0.9456 20240812-02:56:43 training model 4 20240812-03:13:24 train_perplexity 240 model 4 1.1631796070277498 20240812-03:13:40 test_perplexity 240 model 4 1.1658490230327283 20240812-03:19:45 test_accuracy 240 model 4 val 1539 / 1621 20240812-03:19:49 wrote gpt_004.pth 20240812-03:20:51 wrote non_validated_0240_04.png 20240812-03:20:51 wrote state.pth 20240812-03:20:51 --- epoch 241 ---------------------------------------- 20240812-03:20:51 current_test_accuracies 0.9530 0.9478 0.9475 0.9549 0.9494 20240812-03:20:51 training model 2 20240812-03:37:32 train_perplexity 241 model 2 1.1636710939517054 20240812-03:37:47 test_perplexity 241 model 2 1.1649629068625718 20240812-03:43:56 test_accuracy 241 model 2 val 1532 / 1617 20240812-03:43:59 wrote gpt_002.pth 20240812-03:45:01 wrote non_validated_0241_02.png 20240812-03:45:01 wrote state.pth 20240812-03:45:01 --- epoch 242 ---------------------------------------- 20240812-03:45:01 current_test_accuracies 0.9530 0.9478 0.9474 0.9549 0.9494 20240812-03:45:01 training model 2 20240812-04:01:42 train_perplexity 242 model 2 1.1634684287622834 20240812-04:01:57 test_perplexity 242 model 2 1.1631513871710648 20240812-04:08:08 test_accuracy 242 model 2 val 1538 / 1619 20240812-04:08:11 wrote gpt_002.pth 20240812-04:09:13 wrote non_validated_0242_02.png 20240812-04:09:13 wrote state.pth 20240812-04:09:13 --- epoch 243 ---------------------------------------- 20240812-04:09:13 current_test_accuracies 0.9530 0.9478 0.9500 0.9549 0.9494 20240812-04:09:13 training model 1 20240812-04:25:53 train_perplexity 243 model 1 1.1635410601630292 20240812-04:26:09 test_perplexity 243 model 1 1.1628463243416547 20240812-04:32:23 test_accuracy 243 model 1 val 1541 / 1607 20240812-04:32:26 wrote gpt_001.pth 20240812-04:33:27 wrote non_validated_0243_01.png 20240812-04:33:27 wrote state.pth 20240812-04:33:27 --- epoch 244 ---------------------------------------- 20240812-04:33:27 current_test_accuracies 0.9530 0.9589 0.9500 0.9549 0.9494 20240812-04:33:27 training model 4 20240812-04:50:08 train_perplexity 244 model 4 1.1637822456264098 20240812-04:50:23 test_perplexity 244 model 4 1.1652849830140493 20240812-04:56:39 test_accuracy 244 model 4 val 1510 / 1582 20240812-04:56:43 wrote gpt_004.pth 20240812-04:57:45 wrote non_validated_0244_04.png 20240812-04:57:45 wrote state.pth 20240812-04:57:45 --- epoch 245 ---------------------------------------- 20240812-04:57:45 current_test_accuracies 0.9530 0.9589 0.9500 0.9549 0.9545 20240812-04:57:45 training model 2 20240812-05:14:25 train_perplexity 245 model 2 1.1633416648236892 20240812-05:14:41 test_perplexity 245 model 2 1.1639861834141603 20240812-05:20:56 test_accuracy 245 model 2 val 1505 / 1586 20240812-05:20:59 wrote gpt_002.pth 20240812-05:22:01 wrote non_validated_0245_02.png 20240812-05:22:01 wrote state.pth 20240812-05:22:01 --- epoch 246 ---------------------------------------- 20240812-05:22:01 current_test_accuracies 0.9530 0.9589 0.9489 0.9549 0.9545 20240812-05:22:01 training model 2 20240812-05:38:41 train_perplexity 246 model 2 1.1632887496577888 20240812-05:38:57 test_perplexity 246 model 2 1.1644093003383431 20240812-05:45:12 test_accuracy 246 model 2 val 1498 / 1598 20240812-05:45:16 wrote gpt_002.pth 20240812-05:46:17 wrote non_validated_0246_02.png 20240812-05:46:17 wrote state.pth 20240812-05:46:17 --- epoch 247 ---------------------------------------- 20240812-05:46:17 current_test_accuracies 0.9530 0.9589 0.9374 0.9549 0.9545 20240812-05:46:17 training model 2 20240812-06:02:59 train_perplexity 247 model 2 1.163431590576144 20240812-06:03:15 test_perplexity 247 model 2 1.1644219834615548 20240812-06:09:21 test_accuracy 247 model 2 val 1546 / 1638 20240812-06:09:24 wrote gpt_002.pth 20240812-06:10:26 wrote non_validated_0247_02.png 20240812-06:10:26 wrote state.pth 20240812-06:10:26 --- epoch 248 ---------------------------------------- 20240812-06:10:26 current_test_accuracies 0.9530 0.9589 0.9438 0.9549 0.9545 20240812-06:10:26 training model 2 20240812-06:27:08 train_perplexity 248 model 2 1.1626166437728398 20240812-06:27:24 test_perplexity 248 model 2 1.1639097954110673 20240812-06:33:30 test_accuracy 248 model 2 val 1514 / 1611 20240812-06:33:33 wrote gpt_002.pth 20240812-06:34:35 wrote non_validated_0248_02.png 20240812-06:34:35 wrote state.pth 20240812-06:34:35 --- epoch 249 ---------------------------------------- 20240812-06:34:35 current_test_accuracies 0.9530 0.9589 0.9398 0.9549 0.9545 20240812-06:34:35 training model 2 20240812-06:51:16 train_perplexity 249 model 2 1.1627256620750672 20240812-06:51:32 test_perplexity 249 model 2 1.1632353669165822 20240812-06:57:39 test_accuracy 249 model 2 val 1507 / 1601 20240812-06:57:42 wrote gpt_002.pth 20240812-06:58:42 wrote non_validated_0249_02.png 20240812-06:58:42 wrote state.pth 20240812-06:58:42 --- epoch 250 ---------------------------------------- 20240812-06:58:42 current_test_accuracies 0.9530 0.9589 0.9413 0.9549 0.9545 20240812-06:58:42 training model 2 20240812-07:15:23 train_perplexity 250 model 2 1.1626332372937938 20240812-07:15:39 test_perplexity 250 model 2 1.1612147093956777 20240812-07:21:55 test_accuracy 250 model 2 val 1509 / 1587 20240812-07:21:59 wrote gpt_002.pth 20240812-07:23:00 wrote non_validated_0250_02.png 20240812-07:23:00 wrote state.pth 20240812-07:23:00 --- epoch 251 ---------------------------------------- 20240812-07:23:00 current_test_accuracies 0.9530 0.9589 0.9509 0.9549 0.9545 20240812-07:30:06 keep c_quizzes model 2 validated 33 / 420 (7.86%) nb_accumulated 33 / 420 (finishes Mon 08:53 -- 278/h) 20240812-07:37:04 keep c_quizzes model 0 validated 37 / 420 (8.81%) nb_accumulated 70 / 420 (finishes Mon 08:47 -- 298/h) 20240812-07:44:00 keep c_quizzes model 2 validated 28 / 420 (6.67%) nb_accumulated 98 / 420 (finishes Mon 08:52 -- 280/h) 20240812-07:50:56 keep c_quizzes model 1 validated 30 / 420 (7.14%) nb_accumulated 128 / 420 (finishes Mon 08:54 -- 275/h) 20240812-07:57:56 keep c_quizzes model 3 validated 45 / 420 (10.71%) nb_accumulated 173 / 420 (finishes Mon 08:47 -- 297/h) 20240812-08:04:57 keep c_quizzes model 2 validated 21 / 420 (5.00%) nb_accumulated 194 / 420 (finishes Mon 08:53 -- 277/h) 20240812-08:11:56 keep c_quizzes model 0 validated 31 / 420 (7.38%) nb_accumulated 225 / 420 (finishes Mon 08:54 -- 275/h) 20240812-08:18:58 keep c_quizzes model 0 validated 31 / 420 (7.38%) nb_accumulated 256 / 420 (finishes Mon 08:54 -- 274/h) 20240812-08:25:59 keep c_quizzes model 1 validated 34 / 420 (8.10%) nb_accumulated 290 / 420 (finishes Mon 08:54 -- 276/h) 20240812-08:32:57 keep c_quizzes model 4 validated 30 / 420 (7.14%) nb_accumulated 320 / 420 (finishes Mon 08:54 -- 274/h) 20240812-08:39:54 keep c_quizzes model 4 validated 33 / 420 (7.86%) nb_accumulated 353 / 420 (finishes Mon 08:54 -- 275/h) 20240812-08:47:02 keep c_quizzes model 4 validated 27 / 420 (6.43%) nb_accumulated 380 / 420 (finishes Mon 08:55 -- 271/h) 20240812-08:54:03 keep c_quizzes model 1 validated 34 / 420 (8.10%) nb_accumulated 414 / 420 (finishes Mon 08:55 -- 272/h) 20240812-09:01:00 keep c_quizzes model 3 validated 25 / 420 (5.95%) nb_accumulated 439 / 420 (finishes now! -- 268/h) 20240812-09:01:32 wrote c_quizzes.pth 20240812-09:01:32 training model 0 20240812-09:18:13 train_perplexity 251 model 0 1.1638424900034485 20240812-09:18:29 test_perplexity 251 model 0 1.1643157947695457 20240812-09:24:37 test_accuracy 251 model 0 val 1518 / 1604 20240812-09:24:41 wrote gpt_000.pth 20240812-09:25:43 wrote non_validated_0251_00.png 20240812-09:25:43 wrote state.pth 20240812-09:25:43 --- epoch 252 ---------------------------------------- 20240812-09:25:43 current_test_accuracies 0.9464 0.0000 0.0000 0.0000 0.0000 20240812-09:25:43 training model 1 20240812-09:42:26 train_perplexity 252 model 1 1.1641895305934178 20240812-09:42:42 test_perplexity 252 model 1 1.1642211114333556 20240812-09:48:58 test_accuracy 252 model 1 val 1490 / 1586 20240812-09:49:02 wrote gpt_001.pth 20240812-09:50:04 wrote non_validated_0252_01.png 20240812-09:50:04 wrote state.pth 20240812-09:50:04 --- epoch 253 ---------------------------------------- 20240812-09:50:04 current_test_accuracies 0.9464 0.9395 0.0000 0.0000 0.0000 20240812-09:50:04 training model 2 20240812-10:06:47 train_perplexity 253 model 2 1.1628752588668974 20240812-10:07:03 test_perplexity 253 model 2 1.1637272503949347 20240812-10:13:15 test_accuracy 253 model 2 val 1538 / 1606 20240812-10:13:19 wrote gpt_002.pth 20240812-10:14:21 wrote non_validated_0253_02.png 20240812-10:14:21 wrote state.pth 20240812-10:14:21 --- epoch 254 ---------------------------------------- 20240812-10:14:21 current_test_accuracies 0.9464 0.9395 0.9577 0.0000 0.0000 20240812-10:14:21 training model 3 20240812-10:31:04 train_perplexity 254 model 3 1.1642884001867484 20240812-10:31:20 test_perplexity 254 model 3 1.1649133635623505 20240812-10:37:36 test_accuracy 254 model 3 val 1508 / 1584 20240812-10:37:39 wrote gpt_003.pth 20240812-10:38:41 wrote non_validated_0254_03.png 20240812-10:38:41 wrote state.pth 20240812-10:38:41 --- epoch 255 ---------------------------------------- 20240812-10:38:41 current_test_accuracies 0.9464 0.9395 0.9577 0.9520 0.0000 20240812-10:38:41 training model 4 20240812-10:55:24 train_perplexity 255 model 4 1.1637539055493755 20240812-10:55:40 test_perplexity 255 model 4 1.1631937072799816 20240812-11:02:00 test_accuracy 255 model 4 val 1505 / 1584 20240812-11:02:03 wrote gpt_004.pth 20240812-11:03:05 wrote non_validated_0255_04.png 20240812-11:03:05 wrote state.pth 20240812-11:03:05 --- epoch 256 ---------------------------------------- 20240812-11:03:05 current_test_accuracies 0.9464 0.9395 0.9577 0.9520 0.9501 20240812-11:03:05 training model 1 20240812-11:19:47 train_perplexity 256 model 1 1.164216281442183 20240812-11:20:03 test_perplexity 256 model 1 1.1632670401344398 20240812-11:26:19 test_accuracy 256 model 1 val 1520 / 1592 20240812-11:26:22 wrote gpt_001.pth 20240812-11:27:24 wrote non_validated_0256_01.png 20240812-11:27:24 wrote state.pth 20240812-11:27:24 --- epoch 257 ---------------------------------------- 20240812-11:27:24 current_test_accuracies 0.9464 0.9548 0.9577 0.9520 0.9501 20240812-11:27:24 training model 0 20240812-11:44:06 train_perplexity 257 model 0 1.1640102708343167 20240812-11:44:21 test_perplexity 257 model 0 1.1632357185711875 20240812-11:50:30 test_accuracy 257 model 0 val 1515 / 1601 20240812-11:50:33 wrote gpt_000.pth 20240812-11:51:35 wrote non_validated_0257_00.png 20240812-11:51:35 wrote state.pth 20240812-11:51:35 --- epoch 258 ---------------------------------------- 20240812-11:51:35 current_test_accuracies 0.9463 0.9548 0.9577 0.9520 0.9501 20240812-11:51:35 training model 0 20240812-12:08:15 train_perplexity 258 model 0 1.164088942679034 20240812-12:08:31 test_perplexity 258 model 0 1.1651173021147296 20240812-12:14:50 test_accuracy 258 model 0 val 1496 / 1587 20240812-12:14:53 wrote gpt_000.pth 20240812-12:15:54 wrote non_validated_0258_00.png 20240812-12:15:54 wrote state.pth 20240812-12:15:54 --- epoch 259 ---------------------------------------- 20240812-12:15:54 current_test_accuracies 0.9427 0.9548 0.9577 0.9520 0.9501 20240812-12:15:54 training model 0 20240812-12:32:35 train_perplexity 259 model 0 1.1637961526160001 20240812-12:32:51 test_perplexity 259 model 0 1.1635733856401906 20240812-12:39:10 test_accuracy 259 model 0 val 1474 / 1578 20240812-12:39:13 wrote gpt_000.pth 20240812-12:40:15 wrote non_validated_0259_00.png 20240812-12:40:15 wrote state.pth 20240812-12:40:15 --- epoch 260 ---------------------------------------- 20240812-12:40:15 current_test_accuracies 0.9341 0.9548 0.9577 0.9520 0.9501 20240812-12:40:15 training model 0 20240812-12:56:56 train_perplexity 260 model 0 1.1636406298172488 20240812-12:57:12 test_perplexity 260 model 0 1.1633846571247932 20240812-13:03:19 test_accuracy 260 model 0 val 1526 / 1612 20240812-13:03:22 wrote gpt_000.pth 20240812-13:04:24 wrote non_validated_0260_00.png 20240812-13:04:24 wrote state.pth 20240812-13:04:24 --- epoch 261 ---------------------------------------- 20240812-13:04:24 current_test_accuracies 0.9467 0.9548 0.9577 0.9520 0.9501 20240812-13:04:24 training model 0 20240812-13:21:06 train_perplexity 261 model 0 1.163719150991025 20240812-13:21:21 test_perplexity 261 model 0 1.164268339739427 20240812-13:27:36 test_accuracy 261 model 0 val 1505 / 1589 20240812-13:27:40 wrote gpt_000.pth 20240812-13:28:42 wrote non_validated_0261_00.png 20240812-13:28:42 wrote state.pth 20240812-13:28:42 --- epoch 262 ---------------------------------------- 20240812-13:28:42 current_test_accuracies 0.9471 0.9548 0.9577 0.9520 0.9501 20240812-13:28:42 training model 0 20240812-13:45:23 train_perplexity 262 model 0 1.1633854898515612 20240812-13:45:39 test_perplexity 262 model 0 1.1628955402950338 20240812-13:51:47 test_accuracy 262 model 0 val 1517 / 1627 20240812-13:51:51 wrote gpt_000.pth 20240812-13:52:52 wrote non_validated_0262_00.png 20240812-13:52:52 wrote state.pth 20240812-13:52:52 --- epoch 263 ---------------------------------------- 20240812-13:52:52 current_test_accuracies 0.9324 0.9548 0.9577 0.9520 0.9501 20240812-13:52:52 training model 0 20240812-14:09:33 train_perplexity 263 model 0 1.163686461287156 20240812-14:09:49 test_perplexity 263 model 0 1.1641781790015213 20240812-14:15:58 test_accuracy 263 model 0 val 1523 / 1602 20240812-14:16:02 wrote gpt_000.pth 20240812-14:17:04 wrote non_validated_0263_00.png 20240812-14:17:04 wrote state.pth 20240812-14:17:04 --- epoch 264 ---------------------------------------- 20240812-14:17:04 current_test_accuracies 0.9507 0.9548 0.9577 0.9520 0.9501 20240812-14:24:11 keep c_quizzes model 2 validated 49 / 420 (11.67%) nb_accumulated 49 / 420 (finishes Mon 15:18 -- 412/h) 20240812-14:31:07 keep c_quizzes model 3 validated 33 / 420 (7.86%) nb_accumulated 82 / 420 (finishes Mon 15:29 -- 349/h) 20240812-14:38:07 keep c_quizzes model 1 validated 27 / 420 (6.43%) nb_accumulated 109 / 420 (finishes Mon 15:38 -- 310/h) 20240812-14:45:05 keep c_quizzes model 1 validated 31 / 420 (7.38%) nb_accumulated 140 / 420 (finishes Mon 15:41 -- 299/h) 20240812-14:52:09 keep c_quizzes model 4 validated 28 / 420 (6.67%) nb_accumulated 168 / 420 (finishes Mon 15:44 -- 287/h) 20240812-14:59:10 keep c_quizzes model 1 validated 38 / 420 (9.05%) nb_accumulated 206 / 420 (finishes Mon 15:42 -- 293/h) 20240812-15:06:11 keep c_quizzes model 4 validated 33 / 420 (7.86%) nb_accumulated 239 / 420 (finishes Mon 15:43 -- 291/h) 20240812-15:13:10 keep c_quizzes model 0 validated 25 / 420 (5.95%) nb_accumulated 264 / 420 (finishes Mon 15:46 -- 282/h) 20240812-15:20:14 keep c_quizzes model 0 validated 29 / 420 (6.90%) nb_accumulated 293 / 420 (finishes Mon 15:47 -- 278/h) 20240812-15:27:10 keep c_quizzes model 2 validated 32 / 420 (7.62%) nb_accumulated 325 / 420 (finishes Mon 15:47 -- 278/h) 20240812-15:34:08 keep c_quizzes model 1 validated 37 / 420 (8.81%) nb_accumulated 362 / 420 (finishes Mon 15:46 -- 281/h) 20240812-15:41:06 keep c_quizzes model 1 validated 33 / 420 (7.86%) nb_accumulated 395 / 420 (finishes Mon 15:46 -- 281/h) 20240812-15:48:09 keep c_quizzes model 0 validated 27 / 420 (6.43%) nb_accumulated 422 / 420 (finishes now! -- 277/h) 20240812-15:48:40 wrote c_quizzes.pth 20240812-15:48:40 training model 0 20240812-16:05:23 train_perplexity 264 model 0 1.163726501188641 20240812-16:05:38 test_perplexity 264 model 0 1.1670117133294242 20240812-16:12:01 test_accuracy 264 model 0 val 1506 / 1594 20240812-16:12:04 wrote gpt_000.pth 20240812-16:13:06 wrote non_validated_0264_00.png 20240812-16:13:06 wrote state.pth 20240812-16:13:06 --- epoch 265 ---------------------------------------- 20240812-16:13:06 current_test_accuracies 0.9448 0.0000 0.0000 0.0000 0.0000 20240812-16:13:06 training model 1 20240812-16:29:48 train_perplexity 265 model 1 1.1647422161490577 20240812-16:30:03 test_perplexity 265 model 1 1.1639383136807784 20240812-16:36:13 test_accuracy 265 model 1 val 1526 / 1618 20240812-16:36:17 wrote gpt_001.pth 20240812-16:37:18 wrote non_validated_0265_01.png 20240812-16:37:19 wrote state.pth 20240812-16:37:19 --- epoch 266 ---------------------------------------- 20240812-16:37:19 current_test_accuracies 0.9448 0.9431 0.0000 0.0000 0.0000 20240812-16:37:19 training model 2 20240812-16:54:02 train_perplexity 266 model 2 1.1641764810688897 20240812-16:54:17 test_perplexity 266 model 2 1.1645568673609732 20240812-17:00:27 test_accuracy 266 model 2 val 1524 / 1615 20240812-17:00:31 wrote gpt_002.pth 20240812-17:01:33 wrote non_validated_0266_02.png 20240812-17:01:33 wrote state.pth 20240812-17:01:33 --- epoch 267 ---------------------------------------- 20240812-17:01:33 current_test_accuracies 0.9448 0.9431 0.9437 0.0000 0.0000 20240812-17:01:33 training model 3 20240812-17:18:16 train_perplexity 267 model 3 1.1652073675191812 20240812-17:18:31 test_perplexity 267 model 3 1.1653024188273597 20240812-17:24:49 test_accuracy 267 model 3 val 1519 / 1609 20240812-17:24:52 wrote gpt_003.pth 20240812-17:25:54 wrote non_validated_0267_03.png 20240812-17:25:54 wrote state.pth 20240812-17:25:54 --- epoch 268 ---------------------------------------- 20240812-17:25:54 current_test_accuracies 0.9448 0.9431 0.9437 0.9441 0.0000 20240812-17:25:54 training model 4 20240812-17:42:38 train_perplexity 268 model 4 1.16461537889965 20240812-17:42:53 test_perplexity 268 model 4 1.1661442789350311 20240812-17:49:13 test_accuracy 268 model 4 val 1455 / 1569 20240812-17:49:17 wrote gpt_004.pth 20240812-17:50:19 wrote non_validated_0268_04.png 20240812-17:50:19 wrote state.pth 20240812-17:50:19 --- epoch 269 ---------------------------------------- 20240812-17:50:19 current_test_accuracies 0.9448 0.9431 0.9437 0.9441 0.9273 20240812-17:50:19 training model 4 20240812-18:07:02 train_perplexity 269 model 4 1.1643268237343691 20240812-18:07:18 test_perplexity 269 model 4 1.1674899611836822 20240812-18:13:30 test_accuracy 269 model 4 val 1514 / 1604 20240812-18:13:34 wrote gpt_004.pth 20240812-18:14:35 wrote non_validated_0269_04.png 20240812-18:14:35 wrote state.pth 20240812-18:14:35 --- epoch 270 ---------------------------------------- 20240812-18:14:35 current_test_accuracies 0.9448 0.9431 0.9437 0.9441 0.9439 20240812-18:14:35 training model 1 20240812-18:31:18 train_perplexity 270 model 1 1.1644000639248697 20240812-18:31:34 test_perplexity 270 model 1 1.1645220585206184 20240812-18:37:36 test_accuracy 270 model 1 val 1533 / 1622 20240812-18:37:39 wrote gpt_001.pth 20240812-18:38:41 wrote non_validated_0270_01.png 20240812-18:38:41 wrote state.pth 20240812-18:38:41 --- epoch 271 ---------------------------------------- 20240812-18:38:41 current_test_accuracies 0.9448 0.9451 0.9437 0.9441 0.9439 20240812-18:38:41 training model 2 20240812-18:55:24 train_perplexity 271 model 2 1.16389850673934 20240812-18:55:40 test_perplexity 271 model 2 1.1665377559740253 20240812-19:02:02 test_accuracy 271 model 2 val 1501 / 1598 20240812-19:02:05 wrote gpt_002.pth 20240812-19:03:06 wrote non_validated_0271_02.png 20240812-19:03:06 wrote state.pth 20240812-19:03:06 --- epoch 272 ---------------------------------------- 20240812-19:03:06 current_test_accuracies 0.9448 0.9451 0.9393 0.9441 0.9439 20240812-19:03:06 training model 2 20240812-19:19:50 train_perplexity 272 model 2 1.1637494101616939 20240812-19:20:06 test_perplexity 272 model 2 1.1672355763375624 20240812-19:26:17 test_accuracy 272 model 2 val 1483 / 1596 20240812-19:26:20 wrote gpt_002.pth 20240812-19:27:22 wrote non_validated_0272_02.png 20240812-19:27:22 wrote state.pth 20240812-19:27:22 --- epoch 273 ---------------------------------------- 20240812-19:27:22 current_test_accuracies 0.9448 0.9451 0.9292 0.9441 0.9439 20240812-19:27:22 training model 2 20240812-19:44:05 train_perplexity 273 model 2 1.1637637814593265 20240812-19:44:21 test_perplexity 273 model 2 1.1661867111956326 20240812-19:50:25 test_accuracy 273 model 2 val 1543 / 1610 20240812-19:50:29 wrote gpt_002.pth 20240812-19:51:30 wrote non_validated_0273_02.png 20240812-19:51:30 wrote state.pth 20240812-19:51:30 --- epoch 274 ---------------------------------------- 20240812-19:51:30 current_test_accuracies 0.9448 0.9451 0.9584 0.9441 0.9439 20240812-19:51:30 training model 4 20240812-20:08:13 train_perplexity 274 model 4 1.1642514024598825 20240812-20:08:28 test_perplexity 274 model 4 1.1661084802822328 20240812-20:14:41 test_accuracy 274 model 4 val 1531 / 1608 20240812-20:14:44 wrote gpt_004.pth 20240812-20:15:46 wrote non_validated_0274_04.png 20240812-20:15:46 wrote state.pth 20240812-20:15:46 --- epoch 275 ---------------------------------------- 20240812-20:15:46 current_test_accuracies 0.9448 0.9451 0.9584 0.9441 0.9521 20240812-20:15:46 training model 3 20240812-20:32:27 train_perplexity 275 model 3 1.1651460988794713 20240812-20:32:43 test_perplexity 275 model 3 1.1668095073461626 20240812-20:38:55 test_accuracy 275 model 3 val 1508 / 1604 20240812-20:38:58 wrote gpt_003.pth 20240812-20:40:00 wrote non_validated_0275_03.png 20240812-20:40:00 wrote state.pth 20240812-20:40:00 --- epoch 276 ---------------------------------------- 20240812-20:40:00 current_test_accuracies 0.9448 0.9451 0.9584 0.9401 0.9521 20240812-20:40:00 training model 3 20240812-20:56:43 train_perplexity 276 model 3 1.1652122049687998 20240812-20:56:59 test_perplexity 276 model 3 1.1650749167945367 20240812-21:03:12 test_accuracy 276 model 3 val 1501 / 1597 20240812-21:03:15 wrote gpt_003.pth 20240812-21:04:15 wrote non_validated_0276_03.png 20240812-21:04:16 wrote state.pth 20240812-21:04:16 --- epoch 277 ---------------------------------------- 20240812-21:04:16 current_test_accuracies 0.9448 0.9451 0.9584 0.9399 0.9521 20240812-21:04:16 training model 3 20240812-21:20:58 train_perplexity 277 model 3 1.1648414105642848 20240812-21:21:14 test_perplexity 277 model 3 1.1649702110713598 20240812-21:27:29 test_accuracy 277 model 3 val 1490 / 1582 20240812-21:27:33 wrote gpt_003.pth 20240812-21:28:33 wrote non_validated_0277_03.png 20240812-21:28:33 wrote state.pth 20240812-21:28:33 --- epoch 278 ---------------------------------------- 20240812-21:28:33 current_test_accuracies 0.9448 0.9451 0.9584 0.9418 0.9521 20240812-21:28:33 training model 3 20240812-21:45:16 train_perplexity 278 model 3 1.1648834963654902 20240812-21:45:31 test_perplexity 278 model 3 1.1661089918861316 20240812-21:51:45 test_accuracy 278 model 3 val 1505 / 1598 20240812-21:51:48 wrote gpt_003.pth 20240812-21:52:50 wrote non_validated_0278_03.png 20240812-21:52:50 wrote state.pth 20240812-21:52:50 --- epoch 279 ---------------------------------------- 20240812-21:52:50 current_test_accuracies 0.9448 0.9451 0.9584 0.9418 0.9521 20240812-21:52:50 training model 3 20240812-22:09:33 train_perplexity 279 model 3 1.1640895458851308 20240812-22:09:48 test_perplexity 279 model 3 1.1653270150505528 20240812-22:15:59 test_accuracy 279 model 3 val 1528 / 1611 20240812-22:16:02 wrote gpt_003.pth 20240812-22:17:04 wrote non_validated_0279_03.png 20240812-22:17:04 wrote state.pth 20240812-22:17:04 --- epoch 280 ---------------------------------------- 20240812-22:17:04 current_test_accuracies 0.9448 0.9451 0.9584 0.9485 0.9521 20240812-22:17:04 training model 0 20240812-22:33:46 train_perplexity 280 model 0 1.163846795035508 20240812-22:34:02 test_perplexity 280 model 0 1.1634141997532286 20240812-22:40:15 test_accuracy 280 model 0 val 1499 / 1580 20240812-22:40:18 wrote gpt_000.pth 20240812-22:41:20 wrote non_validated_0280_00.png 20240812-22:41:20 wrote state.pth 20240812-22:41:20 --- epoch 281 ---------------------------------------- 20240812-22:41:20 current_test_accuracies 0.9487 0.9451 0.9584 0.9485 0.9521 20240812-22:41:20 training model 1 20240812-22:58:02 train_perplexity 281 model 1 1.1639440724059287 20240812-22:58:18 test_perplexity 281 model 1 1.1662217741602317 20240812-23:04:22 test_accuracy 281 model 1 val 1545 / 1629 20240812-23:04:25 wrote gpt_001.pth 20240812-23:05:25 wrote non_validated_0281_01.png 20240812-23:05:25 wrote state.pth 20240812-23:05:25 --- epoch 282 ---------------------------------------- 20240812-23:05:25 current_test_accuracies 0.9487 0.9484 0.9584 0.9485 0.9521 20240812-23:05:25 training model 1 20240812-23:22:08 train_perplexity 282 model 1 1.1641660541891483 20240812-23:22:23 test_perplexity 282 model 1 1.1637393318972125 20240812-23:28:46 test_accuracy 282 model 1 val 1482 / 1575 20240812-23:28:50 wrote gpt_001.pth 20240812-23:29:51 wrote non_validated_0282_01.png 20240812-23:29:51 wrote state.pth 20240812-23:29:51 --- epoch 283 ---------------------------------------- 20240812-23:29:51 current_test_accuracies 0.9487 0.9410 0.9584 0.9485 0.9521 20240812-23:29:51 training model 1 20240812-23:46:34 train_perplexity 283 model 1 1.1639364777838732 20240812-23:46:49 test_perplexity 283 model 1 1.1645886979506703 20240812-23:52:55 test_accuracy 283 model 1 val 1541 / 1619 20240812-23:52:59 wrote gpt_001.pth 20240812-23:54:01 wrote non_validated_0283_01.png 20240812-23:54:01 wrote state.pth 20240812-23:54:01 --- epoch 284 ---------------------------------------- 20240812-23:54:01 current_test_accuracies 0.9487 0.9518 0.9584 0.9485 0.9521 20240812-23:54:01 training model 3 20240813-00:10:43 train_perplexity 284 model 3 1.164051155102298 20240813-00:10:59 test_perplexity 284 model 3 1.164069745506608 20240813-00:17:01 test_accuracy 284 model 3 val 1526 / 1621 20240813-00:17:05 wrote gpt_003.pth 20240813-00:18:06 wrote non_validated_0284_03.png 20240813-00:18:06 wrote state.pth 20240813-00:18:06 --- epoch 285 ---------------------------------------- 20240813-00:18:06 current_test_accuracies 0.9487 0.9518 0.9584 0.9414 0.9521 20240813-00:18:06 training model 3 20240813-00:34:49 train_perplexity 285 model 3 1.165099636266516 20240813-00:35:04 test_perplexity 285 model 3 1.163447361905789 20240813-00:41:24 test_accuracy 285 model 3 val 1477 / 1568 20240813-00:41:27 wrote gpt_003.pth 20240813-00:42:30 wrote non_validated_0285_03.png 20240813-00:42:30 wrote state.pth 20240813-00:42:30 --- epoch 286 ---------------------------------------- 20240813-00:42:30 current_test_accuracies 0.9487 0.9518 0.9584 0.9420 0.9521 20240813-00:42:30 training model 3 20240813-00:59:12 train_perplexity 286 model 3 1.1644747171985015 20240813-00:59:28 test_perplexity 286 model 3 1.1659870459762314 20240813-01:05:35 test_accuracy 286 model 3 val 1517 / 1602 20240813-01:05:39 wrote gpt_003.pth 20240813-01:06:41 wrote non_validated_0286_03.png 20240813-01:06:41 wrote state.pth 20240813-01:06:41 --- epoch 287 ---------------------------------------- 20240813-01:06:41 current_test_accuracies 0.9487 0.9518 0.9584 0.9469 0.9521 20240813-01:06:41 training model 3 20240813-01:23:22 train_perplexity 287 model 3 1.1640932780160929 20240813-01:23:38 test_perplexity 287 model 3 1.1657315836968738 20240813-01:29:44 test_accuracy 287 model 3 val 1514 / 1610 20240813-01:29:48 wrote gpt_003.pth 20240813-01:30:50 wrote non_validated_0287_03.png 20240813-01:30:50 wrote state.pth 20240813-01:30:50 --- epoch 288 ---------------------------------------- 20240813-01:30:50 current_test_accuracies 0.9487 0.9518 0.9584 0.9404 0.9521 20240813-01:30:50 training model 3 20240813-01:47:31 train_perplexity 288 model 3 1.164014990707045 20240813-01:47:47 test_perplexity 288 model 3 1.16594530625193 20240813-01:53:58 test_accuracy 288 model 3 val 1539 / 1626 20240813-01:54:01 wrote gpt_003.pth 20240813-01:55:03 wrote non_validated_0288_03.png 20240813-01:55:03 wrote state.pth 20240813-01:55:03 --- epoch 289 ---------------------------------------- 20240813-01:55:03 current_test_accuracies 0.9487 0.9518 0.9584 0.9465 0.9521 20240813-01:55:03 training model 3 20240813-02:11:44 train_perplexity 289 model 3 1.1639293656946972 20240813-02:12:00 test_perplexity 289 model 3 1.1634545326106145 20240813-02:18:14 test_accuracy 289 model 3 val 1498 / 1595 20240813-02:18:18 wrote gpt_003.pth 20240813-02:19:20 wrote non_validated_0289_03.png 20240813-02:19:20 wrote state.pth 20240813-02:19:20 --- epoch 290 ---------------------------------------- 20240813-02:19:20 current_test_accuracies 0.9487 0.9518 0.9584 0.9392 0.9521 20240813-02:19:20 training model 3 20240813-02:36:01 train_perplexity 290 model 3 1.1637252555334787 20240813-02:36:17 test_perplexity 290 model 3 1.1643624675410738 20240813-02:42:30 test_accuracy 290 model 3 val 1512 / 1595 20240813-02:42:33 wrote gpt_003.pth 20240813-02:43:35 wrote non_validated_0290_03.png 20240813-02:43:35 wrote state.pth 20240813-02:43:35 --- epoch 291 ---------------------------------------- 20240813-02:43:35 current_test_accuracies 0.9487 0.9518 0.9584 0.9480 0.9521 20240813-02:43:35 training model 3 20240813-03:00:16 train_perplexity 291 model 3 1.1634663267229044 20240813-03:00:32 test_perplexity 291 model 3 1.1652168494955544 20240813-03:06:34 test_accuracy 291 model 3 val 1529 / 1621 20240813-03:06:37 wrote gpt_003.pth 20240813-03:07:38 wrote non_validated_0291_03.png 20240813-03:07:38 wrote state.pth 20240813-03:07:38 --- epoch 292 ---------------------------------------- 20240813-03:07:38 current_test_accuracies 0.9487 0.9518 0.9584 0.9432 0.9521 20240813-03:07:38 training model 3 20240813-03:24:19 train_perplexity 292 model 3 1.163930143102714 20240813-03:24:34 test_perplexity 292 model 3 1.165091943993258 20240813-03:30:46 test_accuracy 292 model 3 val 1526 / 1606 20240813-03:30:49 wrote gpt_003.pth 20240813-03:31:51 wrote non_validated_0292_03.png 20240813-03:31:51 wrote state.pth 20240813-03:31:51 --- epoch 293 ---------------------------------------- 20240813-03:31:51 current_test_accuracies 0.9487 0.9518 0.9584 0.9502 0.9521 20240813-03:31:51 training model 0 20240813-03:48:34 train_perplexity 293 model 0 1.1636905630724839 20240813-03:48:49 test_perplexity 293 model 0 1.165741915476437 20240813-03:55:01 test_accuracy 293 model 0 val 1507 / 1591 20240813-03:55:05 wrote gpt_000.pth 20240813-03:56:07 wrote non_validated_0293_00.png 20240813-03:56:07 wrote state.pth 20240813-03:56:07 --- epoch 294 ---------------------------------------- 20240813-03:56:07 current_test_accuracies 0.9472 0.9518 0.9584 0.9502 0.9521 20240813-03:56:07 training model 0 20240813-04:12:50 train_perplexity 294 model 0 1.1638593562443147 20240813-04:13:06 test_perplexity 294 model 0 1.1634954345370085 20240813-04:19:14 test_accuracy 294 model 0 val 1529 / 1613 20240813-04:19:18 wrote gpt_000.pth 20240813-04:20:20 wrote non_validated_0294_00.png 20240813-04:20:20 wrote state.pth 20240813-04:20:20 --- epoch 295 ---------------------------------------- 20240813-04:20:20 current_test_accuracies 0.9479 0.9518 0.9584 0.9502 0.9521 20240813-04:20:20 training model 0 20240813-04:37:02 train_perplexity 295 model 0 1.1632978610975269 20240813-04:37:17 test_perplexity 295 model 0 1.1640476544693017 20240813-04:43:37 test_accuracy 295 model 0 val 1506 / 1570 20240813-04:43:41 wrote gpt_000.pth 20240813-04:44:43 wrote non_validated_0295_00.png 20240813-04:44:43 wrote state.pth 20240813-04:44:43 --- epoch 296 ---------------------------------------- 20240813-04:44:43 current_test_accuracies 0.9592 0.9518 0.9584 0.9502 0.9521 20240813-04:51:46 keep c_quizzes model 3 validated 49 / 420 (11.67%) nb_accumulated 49 / 420 (finishes Tue 05:45 -- 416/h) 20240813-04:58:41 keep c_quizzes model 0 validated 40 / 420 (9.52%) nb_accumulated 89 / 420 (finishes Tue 05:50 -- 382/h) 20240813-05:05:40 keep c_quizzes model 3 validated 50 / 420 (11.90%) nb_accumulated 139 / 420 (finishes Tue 05:48 -- 398/h) 20240813-05:12:33 keep c_quizzes model 4 validated 37 / 420 (8.81%) nb_accumulated 176 / 420 (finishes Tue 05:51 -- 379/h) 20240813-05:19:28 keep c_quizzes model 2 validated 37 / 420 (8.81%) nb_accumulated 213 / 420 (finishes Tue 05:53 -- 367/h) 20240813-05:26:22 keep c_quizzes model 1 validated 39 / 420 (9.29%) nb_accumulated 252 / 420 (finishes Tue 05:54 -- 362/h) 20240813-05:33:16 keep c_quizzes model 2 validated 24 / 420 (5.71%) nb_accumulated 276 / 420 (finishes Tue 05:58 -- 341/h) 20240813-05:40:13 keep c_quizzes model 1 validated 28 / 420 (6.67%) nb_accumulated 304 / 420 (finishes Tue 06:01 -- 328/h) 20240813-05:47:09 keep c_quizzes model 2 validated 35 / 420 (8.33%) nb_accumulated 339 / 420 (finishes Tue 06:02 -- 325/h) 20240813-05:54:01 keep c_quizzes model 0 validated 32 / 420 (7.62%) nb_accumulated 371 / 420 (finishes Tue 06:03 -- 321/h) 20240813-06:00:55 keep c_quizzes model 2 validated 42 / 420 (10.00%) nb_accumulated 413 / 420 (finishes Tue 06:02 -- 325/h) 20240813-06:07:50 keep c_quizzes model 3 validated 33 / 420 (7.86%) nb_accumulated 446 / 420 (finishes now! -- 321/h) 20240813-06:08:22 wrote c_quizzes.pth 20240813-06:08:22 training model 0 20240813-06:25:03 train_perplexity 296 model 0 1.1641987420169373 20240813-06:25:19 test_perplexity 296 model 0 1.1654344198297915 20240813-06:31:35 test_accuracy 296 model 0 val 1489 / 1589 20240813-06:31:39 wrote gpt_000.pth 20240813-06:32:41 wrote non_validated_0296_00.png 20240813-06:32:41 wrote state.pth 20240813-06:32:41 --- epoch 297 ---------------------------------------- 20240813-06:32:41 current_test_accuracies 0.9371 0.0000 0.0000 0.0000 0.0000 20240813-06:32:41 training model 1 20240813-06:49:22 train_perplexity 297 model 1 1.1651817284970634 20240813-06:49:37 test_perplexity 297 model 1 1.1658233788492336 20240813-06:55:56 test_accuracy 297 model 1 val 1491 / 1576 20240813-06:56:00 wrote gpt_001.pth 20240813-06:57:01 wrote non_validated_0297_01.png 20240813-06:57:01 wrote state.pth 20240813-06:57:01 --- epoch 298 ---------------------------------------- 20240813-06:57:01 current_test_accuracies 0.9371 0.9461 0.0000 0.0000 0.0000 20240813-06:57:01 training model 2 20240813-07:13:42 train_perplexity 298 model 2 1.164835846540978 20240813-07:13:58 test_perplexity 298 model 2 1.16620629527856 20240813-07:20:03 test_accuracy 298 model 2 val 1519 / 1610 20240813-07:20:06 wrote gpt_002.pth 20240813-07:21:08 wrote non_validated_0298_02.png 20240813-07:21:08 wrote state.pth 20240813-07:21:08 --- epoch 299 ---------------------------------------- 20240813-07:21:08 current_test_accuracies 0.9371 0.9461 0.9435 0.0000 0.0000 20240813-07:21:08 training model 3 20240813-07:37:48 train_perplexity 299 model 3 1.1642164648080646 20240813-07:38:04 test_perplexity 299 model 3 1.1651876818760876 20240813-07:44:05 test_accuracy 299 model 3 val 1540 / 1626 20240813-07:44:09 wrote gpt_003.pth 20240813-07:45:10 wrote non_validated_0299_03.png 20240813-07:45:10 wrote state.pth 20240813-07:45:10 --- epoch 300 ---------------------------------------- 20240813-07:45:10 current_test_accuracies 0.9371 0.9461 0.9435 0.9471 0.0000 20240813-07:45:10 training model 4 20240813-08:01:51 train_perplexity 300 model 4 1.1655662871100394 20240813-08:02:07 test_perplexity 300 model 4 1.1662028033395866 20240813-08:08:25 test_accuracy 300 model 4 val 1475 / 1565 20240813-08:08:28 wrote gpt_004.pth 20240813-08:09:30 wrote non_validated_0300_04.png 20240813-08:09:30 wrote state.pth 20240813-08:09:30 --- epoch 301 ---------------------------------------- 20240813-08:09:30 current_test_accuracies 0.9371 0.9461 0.9435 0.9471 0.9425 20240813-08:09:30 training model 0 20240813-08:26:11 train_perplexity 301 model 0 1.1642392714699878 20240813-08:26:27 test_perplexity 301 model 0 1.1659340064270973 20240813-08:32:32 test_accuracy 301 model 0 val 1517 / 1610 20240813-08:32:35 wrote gpt_000.pth 20240813-08:33:37 wrote non_validated_0301_00.png 20240813-08:33:37 wrote state.pth 20240813-08:33:37 --- epoch 302 ---------------------------------------- 20240813-08:33:37 current_test_accuracies 0.9422 0.9461 0.9435 0.9471 0.9425 20240813-08:33:37 training model 0 20240813-08:50:18 train_perplexity 302 model 0 1.1643474546183044 20240813-08:50:34 test_perplexity 302 model 0 1.1655296436768214 20240813-08:56:38 test_accuracy 302 model 0 val 1533 / 1613 20240813-08:56:42 wrote gpt_000.pth 20240813-08:57:43 wrote non_validated_0302_00.png 20240813-08:57:43 wrote state.pth 20240813-08:57:43 --- epoch 303 ---------------------------------------- 20240813-08:57:43 current_test_accuracies 0.9504 0.9461 0.9435 0.9471 0.9425 20240813-08:57:43 training model 4 20240813-09:14:25 train_perplexity 303 model 4 1.1645329570128284 20240813-09:14:40 test_perplexity 303 model 4 1.166806632135319 20240813-09:20:50 test_accuracy 303 model 4 val 1500 / 1590 20240813-09:20:54 wrote gpt_004.pth 20240813-09:21:56 wrote non_validated_0303_04.png 20240813-09:21:56 wrote state.pth 20240813-09:21:56 --- epoch 304 ---------------------------------------- 20240813-09:21:56 current_test_accuracies 0.9504 0.9461 0.9435 0.9471 0.9434 20240813-09:21:56 training model 4 20240813-09:38:40 train_perplexity 304 model 4 1.1647185504416149 20240813-09:38:55 test_perplexity 304 model 4 1.1663151273098809 20240813-09:45:07 test_accuracy 304 model 4 val 1466 / 1571 20240813-09:45:10 wrote gpt_004.pth 20240813-09:46:12 wrote non_validated_0304_04.png 20240813-09:46:12 wrote state.pth 20240813-09:46:12 --- epoch 305 ---------------------------------------- 20240813-09:46:12 current_test_accuracies 0.9504 0.9461 0.9435 0.9471 0.9332 20240813-09:46:12 training model 4 20240813-10:02:56 train_perplexity 305 model 4 1.1644374356498428 20240813-10:03:12 test_perplexity 305 model 4 1.164743539035854 20240813-10:09:27 test_accuracy 305 model 4 val 1475 / 1581 20240813-10:09:31 wrote gpt_004.pth 20240813-10:10:33 wrote non_validated_0305_04.png 20240813-10:10:33 wrote state.pth 20240813-10:10:33 --- epoch 306 ---------------------------------------- 20240813-10:10:33 current_test_accuracies 0.9504 0.9461 0.9435 0.9471 0.9330 20240813-10:10:33 training model 4 20240813-10:27:17 train_perplexity 306 model 4 1.1645902080032893 20240813-10:27:32 test_perplexity 306 model 4 1.1653071557907617 20240813-10:33:47 test_accuracy 306 model 4 val 1495 / 1593 20240813-10:33:50 wrote gpt_004.pth 20240813-10:34:52 wrote non_validated_0306_04.png 20240813-10:34:52 wrote state.pth 20240813-10:34:52 --- epoch 307 ---------------------------------------- 20240813-10:34:52 current_test_accuracies 0.9504 0.9461 0.9435 0.9471 0.9385 20240813-10:34:52 training model 4 20240813-10:51:35 train_perplexity 307 model 4 1.1647579434961897 20240813-10:51:50 test_perplexity 307 model 4 1.1653361692923645 20240813-10:57:55 test_accuracy 307 model 4 val 1517 / 1613 20240813-10:57:59 wrote gpt_004.pth 20240813-10:59:00 wrote non_validated_0307_04.png 20240813-10:59:00 wrote state.pth 20240813-10:59:00 --- epoch 308 ---------------------------------------- 20240813-10:59:00 current_test_accuracies 0.9504 0.9461 0.9435 0.9471 0.9405 20240813-10:59:00 training model 4 20240813-11:15:43 train_perplexity 308 model 4 1.1643434543208526 20240813-11:15:59 test_perplexity 308 model 4 1.165139031879812 20240813-11:22:04 test_accuracy 308 model 4 val 1529 / 1621 20240813-11:22:07 wrote gpt_004.pth 20240813-11:23:08 wrote non_validated_0308_04.png 20240813-11:23:08 wrote state.pth 20240813-11:23:08 --- epoch 309 ---------------------------------------- 20240813-11:23:08 current_test_accuracies 0.9504 0.9461 0.9435 0.9471 0.9432 20240813-11:23:08 training model 4 20240813-11:39:52 train_perplexity 309 model 4 1.1641443235814628 20240813-11:40:07 test_perplexity 309 model 4 1.1675554118023375 20240813-11:46:13 test_accuracy 309 model 4 val 1518 / 1623 20240813-11:46:17 wrote gpt_004.pth 20240813-11:47:18 wrote non_validated_0309_04.png 20240813-11:47:18 wrote state.pth 20240813-11:47:18 --- epoch 310 ---------------------------------------- 20240813-11:47:18 current_test_accuracies 0.9504 0.9461 0.9435 0.9471 0.9353 20240813-11:47:18 training model 4 20240813-12:04:00 train_perplexity 310 model 4 1.1644459806684964 20240813-12:04:15 test_perplexity 310 model 4 1.1678742896564513 20240813-12:10:23 test_accuracy 310 model 4 val 1525 / 1605 20240813-12:10:27 wrote gpt_004.pth 20240813-12:11:29 wrote non_validated_0310_04.png 20240813-12:11:29 wrote state.pth 20240813-12:11:29 --- epoch 311 ---------------------------------------- 20240813-12:11:29 current_test_accuracies 0.9504 0.9461 0.9435 0.9471 0.9502 20240813-12:11:29 training model 2 20240813-12:28:11 train_perplexity 311 model 2 1.164629826052441 20240813-12:28:27 test_perplexity 311 model 2 1.1658706236834044