From 5bd6636ed1b3c15e5a98a32d945a7c81215ff859 Mon Sep 17 00:00:00 2001
From: Fedor Vitiugin <fedor.vitiugin@gmail.com>
Date: Thu, 25 Sep 2025 18:27:37 +0300
Subject: [PATCH 1/2] Update training args: use eval_strategy, set report_to="none"

---
 ex4_parameters.ipynb | 1342 +++++++++++++++++++++---------------------
 1 file changed, 672 insertions(+), 670 deletions(-)
diff --git a/ex4_parameters.ipynb b/ex4_parameters.ipynb
index 81d79e3..50eede3 100644
--- a/ex4_parameters.ipynb
+++ b/ex4_parameters.ipynb
@@ -3,8 +3,8 @@
     {
       "cell_type": "markdown",
       "metadata": {
-        "id": "view-in-github",
-        "colab_type": "text"
+        "colab_type": "text",
+        "id": "view-in-github"
       },
       "source": [
         "<a href=\"https://colab.research.google.com/github/TurkuNLP/Deep_Learning_in_LangTech_course/blob/master/ex4_parameters.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
@@ -12,96 +12,78 @@
     },
     {
       "cell_type": "markdown",
-      "source": [
-        "# Setup"
-      ],
       "metadata": {
         "id": "wo13ZXoZYB6J"
-      }
+      },
+      "source": [
+        "# Setup"
+      ]
     },
     {
       "cell_type": "code",
-      "source": [
-        "!pip3 install -q transformers datasets evaluate accelerate"
-      ],
+      "execution_count": 6,
       "metadata": {
         "id": "4pquj9Xoxaza"
       },
-      "execution_count": 6,
-      "outputs": []
+      "outputs": [],
+      "source": [
+        "!pip3 install -q transformers datasets evaluate accelerate"
+      ]
     },
     {
       "cell_type": "code",
+      "execution_count": 7,
+      "metadata": {
+        "id": "cQ63zw6BY7tn"
+      },
+      "outputs": [],
       "source": [
         "from pprint import pprint\n",
         "import logging\n",
         "\n",
         "logging.disable(logging.INFO)"
-      ],
-      "metadata": {
-        "id": "cQ63zw6BY7tn"
-      },
-      "execution_count": 7,
-      "outputs": []
+      ]
     },
     {
       "cell_type": "markdown",
+      "metadata": {
+        "id": "W4x7GbT2ZKUJ"
+      },
       "source": [
         "---\n",
         "# Download and prepare data"
-      ],
-      "metadata": {
-        "id": "W4x7GbT2ZKUJ"
-      }
+      ]
     },
     {
       "cell_type": "code",
+      "execution_count": 8,
+      "metadata": {
+        "id": "5DKskTuoyCf-"
+      },
+      "outputs": [],
       "source": [
         "import datasets\n",
         "\n",
         "dataset = datasets.load_dataset('imdb')\n",
         "dataset = dataset.shuffle() #This is never a bad idea, datasets may have ordering to them, which is not what we want\n",
         "del dataset[\"unsupervised\"] # Delete the unlabeled part of the dataset to make things faster"
-      ],
-      "metadata": {
-        "id": "5DKskTuoyCf-"
-      },
-      "execution_count": 8,
-      "outputs": []
+      ]
     },
     {
       "cell_type": "markdown",
+      "metadata": {
+        "id": "8KF9UtzUbrBA"
+      },
       "source": [
         "---\n",
         "\n",
         "# Tokenize and vectorize data"
-      ],
-      "metadata": {
-        "id": "8KF9UtzUbrBA"
-      }
+      ]
     },
     {
       "cell_type": "code",
-      "source": [
-        "import transformers\n",
-        "\n",
-        "model_name = \"bert-base-cased\"\n",
-        "tokenizer = transformers.AutoTokenizer.from_pretrained(model_name)\n",
-        "\n",
-        "# Define a simple function that applies the tokenizer\n",
-        "def tokenize(example):\n",
-        "    return tokenizer(\n",
-        "        example[\"text\"],\n",
-        "        max_length=128,\n",
-        "        truncation=True,\n",
-        "    )\n",
-        "\n",
-        "# Apply the tokenizer to the whole dataset using .map()\n",
-        "dataset = dataset.map(tokenize)"
-      ],
+      "execution_count": 9,
       "metadata": {
-        "id": "wjrAGcFtymJF",
-        "outputId": "9a35e1d5-c074-4598-828f-71326bf24f87",
         "colab": {
           "base_uri": "https://localhost:8080/",
           "height": 137,
@@ -129,63 +111,86 @@
             "1bdf39342f314fbba05e0a0d066f2a0f",
             "432f18bd11114415bd5fb91cce8de324"
           ]
-        }
+        },
+        "id": "wjrAGcFtymJF",
+        "outputId": "9a35e1d5-c074-4598-828f-71326bf24f87"
       },
-      "execution_count": 9,
       "outputs": [
         {
-          "output_type": "stream",
           "name": "stderr",
+          "output_type": "stream",
           "text": [
             "/usr/local/lib/python3.10/dist-packages/transformers/tokenization_utils_base.py:1601: FutureWarning: `clean_up_tokenization_spaces` was not set. It will be set to `True` by default. This behavior will be depracted in transformers v4.45, and will be then set to `False` by default. For more details check this issue: https://github.com/huggingface/transformers/issues/31884\n",
             "  warnings.warn(\n"
           ]
         },
         {
-          "output_type": "display_data",
           "data": {
-            "text/plain": [
-              "Map:   0%|          | 0/25000 [00:00<?, ? examples/s]"
-            ],
             "application/vnd.jupyter.widget-view+json": {
+              "model_id": "6a6875eea30b4a64b87996f30d6b9b0e",
               "version_major": 2,
-              "version_minor": 0,
-              "model_id": "6a6875eea30b4a64b87996f30d6b9b0e"
-            }
+              "version_minor": 0
+            },
+            "text/plain": [
+              "Map:   0%|          | 0/25000 [00:00<?, ? examples/s]"
+            ]
           },
-          "metadata": {}
+          "metadata": {},
+          "output_type": "display_data"
         },
         {
-          "output_type": "display_data",
           "data": {
-            "text/plain": [
-              "Map:   0%|          | 0/25000 [00:00<?, ? examples/s]"
-            ],
             "application/vnd.jupyter.widget-view+json": {
+              "model_id": "6d68fd610b8c48f08ee6f56101119802",
               "version_major": 2,
-              "version_minor": 0,
-              "model_id": "6d68fd610b8c48f08ee6f56101119802"
-            }
+              "version_minor": 0
+            },
+            "text/plain": [
+              "Map:   0%|          | 0/25000 [00:00<?, ? examples/s]"
+            ]
           },
-          "metadata": {}
+          "metadata": {},
+          "output_type": "display_data"
         }
+      ],
+      "source": [
+        "import transformers\n",
+        "\n",
+        "model_name = \"bert-base-cased\"\n",
+        "tokenizer = transformers.AutoTokenizer.from_pretrained(model_name)\n",
+        "\n",
+        "# Define a simple function that applies the tokenizer\n",
+        "def tokenize(example):\n",
+        "    return tokenizer(\n",
+        "        example[\"text\"],\n",
+        "        max_length=128,\n",
+        "        truncation=True,\n",
+        "    )\n",
+        "\n",
+        "# Apply the tokenizer to the whole dataset using .map()\n",
+        "dataset = dataset.map(tokenize)"
       ]
     },
     {
       "cell_type": "markdown",
+      "metadata": {
+        "id": "AiX7WeW2X5gv"
+      },
       "source": [
         "---\n",
         "\n",
         "# Define model\n",
         "\n",
         "(Note that here we define the model structure and computation without setting any parameters yet!)"
-      ],
-      "metadata": {
-        "id": "AiX7WeW2X5gv"
-      }
+      ]
     },
     {
       "cell_type": "code",
+      "execution_count": 10,
+      "metadata": {
+        "id": "9kyQQhu0_wep"
+      },
+      "outputs": [],
       "source": [
         "import torch\n",
         "\n",
@@ -252,27 +257,60 @@
         "        else:\n",
         "            # No labels, so just return the output\n",
         "            return (output,)"
-      ],
-      "metadata": {
-        "id": "9kyQQhu0_wep"
-      },
-      "execution_count": 10,
-      "outputs": []
+      ]
     },
     {
       "cell_type": "markdown",
+      "metadata": {
+        "id": "EF5Pau_PjN-J"
+      },
       "source": [
         "---\n",
         "# Define training support\n",
         "\n",
         "(Collator, evaluation, Callbacks)"
-      ],
-      "metadata": {
-        "id": "EF5Pau_PjN-J"
-      }
+      ]
     },
     {
       "cell_type": "code",
+      "execution_count": 11,
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/",
+          "height": 69,
+          "referenced_widgets": [
+            "ee04263c2ef846e5b1ba93ad687e37b1",
+            "bde5f2c9b4ef4e8c82fddbdb83a67a1e",
+            "1c4288674e7245e9b4697b55950731ef",
+            "eb5b4d35582542d1bdd0296817db1daf",
+            "61a2564e98294a3b98f202ecb051e1ce",
+            "ab20b48f72ce4486a08ccea3f4624486",
+            "ca67aa97ba124742a9c3688e03f21f96",
+            "6c8d2ad0ffce4d2081369030a4c4304d",
+            "db761c9a410f4b8bb1b2ec88fb0b847a",
+            "7a10fc4726e6413397d474c3b5f187b9",
+            "82de00af0c6140c49a1ab5868158b93b"
+          ]
+        },
+        "id": "wBn6iLcMjXGr",
+        "outputId": "2602f8eb-89cf-4052-cabb-c0363be850d6"
+      },
+      "outputs": [
+        {
+          "data": {
+            "application/vnd.jupyter.widget-view+json": {
+              "model_id": "ee04263c2ef846e5b1ba93ad687e37b1",
+              "version_major": 2,
+              "version_minor": 0
+            },
+            "text/plain": [
+              "Downloading builder script:   0%|          | 0.00/4.20k [00:00<?, ?B/s]"
+            ]
+          },
+          "metadata": {},
+          "output_type": "display_data"
+        }
+      ],
       "source": [
         "import evaluate\n",
         "\n",
@@ -303,112 +341,33 @@
         "            for k, v in logs.items():\n",
         "                if k != \"epoch\" or v not in self.logs[k]:\n",
         "                    self.logs[k].append(v)"
-      ],
-      "metadata": {
-        "id": "wBn6iLcMjXGr",
-        "outputId": "2602f8eb-89cf-4052-cabb-c0363be850d6",
-        "colab": {
-          "base_uri": "https://localhost:8080/",
-          "height": 69,
-          "referenced_widgets": [
-            "ee04263c2ef846e5b1ba93ad687e37b1",
-            "bde5f2c9b4ef4e8c82fddbdb83a67a1e",
-            "1c4288674e7245e9b4697b55950731ef",
-            "eb5b4d35582542d1bdd0296817db1daf",
-            "61a2564e98294a3b98f202ecb051e1ce",
-            "ab20b48f72ce4486a08ccea3f4624486",
-            "ca67aa97ba124742a9c3688e03f21f96",
-            "6c8d2ad0ffce4d2081369030a4c4304d",
-            "db761c9a410f4b8bb1b2ec88fb0b847a",
-            "7a10fc4726e6413397d474c3b5f187b9",
-            "82de00af0c6140c49a1ab5868158b93b"
-          ]
-        }
-      },
-      "execution_count": 11,
-      "outputs": [
-        {
-          "output_type": "display_data",
-          "data": {
-            "text/plain": [
-              "Downloading builder script:   0%|          | 0.00/4.20k [00:00<?, ?B/s]"
-            ],
-            "application/vnd.jupyter.widget-view+json": {
-              "version_major": 2,
-              "version_minor": 0,
-              "model_id": "ee04263c2ef846e5b1ba93ad687e37b1"
-            }
-          },
-          "metadata": {}
-        }
       ]
     },
     {
       "cell_type": "markdown",
+      "metadata": {
+        "id": "pVTopuNPW26S"
+      },
       "source": [
         "---\n",
         "# Hyperparameter search - First option"
-      ],
-      "metadata": {
-        "id": "pVTopuNPW26S"
-      }
+      ]
     },
     {
       "cell_type": "code",
-      "source": [
-        "for lr in [0.000005, 0.00005, 0.0005, 0.005, 0.05, 0.5]:\n",
-        "\n",
-        "    # create the model\n",
-        "    config = BasicConfig(\n",
-        "        vocab_size = tokenizer.vocab_size,\n",
-        "        num_labels = len(set(dataset['train']['label'])),\n",
-        "        embedding_dim = 64,\n",
-        "        filter_size = 3,\n",
-        "        num_filters = 10,\n",
-        "    )\n",
-        "\n",
-        "    model = SimpleCNN(config)\n",
-        "\n",
-        "    # Set training arguments\n",
-        "    trainer_args = transformers.TrainingArguments(\n",
-        "        \"checkpoints\",\n",
-        "        evaluation_strategy=\"steps\",\n",
-        "        logging_strategy=\"steps\",\n",
-        "        load_best_model_at_end=True,\n",
-        "        eval_steps=500,\n",
-        "        logging_steps=500,\n",
-        "        learning_rate=lr, # <--- parameter goes here\n",
-        "        per_device_train_batch_size=8,\n",
-        "        max_steps=2500,\n",
-        "    )\n",
-        "\n",
-        "    trainer = transformers.Trainer(\n",
-        "        model=model,\n",
-        "        args=trainer_args,\n",
-        "        train_dataset=dataset[\"train\"],\n",
-        "        eval_dataset=dataset[\"test\"],\n",
-        "        compute_metrics=compute_accuracy,\n",
-        "        data_collator=data_collator,\n",
-        "        callbacks=[transformers.EarlyStoppingCallback(early_stopping_patience=5), LogSavingCallback()]\n",
-        "    )\n",
-        "\n",
-        "    trainer.train()\n",
-        "    eval_results = trainer.evaluate(dataset[\"test\"])\n",
-        "    print('Learning rate:', lr, 'Accuracy:', eval_results['eval_accuracy'])"
-      ],
+      "execution_count": null,
       "metadata": {
-        "id": "nd_usta7WzTX",
-        "outputId": "80a41eb7-ce0f-4e04-ab0c-ce56e2519bc0",
         "colab": {
           "base_uri": "https://localhost:8080/",
           "height": 1000
-        }
+        },
+        "id": "nd_usta7WzTX",
+        "outputId": "80a41eb7-ce0f-4e04-ab0c-ce56e2519bc0"
       },
-      "execution_count": 12,
       "outputs": [
         {
-          "output_type": "stream",
           "name": "stderr",
+          "output_type": "stream",
           "text": [
             "/usr/local/lib/python3.10/dist-packages/transformers/training_args.py:1525: FutureWarning: `evaluation_strategy` is deprecated and will be removed in version 4.46 of 🤗 Transformers. Use `eval_strategy` instead\n",
             "  warnings.warn(\n",
@@ -416,11 +375,7 @@
           ]
         },
         {
-          "output_type": "display_data",
           "data": {
-            "text/plain": [
-              "<IPython.core.display.HTML object>"
-            ],
             "text/html": [
               "\n",
               "    <div>\n",
@@ -470,16 +425,16 @@
               "    </tr>\n",
               "  </tbody>\n",
               "</table><p>"
+            ],
+            "text/plain": [
+              "<IPython.core.display.HTML object>"
             ]
           },
-          "metadata": {}
+          "metadata": {},
+          "output_type": "display_data"
         },
         {
-          "output_type": "display_data",
           "data": {
-            "text/plain": [
-              "<IPython.core.display.HTML object>"
-            ],
             "text/html": [
               "\n",
               "    <div>\n",
@@ -488,13 +443,17 @@
               "      [3125/3125 00:14]\n",
               "    </div>\n",
               "    "
+            ],
+            "text/plain": [
+              "<IPython.core.display.HTML object>"
             ]
           },
-          "metadata": {}
+          "metadata": {},
+          "output_type": "display_data"
         },
         {
-          "output_type": "stream",
           "name": "stderr",
+          "output_type": "stream",
           "text": [
             "/usr/local/lib/python3.10/dist-packages/transformers/training_args.py:1525: FutureWarning: `evaluation_strategy` is deprecated and will be removed in version 4.46 of 🤗 Transformers. Use `eval_strategy` instead\n",
             "  warnings.warn(\n",
@@ -502,18 +461,14 @@
           ]
         },
         {
-          "output_type": "stream",
           "name": "stdout",
+          "output_type": "stream",
           "text": [
             "Learning rate: 5e-06 Accuracy: 0.5\n"
           ]
         },
         {
-          "output_type": "display_data",
           "data": {
-            "text/plain": [
-              "<IPython.core.display.HTML object>"
-            ],
             "text/html": [
               "\n",
               "    <div>\n",
@@ -563,16 +518,16 @@
               "    </tr>\n",
               "  </tbody>\n",
               "</table><p>"
+            ],
+            "text/plain": [
+              "<IPython.core.display.HTML object>"
             ]
           },
-          "metadata": {}
+          "metadata": {},
+          "output_type": "display_data"
         },
         {
-          "output_type": "display_data",
           "data": {
-            "text/plain": [
-              "<IPython.core.display.HTML object>"
-            ],
             "text/html": [
               "\n",
               "    <div>\n",
@@ -581,13 +536,17 @@
               "      [3125/3125 00:12]\n",
               "    </div>\n",
               "    "
+            ],
+            "text/plain": [
+              "<IPython.core.display.HTML object>"
             ]
           },
-          "metadata": {}
+          "metadata": {},
+          "output_type": "display_data"
         },
         {
-          "output_type": "stream",
           "name": "stderr",
+          "output_type": "stream",
           "text": [
             "/usr/local/lib/python3.10/dist-packages/transformers/training_args.py:1525: FutureWarning: `evaluation_strategy` is deprecated and will be removed in version 4.46 of 🤗 Transformers. Use `eval_strategy` instead\n",
             "  warnings.warn(\n",
@@ -595,18 +554,14 @@
           ]
         },
         {
-          "output_type": "stream",
           "name": "stdout",
+          "output_type": "stream",
           "text": [
             "Learning rate: 5e-05 Accuracy: 0.52612\n"
           ]
         },
         {
-          "output_type": "display_data",
           "data": {
-            "text/plain": [
-              "<IPython.core.display.HTML object>"
-            ],
             "text/html": [
               "\n",
               "    <div>\n",
@@ -656,16 +611,16 @@
               "    </tr>\n",
               "  </tbody>\n",
               "</table><p>"
+            ],
+            "text/plain": [
+              "<IPython.core.display.HTML object>"
             ]
           },
-          "metadata": {}
+          "metadata": {},
+          "output_type": "display_data"
         },
         {
-          "output_type": "display_data",
           "data": {
-            "text/plain": [
-              "<IPython.core.display.HTML object>"
-            ],
             "text/html": [
               "\n",
               "    <div>\n",
@@ -674,13 +629,17 @@
               "      [3125/3125 00:12]\n",
               "    </div>\n",
               "    "
+            ],
+            "text/plain": [
+              "<IPython.core.display.HTML object>"
             ]
           },
-          "metadata": {}
+          "metadata": {},
+          "output_type": "display_data"
         },
         {
-          "output_type": "stream",
           "name": "stderr",
+          "output_type": "stream",
           "text": [
             "/usr/local/lib/python3.10/dist-packages/transformers/training_args.py:1525: FutureWarning: `evaluation_strategy` is deprecated and will be removed in version 4.46 of 🤗 Transformers. Use `eval_strategy` instead\n",
             "  warnings.warn(\n",
@@ -688,18 +647,14 @@
           ]
         },
         {
-          "output_type": "stream",
           "name": "stdout",
+          "output_type": "stream",
           "text": [
             "Learning rate: 0.0005 Accuracy: 0.65312\n"
           ]
         },
         {
-          "output_type": "display_data",
           "data": {
-            "text/plain": [
-              "<IPython.core.display.HTML object>"
-            ],
             "text/html": [
               "\n",
               "    <div>\n",
@@ -749,16 +704,16 @@
               "    </tr>\n",
               "  </tbody>\n",
               "</table><p>"
+            ],
+            "text/plain": [
+              "<IPython.core.display.HTML object>"
             ]
           },
-          "metadata": {}
+          "metadata": {},
+          "output_type": "display_data"
         },
         {
-          "output_type": "display_data",
           "data": {
-            "text/plain": [
-              "<IPython.core.display.HTML object>"
-            ],
             "text/html": [
               "\n",
               "    <div>\n",
@@ -767,13 +722,17 @@
               "      [3125/3125 00:12]\n",
               "    </div>\n",
               "    "
+            ],
+            "text/plain": [
+              "<IPython.core.display.HTML object>"
             ]
           },
-          "metadata": {}
+          "metadata": {},
+          "output_type": "display_data"
         },
         {
-          "output_type": "stream",
           "name": "stderr",
+          "output_type": "stream",
           "text": [
             "/usr/local/lib/python3.10/dist-packages/transformers/training_args.py:1525: FutureWarning: `evaluation_strategy` is deprecated and will be removed in version 4.46 of 🤗 Transformers. Use `eval_strategy` instead\n",
             "  warnings.warn(\n",
@@ -781,18 +740,14 @@
           ]
         },
         {
-          "output_type": "stream",
           "name": "stdout",
+          "output_type": "stream",
           "text": [
             "Learning rate: 0.005 Accuracy: 0.72672\n"
           ]
         },
         {
-          "output_type": "display_data",
           "data": {
-            "text/plain": [
-              "<IPython.core.display.HTML object>"
-            ],
             "text/html": [
               "\n",
               "    <div>\n",
@@ -842,16 +797,16 @@
               "    </tr>\n",
               "  </tbody>\n",
               "</table><p>"
+            ],
+            "text/plain": [
+              "<IPython.core.display.HTML object>"
             ]
           },
-          "metadata": {}
+          "metadata": {},
+          "output_type": "display_data"
         },
         {
-          "output_type": "display_data",
           "data": {
-            "text/plain": [
-              "<IPython.core.display.HTML object>"
-            ],
             "text/html": [
               "\n",
               "    <div>\n",
@@ -860,13 +815,17 @@
               "      [3125/3125 00:12]\n",
               "    </div>\n",
               "    "
+            ],
+            "text/plain": [
+              "<IPython.core.display.HTML object>"
             ]
           },
-          "metadata": {}
+          "metadata": {},
+          "output_type": "display_data"
         },
         {
-          "output_type": "stream",
           "name": "stderr",
+          "output_type": "stream",
           "text": [
             "/usr/local/lib/python3.10/dist-packages/transformers/training_args.py:1525: FutureWarning: `evaluation_strategy` is deprecated and will be removed in version 4.46 of 🤗 Transformers. Use `eval_strategy` instead\n",
             "  warnings.warn(\n",
@@ -874,18 +833,14 @@
           ]
         },
         {
-          "output_type": "stream",
           "name": "stdout",
+          "output_type": "stream",
           "text": [
             "Learning rate: 0.005 Accuracy: 0.72672\n"
           ]
         },
         {
-          "output_type": "display_data",
           "data": {
-            "text/plain": [
-              "<IPython.core.display.HTML object>"
-            ],
             "text/html": [
               "\n",
               "    <div>\n",
@@ -935,16 +890,16 @@
               "    </tr>\n",
               "  </tbody>\n",
               "</table><p>"
+            ],
+            "text/plain": [
+              "<IPython.core.display.HTML object>"
             ]
           },
-          "metadata": {}
+          "metadata": {},
+          "output_type": "display_data"
         },
         {
-          "output_type": "display_data",
           "data": {
-            "text/plain": [
-              "<IPython.core.display.HTML object>"
-            ],
             "text/html": [
               "\n",
               "    <div>\n",
@@ -953,13 +908,17 @@
               "      [3125/3125 00:12]\n",
               "    </div>\n",
               "    "
+            ],
+            "text/plain": [
+              "<IPython.core.display.HTML object>"
             ]
           },
-          "metadata": {}
+          "metadata": {},
+          "output_type": "display_data"
         },
         {
-          "output_type": "stream",
           "name": "stderr",
+          "output_type": "stream",
           "text": [
             "/usr/local/lib/python3.10/dist-packages/transformers/training_args.py:1525: FutureWarning: `evaluation_strategy` is deprecated and will be removed in version 4.46 of 🤗 Transformers. Use `eval_strategy` instead\n",
             "  warnings.warn(\n",
@@ -967,18 +926,14 @@
           ]
         },
         {
-          "output_type": "stream",
           "name": "stdout",
+          "output_type": "stream",
           "text": [
             "Learning rate: 0.05 Accuracy: 0.7134\n"
           ]
         },
         {
-          "output_type": "display_data",
           "data": {
-            "text/plain": [
-              "<IPython.core.display.HTML object>"
-            ],
             "text/html": [
               "\n",
               "    <div>\n",
@@ -1028,16 +983,16 @@
               "    </tr>\n",
               "  </tbody>\n",
               "</table><p>"
+            ],
+            "text/plain": [
+              "<IPython.core.display.HTML object>"
             ]
           },
-          "metadata": {}
+          "metadata": {},
+          "output_type": "display_data"
         },
         {
-          "output_type": "display_data",
           "data": {
-            "text/plain": [
-              "<IPython.core.display.HTML object>"
-            ],
             "text/html": [
               "\n",
               "    <div>\n",
@@ -1046,48 +1001,91 @@
               "      [3125/3125 00:12]\n",
               "    </div>\n",
               "    "
+            ],
+            "text/plain": [
+              "<IPython.core.display.HTML object>"
             ]
           },
-          "metadata": {}
+          "metadata": {},
+          "output_type": "display_data"
         },
         {
-          "output_type": "stream",
           "name": "stdout",
+          "output_type": "stream",
           "text": [
             "Learning rate: 0.5 Accuracy: 0.5198\n"
           ]
         }
+      ],
+      "source": [
+        "for lr in [0.000005, 0.00005, 0.0005, 0.005, 0.05, 0.5]:\n",
+        "\n",
+        "    # create the model\n",
+        "    config = BasicConfig(\n",
+        "        vocab_size = tokenizer.vocab_size,\n",
+        "        num_labels = len(set(dataset['train']['label'])),\n",
+        "        embedding_dim = 64,\n",
+        "        filter_size = 3,\n",
+        "        num_filters = 10,\n",
+        "    )\n",
+        "\n",
+        "    model = SimpleCNN(config)\n",
+        "\n",
+        "    # Set training arguments\n",
+        "    trainer_args = transformers.TrainingArguments(\n",
+        "        \"checkpoints\",\n",
+        "        eval_strategy=\"steps\",\n",
+        "        logging_strategy=\"steps\",\n",
+        "        load_best_model_at_end=True,\n",
+        "        eval_steps=500,\n",
+        "        logging_steps=500,\n",
+        "        learning_rate=lr, # <--- parameter goes here\n",
+        "        per_device_train_batch_size=8,\n",
+        "        max_steps=2500,\n",
+        "        report_to=\"none\", # disable logging integrations so training does not ask for a wandb login\n",
+        "    )\n",
+        "\n",
+        "    trainer = transformers.Trainer(\n",
+        "        model=model,\n",
+        "        args=trainer_args,\n",
+        "        train_dataset=dataset[\"train\"],\n",
+        "        eval_dataset=dataset[\"test\"],\n",
+        "        compute_metrics=compute_accuracy,\n",
+        "        data_collator=data_collator,\n",
+        "        callbacks=[transformers.EarlyStoppingCallback(early_stopping_patience=5), LogSavingCallback()]\n",
+        "    )\n",
+        "\n",
+        "    trainer.train()\n",
+        "    eval_results = trainer.evaluate(dataset[\"test\"])\n",
+        "    print('Learning rate:', lr, 'Accuracy:', eval_results['eval_accuracy'])"
       ]
     },
     {
       "cell_type": "markdown",
+      "metadata": {
+        "id": "KL6N5tz1mddD"
+      },
       "source": [
         "---\n",
         "# Hyperparameter search – Second option\n",
         "\n",
         "* Hyperparameter search using [Optuna](https://optuna.org/)"
-      ],
-      "metadata": {
-        "id": "KL6N5tz1mddD"
-      }
+      ]
     },
     {
       "cell_type": "code",
-      "source": [
-        "!pip install optuna"
-      ],
+      "execution_count": 13,
       "metadata": {
-        "id": "Hes_HBvOmrKD",
-        "outputId": "1e5f78e0-116b-4a95-f731-a4efcfd2a353",
         "colab": {
           "base_uri": "https://localhost:8080/"
-        }
+        },
+        "id": "Hes_HBvOmrKD",
+        "outputId": "1e5f78e0-116b-4a95-f731-a4efcfd2a353"
       },
-      "execution_count": 13,
       "outputs": [
         {
-          "output_type": "stream",
           "name": "stdout",
+          "output_type": "stream",
           "text": [
             "Collecting optuna\n",
             "  Downloading optuna-4.0.0-py3-none-any.whl.metadata (16 kB)\n",
@@ -1116,75 +1114,26 @@
             "Successfully installed Mako-1.3.5 alembic-1.13.2 colorlog-6.8.2 optuna-4.0.0\n"
           ]
         }
+      ],
+      "source": [
+        "!pip install optuna"
       ]
     },
     {
       "cell_type": "code",
-      "source": [
-        "import optuna\n",
-        "\n",
-        "def objective(trial):\n",
-        "    # Define the search space for hyperparameters\n",
-        "    learning_rate = trial.suggest_float(\"learning_rate\", 5e-4, 5e-2, log=True)\n",
-        "    num_filters = trial.suggest_categorical(\"num_filters\", [10, 16, 24])\n",
-        "\n",
-        "    # create the model\n",
-        "    config = BasicConfig(\n",
-        "        vocab_size = tokenizer.vocab_size,\n",
-        "        num_labels = len(set(dataset['train']['label'])),\n",
-        "        embedding_dim = 64,\n",
-        "        filter_size = 3,\n",
-        "        num_filters = num_filters, # <--- parameter goes here\n",
-        "    )\n",
-        "\n",
-        "    model = SimpleCNN(config)\n",
-        "\n",
-        "    # Set training arguments\n",
-        "    trainer_args = transformers.TrainingArguments(\n",
-        "        \"checkpoints\",\n",
-        "        evaluation_strategy=\"steps\",\n",
-        "        logging_strategy=\"steps\",\n",
-        "        load_best_model_at_end=True,\n",
-        "        eval_steps=500,\n",
-        "        logging_steps=500,\n",
-        "        learning_rate=learning_rate, # <--- parameter goes here\n",
-        "        per_device_train_batch_size=8,\n",
-        "        max_steps=2500,\n",
-        "    )\n",
-        "\n",
-        "    trainer = transformers.Trainer(\n",
-        "        model=model,\n",
-        "        args=trainer_args,\n",
-        "        train_dataset=dataset[\"train\"],\n",
-        "        eval_dataset=dataset[\"test\"],\n",
-        "        compute_metrics=compute_accuracy,\n",
-        "        data_collator=data_collator,\n",
-        "        callbacks=[transformers.EarlyStoppingCallback(early_stopping_patience=5), LogSavingCallback()]\n",
-        "    )\n",
-        "\n",
-        "    trainer.train()\n",
-        "    eval_results = trainer.evaluate(dataset[\"test\"])\n",
-        "    print('Learning rate:', learning_rate, 'Filters:', num_filters, 'Accuracy:', eval_results['eval_accuracy'])\n",
-        "    return eval_results['eval_accuracy']\n",
-        "\n",
-        "\n",
-        "\n",
-        "study = optuna.create_study(direction=\"maximize\")\n",
-        "study.optimize(objective, n_trials=3) # <--- How many trials we run, more would be needed in real case!"
-      ],
+      "execution_count": null,
       "metadata": {
-        "id": "Ag66TkGumvSU",
-        "outputId": "1d0ad2ac-4874-41ef-db7f-c873b23468f1",
         "colab": {
           "base_uri": "https://localhost:8080/",
           "height": 962
-        }
+        },
+        "id": "Ag66TkGumvSU",
+        "outputId": "1d0ad2ac-4874-41ef-db7f-c873b23468f1"
       },
-      "execution_count": 18,
       "outputs": [
         {
-          "output_type": "stream",
           "name": "stderr",
+          "output_type": "stream",
           "text": [
             "/usr/local/lib/python3.10/dist-packages/transformers/training_args.py:1525: FutureWarning: `evaluation_strategy` is deprecated and will be removed in version 4.46 of 🤗 Transformers. Use `eval_strategy` instead\n",
             "  warnings.warn(\n",
@@ -1192,11 +1141,7 @@
           ]
         },
         {
-          "output_type": "display_data",
           "data": {
-            "text/plain": [
-              "<IPython.core.display.HTML object>"
-            ],
             "text/html": [
               "\n",
               "    <div>\n",
@@ -1246,16 +1191,16 @@
               "    </tr>\n",
               "  </tbody>\n",
               "</table><p>"
+            ],
+            "text/plain": [
+              "<IPython.core.display.HTML object>"
             ]
           },
-          "metadata": {}
+          "metadata": {},
+          "output_type": "display_data"
         },
         {
-          "output_type": "display_data",
           "data": {
-            "text/plain": [
-              "<IPython.core.display.HTML object>"
-            ],
             "text/html": [
               "\n",
               "    <div>\n",
@@ -1264,13 +1209,17 @@
               "      [3125/3125 00:13]\n",
               "    </div>\n",
               "    "
+            ],
+            "text/plain": [
+              "<IPython.core.display.HTML object>"
             ]
           },
-          "metadata": {}
+          "metadata": {},
+          "output_type": "display_data"
         },
         {
-          "output_type": "stream",
           "name": "stderr",
+          "output_type": "stream",
           "text": [
             "/usr/local/lib/python3.10/dist-packages/transformers/training_args.py:1525: FutureWarning: `evaluation_strategy` is deprecated and will be removed in version 4.46 of 🤗 Transformers. Use `eval_strategy` instead\n",
             "  warnings.warn(\n",
@@ -1278,18 +1227,14 @@
           ]
         },
         {
-          "output_type": "stream",
           "name": "stdout",
+          "output_type": "stream",
           "text": [
             "Learning rate: 0.0032891344286570525 Filters: 10 Accuracy: 0.72072\n"
           ]
         },
         {
-          "output_type": "display_data",
           "data": {
-            "text/plain": [
-              "<IPython.core.display.HTML object>"
-            ],
             "text/html": [
               "\n",
               "    <div>\n",
@@ -1339,16 +1284,16 @@
               "    </tr>\n",
               "  </tbody>\n",
               "</table><p>"
+            ],
+            "text/plain": [
+              "<IPython.core.display.HTML object>"
             ]
           },
-          "metadata": {}
+          "metadata": {},
+          "output_type": "display_data"
         },
         {
-          "output_type": "display_data",
           "data": {
-            "text/plain": [
-              "<IPython.core.display.HTML object>"
-            ],
             "text/html": [
               "\n",
               "    <div>\n",
@@ -1357,13 +1302,17 @@
               "      [3125/3125 00:12]\n",
               "    </div>\n",
               "    "
+            ],
+            "text/plain": [
+              "<IPython.core.display.HTML object>"
             ]
           },
-          "metadata": {}
+          "metadata": {},
+          "output_type": "display_data"
         },
         {
-          "output_type": "stream",
           "name": "stderr",
+          "output_type": "stream",
           "text": [
             "/usr/local/lib/python3.10/dist-packages/transformers/training_args.py:1525: FutureWarning: `evaluation_strategy` is deprecated and will be removed in version 4.46 of 🤗 Transformers. Use `eval_strategy` instead\n",
             "  warnings.warn(\n",
@@ -1371,18 +1320,14 @@
           ]
         },
         {
-          "output_type": "stream",
           "name": "stdout",
+          "output_type": "stream",
           "text": [
             "Learning rate: 0.002434791974958158 Filters: 16 Accuracy: 0.73228\n"
           ]
         },
         {
-          "output_type": "display_data",
           "data": {
-            "text/plain": [
-              "<IPython.core.display.HTML object>"
-            ],
             "text/html": [
               "\n",
               "    <div>\n",
@@ -1432,16 +1377,16 @@
               "    </tr>\n",
               "  </tbody>\n",
               "</table><p>"
+            ],
+            "text/plain": [
+              "<IPython.core.display.HTML object>"
             ]
           },
-          "metadata": {}
+          "metadata": {},
+          "output_type": "display_data"
         },
         {
-          "output_type": "display_data",
           "data": {
-            "text/plain": [
-              "<IPython.core.display.HTML object>"
-            ],
             "text/html": [
               "\n",
               "    <div>\n",
@@ -1450,29 +1395,87 @@
               "      [3125/3125 00:13]\n",
               "    </div>\n",
               "    "
+            ],
+            "text/plain": [
+              "<IPython.core.display.HTML object>"
             ]
           },
-          "metadata": {}
+          "metadata": {},
+          "output_type": "display_data"
         },
         {
-          "output_type": "stream",
           "name": "stdout",
+          "output_type": "stream",
           "text": [
             "Learning rate: 0.03601436163120907 Filters: 24 Accuracy: 0.7332\n"
           ]
         }
+      ],
+      "source": [
+        "import optuna\n",
+        "\n",
+        "def objective(trial):\n",
+        "    \"\"\"Optuna objective: train one SimpleCNN with sampled hyperparameters and return its eval accuracy.\"\"\"\n",
+        "    # Define the search space for hyperparameters\n",
+        "    learning_rate = trial.suggest_float(\"learning_rate\", 5e-4, 5e-2, log=True)\n",
+        "    num_filters = trial.suggest_categorical(\"num_filters\", [10, 16, 24])\n",
+        "\n",
+        "    # create the model\n",
+        "    config = BasicConfig(\n",
+        "        vocab_size = tokenizer.vocab_size,\n",
+        "        num_labels = len(set(dataset['train']['label'])),\n",
+        "        embedding_dim = 64,\n",
+        "        filter_size = 3,\n",
+        "        num_filters = num_filters, # <--- parameter goes here\n",
+        "    )\n",
+        "    model = SimpleCNN(config)\n",
+        "\n",
+        "    # Set training arguments\n",
+        "    trainer_args = transformers.TrainingArguments(\n",
+        "        f\"checkpoints/trial_{trial.number}\", # one output directory per trial so trials do not overwrite each other's checkpoints\n",
+        "        eval_strategy=\"steps\",\n",
+        "        logging_strategy=\"steps\",\n",
+        "        load_best_model_at_end=True,\n",
+        "        eval_steps=500,\n",
+        "        logging_steps=500,\n",
+        "        learning_rate=learning_rate, # <--- parameter goes here\n",
+        "        per_device_train_batch_size=8,\n",
+        "        max_steps=2500,\n",
+        "        report_to=\"none\", # skip wandb login\n",
+        "    )\n",
+        "\n",
+        "    trainer = transformers.Trainer(\n",
+        "        model=model,\n",
+        "        args=trainer_args,\n",
+        "        train_dataset=dataset[\"train\"],\n",
+        "        eval_dataset=dataset[\"test\"], # NOTE: the test split doubles as the validation set here; tune on a separate validation split in a real case\n",
+        "        compute_metrics=compute_accuracy,\n",
+        "        data_collator=data_collator,\n",
+        "        callbacks=[transformers.EarlyStoppingCallback(early_stopping_patience=5), LogSavingCallback()]\n",
+        "    )\n",
+        "\n",
+        "    trainer.train()\n",
+        "    eval_results = trainer.evaluate(dataset[\"test\"])\n",
+        "    print('Learning rate:', learning_rate, 'Filters:', num_filters, 'Accuracy:', eval_results['eval_accuracy'])\n",
+        "    return eval_results['eval_accuracy']\n",
+        "\n",
+        "\n",
+        "\n",
+        "study = optuna.create_study(direction=\"maximize\", sampler=optuna.samplers.TPESampler(seed=42)) # fixed seed so the sampled hyperparameters are reproducible\n",
+        "study.optimize(objective, n_trials=3) # <--- How many trials we run; more would be needed in a real case!"
       ]
     }
   ],
   "metadata": {
+    "accelerator": "GPU",
     "colab": {
       "collapsed_sections": [
         "wo13ZXoZYB6J"
       ],
-      "name": "hf_trainer_cnn.ipynb",
-      "provenance": [],
       "gpuType": "T4",
-      "include_colab_link": true
+      "include_colab_link": true,
+      "name": "hf_trainer_cnn.ipynb",
+      "provenance": []
     },
     "kernelspec": {
       "display_name": "Python 3",
@@ -1483,98 +1486,25 @@
     },
     "widgets": {
       "application/vnd.jupyter.widget-state+json": {
-        "6a6875eea30b4a64b87996f30d6b9b0e": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_name": "HBoxModel",
-          "model_module_version": "1.5.0",
-          "state": {
-            "_dom_classes": [],
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "HBoxModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/controls",
-            "_view_module_version": "1.5.0",
-            "_view_name": "HBoxView",
-            "box_style": "",
-            "children": [
-              "IPY_MODEL_ca38330d9fee4f6eb3db8dd7c62d3b86",
-              "IPY_MODEL_ac737737ea08455dbcad7d813b3ba794",
-              "IPY_MODEL_f80427bc9bb34432a15c110c41552f1d"
-            ],
-            "layout": "IPY_MODEL_d2f642b153b441b39d3e13c8dc291de5"
-          }
-        },
-        "ca38330d9fee4f6eb3db8dd7c62d3b86": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_name": "HTMLModel",
-          "model_module_version": "1.5.0",
-          "state": {
-            "_dom_classes": [],
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "HTMLModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/controls",
-            "_view_module_version": "1.5.0",
-            "_view_name": "HTMLView",
-            "description": "",
-            "description_tooltip": null,
-            "layout": "IPY_MODEL_976a880692e2476c8bf9e0f6a6822129",
-            "placeholder": "​",
-            "style": "IPY_MODEL_bec81b91b5bd4dd9a425ab7c65ccbb3c",
-            "value": "Map: 100%"
-          }
-        },
-        "ac737737ea08455dbcad7d813b3ba794": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_name": "FloatProgressModel",
-          "model_module_version": "1.5.0",
-          "state": {
-            "_dom_classes": [],
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "FloatProgressModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/controls",
-            "_view_module_version": "1.5.0",
-            "_view_name": "ProgressView",
-            "bar_style": "success",
-            "description": "",
-            "description_tooltip": null,
-            "layout": "IPY_MODEL_f05ac4f5a4d44ec2b659a467f037978d",
-            "max": 25000,
-            "min": 0,
-            "orientation": "horizontal",
-            "style": "IPY_MODEL_fa20945c5d7a4089abba70a393de4173",
-            "value": 25000
-          }
-        },
-        "f80427bc9bb34432a15c110c41552f1d": {
+        "0485ff6f20ec4e3bb225c942ba914ef0": {
           "model_module": "@jupyter-widgets/controls",
-          "model_name": "HTMLModel",
           "model_module_version": "1.5.0",
+          "model_name": "DescriptionStyleModel",
           "state": {
-            "_dom_classes": [],
             "_model_module": "@jupyter-widgets/controls",
             "_model_module_version": "1.5.0",
-            "_model_name": "HTMLModel",
+            "_model_name": "DescriptionStyleModel",
             "_view_count": null,
-            "_view_module": "@jupyter-widgets/controls",
-            "_view_module_version": "1.5.0",
-            "_view_name": "HTMLView",
-            "description": "",
-            "description_tooltip": null,
-            "layout": "IPY_MODEL_2f7a6c1f2e3a49c1b2b9df8b263e4437",
-            "placeholder": "​",
-            "style": "IPY_MODEL_eec6fa9095a141ef95285777a970a390",
-            "value": " 25000/25000 [01:33&lt;00:00, 204.73 examples/s]"
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "StyleView",
+            "description_width": ""
           }
         },
-        "d2f642b153b441b39d3e13c8dc291de5": {
+        "1bdf39342f314fbba05e0a0d066f2a0f": {
           "model_module": "@jupyter-widgets/base",
-          "model_name": "LayoutModel",
           "model_module_version": "1.2.0",
+          "model_name": "LayoutModel",
           "state": {
             "_model_module": "@jupyter-widgets/base",
             "_model_module_version": "1.2.0",
@@ -1623,10 +1553,34 @@
             "width": null
           }
         },
-        "976a880692e2476c8bf9e0f6a6822129": {
+        "1c4288674e7245e9b4697b55950731ef": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_module_version": "1.5.0",
+          "model_name": "FloatProgressModel",
+          "state": {
+            "_dom_classes": [],
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "FloatProgressModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/controls",
+            "_view_module_version": "1.5.0",
+            "_view_name": "ProgressView",
+            "bar_style": "success",
+            "description": "",
+            "description_tooltip": null,
+            "layout": "IPY_MODEL_6c8d2ad0ffce4d2081369030a4c4304d",
+            "max": 4203,
+            "min": 0,
+            "orientation": "horizontal",
+            "style": "IPY_MODEL_db761c9a410f4b8bb1b2ec88fb0b847a",
+            "value": 4203
+          }
+        },
+        "2f7a6c1f2e3a49c1b2b9df8b263e4437": {
           "model_module": "@jupyter-widgets/base",
-          "model_name": "LayoutModel",
           "model_module_version": "1.2.0",
+          "model_name": "LayoutModel",
           "state": {
             "_model_module": "@jupyter-widgets/base",
             "_model_module_version": "1.2.0",
@@ -1675,10 +1629,10 @@
             "width": null
           }
         },
-        "bec81b91b5bd4dd9a425ab7c65ccbb3c": {
+        "432f18bd11114415bd5fb91cce8de324": {
           "model_module": "@jupyter-widgets/controls",
-          "model_name": "DescriptionStyleModel",
           "model_module_version": "1.5.0",
+          "model_name": "DescriptionStyleModel",
           "state": {
             "_model_module": "@jupyter-widgets/controls",
             "_model_module_version": "1.5.0",
@@ -1690,10 +1644,31 @@
             "description_width": ""
           }
         },
-        "f05ac4f5a4d44ec2b659a467f037978d": {
+        "484ce987ce134017a6444a883f483eb1": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_module_version": "1.5.0",
+          "model_name": "HTMLModel",
+          "state": {
+            "_dom_classes": [],
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "HTMLModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/controls",
+            "_view_module_version": "1.5.0",
+            "_view_name": "HTMLView",
+            "description": "",
+            "description_tooltip": null,
+            "layout": "IPY_MODEL_8a578b9edde748a9bee19f220e90bcf7",
+            "placeholder": "​",
+            "style": "IPY_MODEL_0485ff6f20ec4e3bb225c942ba914ef0",
+            "value": "Map: 100%"
+          }
+        },
+        "61a2564e98294a3b98f202ecb051e1ce": {
           "model_module": "@jupyter-widgets/base",
-          "model_name": "LayoutModel",
           "model_module_version": "1.2.0",
+          "model_name": "LayoutModel",
           "state": {
             "_model_module": "@jupyter-widgets/base",
             "_model_module_version": "1.2.0",
@@ -1742,26 +1717,31 @@
             "width": null
           }
         },
-        "fa20945c5d7a4089abba70a393de4173": {
+        "63f44a1307c44b9e80d33f556bf6c862": {
           "model_module": "@jupyter-widgets/controls",
-          "model_name": "ProgressStyleModel",
           "model_module_version": "1.5.0",
+          "model_name": "HTMLModel",
           "state": {
+            "_dom_classes": [],
             "_model_module": "@jupyter-widgets/controls",
             "_model_module_version": "1.5.0",
-            "_model_name": "ProgressStyleModel",
+            "_model_name": "HTMLModel",
             "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "StyleView",
-            "bar_color": null,
-            "description_width": ""
+            "_view_module": "@jupyter-widgets/controls",
+            "_view_module_version": "1.5.0",
+            "_view_name": "HTMLView",
+            "description": "",
+            "description_tooltip": null,
+            "layout": "IPY_MODEL_1bdf39342f314fbba05e0a0d066f2a0f",
+            "placeholder": "​",
+            "style": "IPY_MODEL_432f18bd11114415bd5fb91cce8de324",
+            "value": " 25000/25000 [01:06&lt;00:00, 786.87 examples/s]"
           }
         },
-        "2f7a6c1f2e3a49c1b2b9df8b263e4437": {
+        "646fc70d8d214df5905d0783193312fa": {
           "model_module": "@jupyter-widgets/base",
-          "model_name": "LayoutModel",
           "model_module_version": "1.2.0",
+          "model_name": "LayoutModel",
           "state": {
             "_model_module": "@jupyter-widgets/base",
             "_model_module_version": "1.2.0",
@@ -1810,25 +1790,10 @@
             "width": null
           }
         },
-        "eec6fa9095a141ef95285777a970a390": {
+        "6a6875eea30b4a64b87996f30d6b9b0e": {
           "model_module": "@jupyter-widgets/controls",
-          "model_name": "DescriptionStyleModel",
           "model_module_version": "1.5.0",
-          "state": {
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "DescriptionStyleModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "StyleView",
-            "description_width": ""
-          }
-        },
-        "6d68fd610b8c48f08ee6f56101119802": {
-          "model_module": "@jupyter-widgets/controls",
           "model_name": "HBoxModel",
-          "model_module_version": "1.5.0",
           "state": {
             "_dom_classes": [],
             "_model_module": "@jupyter-widgets/controls",
@@ -1840,83 +1805,17 @@
             "_view_name": "HBoxView",
             "box_style": "",
             "children": [
-              "IPY_MODEL_484ce987ce134017a6444a883f483eb1",
-              "IPY_MODEL_d0d203490e554fe5b62a2aca06ec2b85",
-              "IPY_MODEL_63f44a1307c44b9e80d33f556bf6c862"
+              "IPY_MODEL_ca38330d9fee4f6eb3db8dd7c62d3b86",
+              "IPY_MODEL_ac737737ea08455dbcad7d813b3ba794",
+              "IPY_MODEL_f80427bc9bb34432a15c110c41552f1d"
             ],
-            "layout": "IPY_MODEL_fd91bdfb63c9476195d08b56fcf8f503"
-          }
-        },
-        "484ce987ce134017a6444a883f483eb1": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_name": "HTMLModel",
-          "model_module_version": "1.5.0",
-          "state": {
-            "_dom_classes": [],
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "HTMLModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/controls",
-            "_view_module_version": "1.5.0",
-            "_view_name": "HTMLView",
-            "description": "",
-            "description_tooltip": null,
-            "layout": "IPY_MODEL_8a578b9edde748a9bee19f220e90bcf7",
-            "placeholder": "​",
-            "style": "IPY_MODEL_0485ff6f20ec4e3bb225c942ba914ef0",
-            "value": "Map: 100%"
-          }
-        },
-        "d0d203490e554fe5b62a2aca06ec2b85": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_name": "FloatProgressModel",
-          "model_module_version": "1.5.0",
-          "state": {
-            "_dom_classes": [],
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "FloatProgressModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/controls",
-            "_view_module_version": "1.5.0",
-            "_view_name": "ProgressView",
-            "bar_style": "success",
-            "description": "",
-            "description_tooltip": null,
-            "layout": "IPY_MODEL_646fc70d8d214df5905d0783193312fa",
-            "max": 25000,
-            "min": 0,
-            "orientation": "horizontal",
-            "style": "IPY_MODEL_958792a865f94e04afac06938ab615f9",
-            "value": 25000
-          }
-        },
-        "63f44a1307c44b9e80d33f556bf6c862": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_name": "HTMLModel",
-          "model_module_version": "1.5.0",
-          "state": {
-            "_dom_classes": [],
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "HTMLModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/controls",
-            "_view_module_version": "1.5.0",
-            "_view_name": "HTMLView",
-            "description": "",
-            "description_tooltip": null,
-            "layout": "IPY_MODEL_1bdf39342f314fbba05e0a0d066f2a0f",
-            "placeholder": "​",
-            "style": "IPY_MODEL_432f18bd11114415bd5fb91cce8de324",
-            "value": " 25000/25000 [01:06&lt;00:00, 786.87 examples/s]"
+            "layout": "IPY_MODEL_d2f642b153b441b39d3e13c8dc291de5"
           }
         },
-        "fd91bdfb63c9476195d08b56fcf8f503": {
+        "6c8d2ad0ffce4d2081369030a4c4304d": {
           "model_module": "@jupyter-widgets/base",
-          "model_name": "LayoutModel",
           "model_module_version": "1.2.0",
+          "model_name": "LayoutModel",
           "state": {
             "_model_module": "@jupyter-widgets/base",
             "_model_module_version": "1.2.0",
@@ -1965,10 +1864,32 @@
             "width": null
           }
         },
-        "8a578b9edde748a9bee19f220e90bcf7": {
+        "6d68fd610b8c48f08ee6f56101119802": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_module_version": "1.5.0",
+          "model_name": "HBoxModel",
+          "state": {
+            "_dom_classes": [],
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "HBoxModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/controls",
+            "_view_module_version": "1.5.0",
+            "_view_name": "HBoxView",
+            "box_style": "",
+            "children": [
+              "IPY_MODEL_484ce987ce134017a6444a883f483eb1",
+              "IPY_MODEL_d0d203490e554fe5b62a2aca06ec2b85",
+              "IPY_MODEL_63f44a1307c44b9e80d33f556bf6c862"
+            ],
+            "layout": "IPY_MODEL_fd91bdfb63c9476195d08b56fcf8f503"
+          }
+        },
+        "7a10fc4726e6413397d474c3b5f187b9": {
           "model_module": "@jupyter-widgets/base",
-          "model_name": "LayoutModel",
           "model_module_version": "1.2.0",
+          "model_name": "LayoutModel",
           "state": {
             "_model_module": "@jupyter-widgets/base",
             "_model_module_version": "1.2.0",
@@ -2017,10 +1938,10 @@
             "width": null
           }
         },
-        "0485ff6f20ec4e3bb225c942ba914ef0": {
+        "82de00af0c6140c49a1ab5868158b93b": {
           "model_module": "@jupyter-widgets/controls",
-          "model_name": "DescriptionStyleModel",
           "model_module_version": "1.5.0",
+          "model_name": "DescriptionStyleModel",
           "state": {
             "_model_module": "@jupyter-widgets/controls",
             "_model_module_version": "1.5.0",
@@ -2032,10 +1953,10 @@
             "description_width": ""
           }
         },
-        "646fc70d8d214df5905d0783193312fa": {
+        "8a578b9edde748a9bee19f220e90bcf7": {
           "model_module": "@jupyter-widgets/base",
-          "model_name": "LayoutModel",
           "model_module_version": "1.2.0",
+          "model_name": "LayoutModel",
           "state": {
             "_model_module": "@jupyter-widgets/base",
             "_model_module_version": "1.2.0",
@@ -2086,8 +2007,8 @@
         },
         "958792a865f94e04afac06938ab615f9": {
           "model_module": "@jupyter-widgets/controls",
-          "model_name": "ProgressStyleModel",
           "model_module_version": "1.5.0",
+          "model_name": "ProgressStyleModel",
           "state": {
             "_model_module": "@jupyter-widgets/controls",
             "_model_module_version": "1.5.0",
@@ -2100,10 +2021,10 @@
             "description_width": ""
           }
         },
-        "1bdf39342f314fbba05e0a0d066f2a0f": {
+        "976a880692e2476c8bf9e0f6a6822129": {
           "model_module": "@jupyter-widgets/base",
-          "model_name": "LayoutModel",
           "model_module_version": "1.2.0",
+          "model_name": "LayoutModel",
           "state": {
             "_model_module": "@jupyter-widgets/base",
             "_model_module_version": "1.2.0",
@@ -2152,47 +2073,86 @@
             "width": null
           }
         },
-        "432f18bd11114415bd5fb91cce8de324": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_name": "DescriptionStyleModel",
-          "model_module_version": "1.5.0",
+        "ab20b48f72ce4486a08ccea3f4624486": {
+          "model_module": "@jupyter-widgets/base",
+          "model_module_version": "1.2.0",
+          "model_name": "LayoutModel",
           "state": {
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "DescriptionStyleModel",
+            "_model_module": "@jupyter-widgets/base",
+            "_model_module_version": "1.2.0",
+            "_model_name": "LayoutModel",
             "_view_count": null,
             "_view_module": "@jupyter-widgets/base",
             "_view_module_version": "1.2.0",
-            "_view_name": "StyleView",
-            "description_width": ""
+            "_view_name": "LayoutView",
+            "align_content": null,
+            "align_items": null,
+            "align_self": null,
+            "border": null,
+            "bottom": null,
+            "display": null,
+            "flex": null,
+            "flex_flow": null,
+            "grid_area": null,
+            "grid_auto_columns": null,
+            "grid_auto_flow": null,
+            "grid_auto_rows": null,
+            "grid_column": null,
+            "grid_gap": null,
+            "grid_row": null,
+            "grid_template_areas": null,
+            "grid_template_columns": null,
+            "grid_template_rows": null,
+            "height": null,
+            "justify_content": null,
+            "justify_items": null,
+            "left": null,
+            "margin": null,
+            "max_height": null,
+            "max_width": null,
+            "min_height": null,
+            "min_width": null,
+            "object_fit": null,
+            "object_position": null,
+            "order": null,
+            "overflow": null,
+            "overflow_x": null,
+            "overflow_y": null,
+            "padding": null,
+            "right": null,
+            "top": null,
+            "visibility": null,
+            "width": null
           }
         },
-        "ee04263c2ef846e5b1ba93ad687e37b1": {
+        "ac737737ea08455dbcad7d813b3ba794": {
           "model_module": "@jupyter-widgets/controls",
-          "model_name": "HBoxModel",
           "model_module_version": "1.5.0",
+          "model_name": "FloatProgressModel",
           "state": {
             "_dom_classes": [],
             "_model_module": "@jupyter-widgets/controls",
             "_model_module_version": "1.5.0",
-            "_model_name": "HBoxModel",
+            "_model_name": "FloatProgressModel",
             "_view_count": null,
             "_view_module": "@jupyter-widgets/controls",
             "_view_module_version": "1.5.0",
-            "_view_name": "HBoxView",
-            "box_style": "",
-            "children": [
-              "IPY_MODEL_bde5f2c9b4ef4e8c82fddbdb83a67a1e",
-              "IPY_MODEL_1c4288674e7245e9b4697b55950731ef",
-              "IPY_MODEL_eb5b4d35582542d1bdd0296817db1daf"
-            ],
-            "layout": "IPY_MODEL_61a2564e98294a3b98f202ecb051e1ce"
+            "_view_name": "ProgressView",
+            "bar_style": "success",
+            "description": "",
+            "description_tooltip": null,
+            "layout": "IPY_MODEL_f05ac4f5a4d44ec2b659a467f037978d",
+            "max": 25000,
+            "min": 0,
+            "orientation": "horizontal",
+            "style": "IPY_MODEL_fa20945c5d7a4089abba70a393de4173",
+            "value": 25000
           }
         },
         "bde5f2c9b4ef4e8c82fddbdb83a67a1e": {
           "model_module": "@jupyter-widgets/controls",
-          "model_name": "HTMLModel",
           "model_module_version": "1.5.0",
+          "model_name": "HTMLModel",
           "state": {
             "_dom_classes": [],
             "_model_module": "@jupyter-widgets/controls",
@@ -2210,55 +2170,85 @@
             "value": "Downloading builder script: 100%"
           }
         },
-        "1c4288674e7245e9b4697b55950731ef": {
+        "bec81b91b5bd4dd9a425ab7c65ccbb3c": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_module_version": "1.5.0",
+          "model_name": "DescriptionStyleModel",
+          "state": {
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "DescriptionStyleModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "StyleView",
+            "description_width": ""
+          }
+        },
+        "ca38330d9fee4f6eb3db8dd7c62d3b86": {
           "model_module": "@jupyter-widgets/controls",
-          "model_name": "FloatProgressModel",
           "model_module_version": "1.5.0",
+          "model_name": "HTMLModel",
           "state": {
             "_dom_classes": [],
             "_model_module": "@jupyter-widgets/controls",
             "_model_module_version": "1.5.0",
-            "_model_name": "FloatProgressModel",
+            "_model_name": "HTMLModel",
             "_view_count": null,
             "_view_module": "@jupyter-widgets/controls",
             "_view_module_version": "1.5.0",
-            "_view_name": "ProgressView",
-            "bar_style": "success",
+            "_view_name": "HTMLView",
             "description": "",
             "description_tooltip": null,
-            "layout": "IPY_MODEL_6c8d2ad0ffce4d2081369030a4c4304d",
-            "max": 4203,
-            "min": 0,
-            "orientation": "horizontal",
-            "style": "IPY_MODEL_db761c9a410f4b8bb1b2ec88fb0b847a",
-            "value": 4203
+            "layout": "IPY_MODEL_976a880692e2476c8bf9e0f6a6822129",
+            "placeholder": "​",
+            "style": "IPY_MODEL_bec81b91b5bd4dd9a425ab7c65ccbb3c",
+            "value": "Map: 100%"
           }
         },
-        "eb5b4d35582542d1bdd0296817db1daf": {
+        "ca67aa97ba124742a9c3688e03f21f96": {
           "model_module": "@jupyter-widgets/controls",
-          "model_name": "HTMLModel",
           "model_module_version": "1.5.0",
+          "model_name": "DescriptionStyleModel",
+          "state": {
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "DescriptionStyleModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "StyleView",
+            "description_width": ""
+          }
+        },
+        "d0d203490e554fe5b62a2aca06ec2b85": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_module_version": "1.5.0",
+          "model_name": "FloatProgressModel",
           "state": {
             "_dom_classes": [],
             "_model_module": "@jupyter-widgets/controls",
             "_model_module_version": "1.5.0",
-            "_model_name": "HTMLModel",
+            "_model_name": "FloatProgressModel",
             "_view_count": null,
             "_view_module": "@jupyter-widgets/controls",
             "_view_module_version": "1.5.0",
-            "_view_name": "HTMLView",
+            "_view_name": "ProgressView",
+            "bar_style": "success",
             "description": "",
             "description_tooltip": null,
-            "layout": "IPY_MODEL_7a10fc4726e6413397d474c3b5f187b9",
-            "placeholder": "​",
-            "style": "IPY_MODEL_82de00af0c6140c49a1ab5868158b93b",
-            "value": " 4.20k/4.20k [00:00&lt;00:00, 182kB/s]"
+            "layout": "IPY_MODEL_646fc70d8d214df5905d0783193312fa",
+            "max": 25000,
+            "min": 0,
+            "orientation": "horizontal",
+            "style": "IPY_MODEL_958792a865f94e04afac06938ab615f9",
+            "value": 25000
           }
         },
-        "61a2564e98294a3b98f202ecb051e1ce": {
+        "d2f642b153b441b39d3e13c8dc291de5": {
           "model_module": "@jupyter-widgets/base",
-          "model_name": "LayoutModel",
           "model_module_version": "1.2.0",
+          "model_name": "LayoutModel",
           "state": {
             "_model_module": "@jupyter-widgets/base",
             "_model_module_version": "1.2.0",
@@ -2307,62 +2297,69 @@
             "width": null
           }
         },
-        "ab20b48f72ce4486a08ccea3f4624486": {
-          "model_module": "@jupyter-widgets/base",
-          "model_name": "LayoutModel",
-          "model_module_version": "1.2.0",
+        "db761c9a410f4b8bb1b2ec88fb0b847a": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_module_version": "1.5.0",
+          "model_name": "ProgressStyleModel",
           "state": {
-            "_model_module": "@jupyter-widgets/base",
-            "_model_module_version": "1.2.0",
-            "_model_name": "LayoutModel",
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "ProgressStyleModel",
             "_view_count": null,
             "_view_module": "@jupyter-widgets/base",
             "_view_module_version": "1.2.0",
-            "_view_name": "LayoutView",
-            "align_content": null,
-            "align_items": null,
-            "align_self": null,
-            "border": null,
-            "bottom": null,
-            "display": null,
-            "flex": null,
-            "flex_flow": null,
-            "grid_area": null,
-            "grid_auto_columns": null,
-            "grid_auto_flow": null,
-            "grid_auto_rows": null,
-            "grid_column": null,
-            "grid_gap": null,
-            "grid_row": null,
-            "grid_template_areas": null,
-            "grid_template_columns": null,
-            "grid_template_rows": null,
-            "height": null,
-            "justify_content": null,
-            "justify_items": null,
-            "left": null,
-            "margin": null,
-            "max_height": null,
-            "max_width": null,
-            "min_height": null,
-            "min_width": null,
-            "object_fit": null,
-            "object_position": null,
-            "order": null,
-            "overflow": null,
-            "overflow_x": null,
-            "overflow_y": null,
-            "padding": null,
-            "right": null,
-            "top": null,
-            "visibility": null,
-            "width": null
+            "_view_name": "StyleView",
+            "bar_color": null,
+            "description_width": ""
           }
         },
-        "ca67aa97ba124742a9c3688e03f21f96": {
+        "eb5b4d35582542d1bdd0296817db1daf": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_module_version": "1.5.0",
+          "model_name": "HTMLModel",
+          "state": {
+            "_dom_classes": [],
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "HTMLModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/controls",
+            "_view_module_version": "1.5.0",
+            "_view_name": "HTMLView",
+            "description": "",
+            "description_tooltip": null,
+            "layout": "IPY_MODEL_7a10fc4726e6413397d474c3b5f187b9",
+            "placeholder": "​",
+            "style": "IPY_MODEL_82de00af0c6140c49a1ab5868158b93b",
+            "value": " 4.20k/4.20k [00:00&lt;00:00, 182kB/s]"
+          }
+        },
+        "ee04263c2ef846e5b1ba93ad687e37b1": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_module_version": "1.5.0",
+          "model_name": "HBoxModel",
+          "state": {
+            "_dom_classes": [],
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "HBoxModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/controls",
+            "_view_module_version": "1.5.0",
+            "_view_name": "HBoxView",
+            "box_style": "",
+            "children": [
+              "IPY_MODEL_bde5f2c9b4ef4e8c82fddbdb83a67a1e",
+              "IPY_MODEL_1c4288674e7245e9b4697b55950731ef",
+              "IPY_MODEL_eb5b4d35582542d1bdd0296817db1daf"
+            ],
+            "layout": "IPY_MODEL_61a2564e98294a3b98f202ecb051e1ce"
+          }
+        },
+        "eec6fa9095a141ef95285777a970a390": {
           "model_module": "@jupyter-widgets/controls",
-          "model_name": "DescriptionStyleModel",
           "model_module_version": "1.5.0",
+          "model_name": "DescriptionStyleModel",
           "state": {
             "_model_module": "@jupyter-widgets/controls",
             "_model_module_version": "1.5.0",
@@ -2374,10 +2371,10 @@
             "description_width": ""
           }
         },
-        "6c8d2ad0ffce4d2081369030a4c4304d": {
+        "f05ac4f5a4d44ec2b659a467f037978d": {
           "model_module": "@jupyter-widgets/base",
-          "model_name": "LayoutModel",
           "model_module_version": "1.2.0",
+          "model_name": "LayoutModel",
           "state": {
             "_model_module": "@jupyter-widgets/base",
             "_model_module_version": "1.2.0",
@@ -2426,10 +2423,31 @@
             "width": null
           }
         },
-        "db761c9a410f4b8bb1b2ec88fb0b847a": {
+        "f80427bc9bb34432a15c110c41552f1d": {
           "model_module": "@jupyter-widgets/controls",
-          "model_name": "ProgressStyleModel",
           "model_module_version": "1.5.0",
+          "model_name": "HTMLModel",
+          "state": {
+            "_dom_classes": [],
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "HTMLModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/controls",
+            "_view_module_version": "1.5.0",
+            "_view_name": "HTMLView",
+            "description": "",
+            "description_tooltip": null,
+            "layout": "IPY_MODEL_2f7a6c1f2e3a49c1b2b9df8b263e4437",
+            "placeholder": "​",
+            "style": "IPY_MODEL_eec6fa9095a141ef95285777a970a390",
+            "value": " 25000/25000 [01:33&lt;00:00, 204.73 examples/s]"
+          }
+        },
+        "fa20945c5d7a4089abba70a393de4173": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_module_version": "1.5.0",
+          "model_name": "ProgressStyleModel",
           "state": {
             "_model_module": "@jupyter-widgets/controls",
             "_model_module_version": "1.5.0",
@@ -2442,10 +2460,10 @@
             "description_width": ""
           }
         },
-        "7a10fc4726e6413397d474c3b5f187b9": {
+        "fd91bdfb63c9476195d08b56fcf8f503": {
           "model_module": "@jupyter-widgets/base",
-          "model_name": "LayoutModel",
           "model_module_version": "1.2.0",
+          "model_name": "LayoutModel",
           "state": {
             "_model_module": "@jupyter-widgets/base",
             "_model_module_version": "1.2.0",
@@ -2493,26 +2511,10 @@
             "visibility": null,
             "width": null
           }
-        },
-        "82de00af0c6140c49a1ab5868158b93b": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_name": "DescriptionStyleModel",
-          "model_module_version": "1.5.0",
-          "state": {
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "DescriptionStyleModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "StyleView",
-            "description_width": ""
-          }
         }
       }
-    },
-    "accelerator": "GPU"
+    }
   },
   "nbformat": 4,
   "nbformat_minor": 0
-}
\ No newline at end of file
+}

From 6888715d2da5d0344a1523ee8d5a46f3ef6c8023 Mon Sep 17 00:00:00 2001
From: Fedor Vitiugin <fedor.vitiugin@gmail.com>
Date: Thu, 25 Sep 2025 18:31:19 +0300
Subject: [PATCH 2/2] cleared outputs

---
 ex4_parameters.ipynb | 1040 +-----------------------------------------
 1 file changed, 12 insertions(+), 1028 deletions(-)

diff --git a/ex4_parameters.ipynb b/ex4_parameters.ipynb
index 50eede3..a030193 100644
--- a/ex4_parameters.ipynb
+++ b/ex4_parameters.ipynb
@@ -21,7 +21,7 @@
     },
     {
       "cell_type": "code",
-      "execution_count": 6,
+      "execution_count": null,
       "metadata": {
         "id": "4pquj9Xoxaza"
       },
@@ -32,7 +32,7 @@
     },
     {
       "cell_type": "code",
-      "execution_count": 7,
+      "execution_count": null,
       "metadata": {
         "id": "cQ63zw6BY7tn"
       },
@@ -56,7 +56,7 @@
     },
     {
       "cell_type": "code",
-      "execution_count": 8,
+      "execution_count": null,
       "metadata": {
         "id": "5DKskTuoyCf-"
       },
@@ -82,7 +82,7 @@
     },
     {
       "cell_type": "code",
-      "execution_count": 9,
+      "execution_count": null,
       "metadata": {
         "colab": {
           "base_uri": "https://localhost:8080/",
@@ -115,44 +115,7 @@
         "id": "wjrAGcFtymJF",
         "outputId": "9a35e1d5-c074-4598-828f-71326bf24f87"
       },
-      "outputs": [
-        {
-          "name": "stderr",
-          "output_type": "stream",
-          "text": [
-            "/usr/local/lib/python3.10/dist-packages/transformers/tokenization_utils_base.py:1601: FutureWarning: `clean_up_tokenization_spaces` was not set. It will be set to `True` by default. This behavior will be depracted in transformers v4.45, and will be then set to `False` by default. For more details check this issue: https://github.com/huggingface/transformers/issues/31884\n",
-            "  warnings.warn(\n"
-          ]
-        },
-        {
-          "data": {
-            "application/vnd.jupyter.widget-view+json": {
-              "model_id": "6a6875eea30b4a64b87996f30d6b9b0e",
-              "version_major": 2,
-              "version_minor": 0
-            },
-            "text/plain": [
-              "Map:   0%|          | 0/25000 [00:00<?, ? examples/s]"
-            ]
-          },
-          "metadata": {},
-          "output_type": "display_data"
-        },
-        {
-          "data": {
-            "application/vnd.jupyter.widget-view+json": {
-              "model_id": "6d68fd610b8c48f08ee6f56101119802",
-              "version_major": 2,
-              "version_minor": 0
-            },
-            "text/plain": [
-              "Map:   0%|          | 0/25000 [00:00<?, ? examples/s]"
-            ]
-          },
-          "metadata": {},
-          "output_type": "display_data"
-        }
-      ],
+      "outputs": [],
       "source": [
         "import transformers\n",
         "\n",
@@ -186,7 +149,7 @@
     },
     {
       "cell_type": "code",
-      "execution_count": 10,
+      "execution_count": null,
       "metadata": {
         "id": "9kyQQhu0_wep"
       },
@@ -273,7 +236,7 @@
     },
     {
       "cell_type": "code",
-      "execution_count": 11,
+      "execution_count": null,
       "metadata": {
         "colab": {
           "base_uri": "https://localhost:8080/",
@@ -295,22 +258,7 @@
         "id": "wBn6iLcMjXGr",
         "outputId": "2602f8eb-89cf-4052-cabb-c0363be850d6"
       },
-      "outputs": [
-        {
-          "data": {
-            "application/vnd.jupyter.widget-view+json": {
-              "model_id": "ee04263c2ef846e5b1ba93ad687e37b1",
-              "version_major": 2,
-              "version_minor": 0
-            },
-            "text/plain": [
-              "Downloading builder script:   0%|          | 0.00/4.20k [00:00<?, ?B/s]"
-            ]
-          },
-          "metadata": {},
-          "output_type": "display_data"
-        }
-      ],
+      "outputs": [],
       "source": [
         "import evaluate\n",
         "\n",
@@ -364,659 +312,7 @@
         "id": "nd_usta7WzTX",
         "outputId": "80a41eb7-ce0f-4e04-ab0c-ce56e2519bc0"
       },
-      "outputs": [
-        {
-          "name": "stderr",
-          "output_type": "stream",
-          "text": [
-            "/usr/local/lib/python3.10/dist-packages/transformers/training_args.py:1525: FutureWarning: `evaluation_strategy` is deprecated and will be removed in version 4.46 of 🤗 Transformers. Use `eval_strategy` instead\n",
-            "  warnings.warn(\n",
-            "max_steps is given, it will override any value given in num_train_epochs\n"
-          ]
-        },
-        {
-          "data": {
-            "text/html": [
-              "\n",
-              "    <div>\n",
-              "      \n",
-              "      <progress value='2500' max='2500' style='width:300px; height:20px; vertical-align: middle;'></progress>\n",
-              "      [2500/2500 03:13, Epoch 0/1]\n",
-              "    </div>\n",
-              "    <table border=\"1\" class=\"dataframe\">\n",
-              "  <thead>\n",
-              " <tr style=\"text-align: left;\">\n",
-              "      <th>Step</th>\n",
-              "      <th>Training Loss</th>\n",
-              "      <th>Validation Loss</th>\n",
-              "      <th>Accuracy</th>\n",
-              "    </tr>\n",
-              "  </thead>\n",
-              "  <tbody>\n",
-              "    <tr>\n",
-              "      <td>500</td>\n",
-              "      <td>0.964600</td>\n",
-              "      <td>0.964276</td>\n",
-              "      <td>0.500000</td>\n",
-              "    </tr>\n",
-              "    <tr>\n",
-              "      <td>1000</td>\n",
-              "      <td>0.960000</td>\n",
-              "      <td>0.937627</td>\n",
-              "      <td>0.500000</td>\n",
-              "    </tr>\n",
-              "    <tr>\n",
-              "      <td>1500</td>\n",
-              "      <td>0.949200</td>\n",
-              "      <td>0.919602</td>\n",
-              "      <td>0.500000</td>\n",
-              "    </tr>\n",
-              "    <tr>\n",
-              "      <td>2000</td>\n",
-              "      <td>0.916000</td>\n",
-              "      <td>0.909444</td>\n",
-              "      <td>0.500000</td>\n",
-              "    </tr>\n",
-              "    <tr>\n",
-              "      <td>2500</td>\n",
-              "      <td>0.906600</td>\n",
-              "      <td>0.906230</td>\n",
-              "      <td>0.500000</td>\n",
-              "    </tr>\n",
-              "  </tbody>\n",
-              "</table><p>"
-            ],
-            "text/plain": [
-              "<IPython.core.display.HTML object>"
-            ]
-          },
-          "metadata": {},
-          "output_type": "display_data"
-        },
-        {
-          "data": {
-            "text/html": [
-              "\n",
-              "    <div>\n",
-              "      \n",
-              "      <progress value='3125' max='3125' style='width:300px; height:20px; vertical-align: middle;'></progress>\n",
-              "      [3125/3125 00:14]\n",
-              "    </div>\n",
-              "    "
-            ],
-            "text/plain": [
-              "<IPython.core.display.HTML object>"
-            ]
-          },
-          "metadata": {},
-          "output_type": "display_data"
-        },
-        {
-          "name": "stderr",
-          "output_type": "stream",
-          "text": [
-            "/usr/local/lib/python3.10/dist-packages/transformers/training_args.py:1525: FutureWarning: `evaluation_strategy` is deprecated and will be removed in version 4.46 of 🤗 Transformers. Use `eval_strategy` instead\n",
-            "  warnings.warn(\n",
-            "max_steps is given, it will override any value given in num_train_epochs\n"
-          ]
-        },
-        {
-          "name": "stdout",
-          "output_type": "stream",
-          "text": [
-            "Learning rate: 5e-06 Accuracy: 0.5\n"
-          ]
-        },
-        {
-          "data": {
-            "text/html": [
-              "\n",
-              "    <div>\n",
-              "      \n",
-              "      <progress value='2500' max='2500' style='width:300px; height:20px; vertical-align: middle;'></progress>\n",
-              "      [2500/2500 02:34, Epoch 0/1]\n",
-              "    </div>\n",
-              "    <table border=\"1\" class=\"dataframe\">\n",
-              "  <thead>\n",
-              " <tr style=\"text-align: left;\">\n",
-              "      <th>Step</th>\n",
-              "      <th>Training Loss</th>\n",
-              "      <th>Validation Loss</th>\n",
-              "      <th>Accuracy</th>\n",
-              "    </tr>\n",
-              "  </thead>\n",
-              "  <tbody>\n",
-              "    <tr>\n",
-              "      <td>500</td>\n",
-              "      <td>0.696700</td>\n",
-              "      <td>0.695665</td>\n",
-              "      <td>0.509920</td>\n",
-              "    </tr>\n",
-              "    <tr>\n",
-              "      <td>1000</td>\n",
-              "      <td>0.695600</td>\n",
-              "      <td>0.693803</td>\n",
-              "      <td>0.518040</td>\n",
-              "    </tr>\n",
-              "    <tr>\n",
-              "      <td>1500</td>\n",
-              "      <td>0.693400</td>\n",
-              "      <td>0.692734</td>\n",
-              "      <td>0.523360</td>\n",
-              "    </tr>\n",
-              "    <tr>\n",
-              "      <td>2000</td>\n",
-              "      <td>0.691800</td>\n",
-              "      <td>0.691876</td>\n",
-              "      <td>0.525560</td>\n",
-              "    </tr>\n",
-              "    <tr>\n",
-              "      <td>2500</td>\n",
-              "      <td>0.691800</td>\n",
-              "      <td>0.691665</td>\n",
-              "      <td>0.526120</td>\n",
-              "    </tr>\n",
-              "  </tbody>\n",
-              "</table><p>"
-            ],
-            "text/plain": [
-              "<IPython.core.display.HTML object>"
-            ]
-          },
-          "metadata": {},
-          "output_type": "display_data"
-        },
-        {
-          "data": {
-            "text/html": [
-              "\n",
-              "    <div>\n",
-              "      \n",
-              "      <progress value='3125' max='3125' style='width:300px; height:20px; vertical-align: middle;'></progress>\n",
-              "      [3125/3125 00:12]\n",
-              "    </div>\n",
-              "    "
-            ],
-            "text/plain": [
-              "<IPython.core.display.HTML object>"
-            ]
-          },
-          "metadata": {},
-          "output_type": "display_data"
-        },
-        {
-          "name": "stderr",
-          "output_type": "stream",
-          "text": [
-            "/usr/local/lib/python3.10/dist-packages/transformers/training_args.py:1525: FutureWarning: `evaluation_strategy` is deprecated and will be removed in version 4.46 of 🤗 Transformers. Use `eval_strategy` instead\n",
-            "  warnings.warn(\n",
-            "max_steps is given, it will override any value given in num_train_epochs\n"
-          ]
-        },
-        {
-          "name": "stdout",
-          "output_type": "stream",
-          "text": [
-            "Learning rate: 5e-05 Accuracy: 0.52612\n"
-          ]
-        },
-        {
-          "data": {
-            "text/html": [
-              "\n",
-              "    <div>\n",
-              "      \n",
-              "      <progress value='2500' max='2500' style='width:300px; height:20px; vertical-align: middle;'></progress>\n",
-              "      [2500/2500 02:58, Epoch 0/1]\n",
-              "    </div>\n",
-              "    <table border=\"1\" class=\"dataframe\">\n",
-              "  <thead>\n",
-              " <tr style=\"text-align: left;\">\n",
-              "      <th>Step</th>\n",
-              "      <th>Training Loss</th>\n",
-              "      <th>Validation Loss</th>\n",
-              "      <th>Accuracy</th>\n",
-              "    </tr>\n",
-              "  </thead>\n",
-              "  <tbody>\n",
-              "    <tr>\n",
-              "      <td>500</td>\n",
-              "      <td>0.688100</td>\n",
-              "      <td>0.673649</td>\n",
-              "      <td>0.578840</td>\n",
-              "    </tr>\n",
-              "    <tr>\n",
-              "      <td>1000</td>\n",
-              "      <td>0.664200</td>\n",
-              "      <td>0.652278</td>\n",
-              "      <td>0.599360</td>\n",
-              "    </tr>\n",
-              "    <tr>\n",
-              "      <td>1500</td>\n",
-              "      <td>0.638000</td>\n",
-              "      <td>0.629875</td>\n",
-              "      <td>0.639240</td>\n",
-              "    </tr>\n",
-              "    <tr>\n",
-              "      <td>2000</td>\n",
-              "      <td>0.619400</td>\n",
-              "      <td>0.619752</td>\n",
-              "      <td>0.647800</td>\n",
-              "    </tr>\n",
-              "    <tr>\n",
-              "      <td>2500</td>\n",
-              "      <td>0.616800</td>\n",
-              "      <td>0.616634</td>\n",
-              "      <td>0.653120</td>\n",
-              "    </tr>\n",
-              "  </tbody>\n",
-              "</table><p>"
-            ],
-            "text/plain": [
-              "<IPython.core.display.HTML object>"
-            ]
-          },
-          "metadata": {},
-          "output_type": "display_data"
-        },
-        {
-          "data": {
-            "text/html": [
-              "\n",
-              "    <div>\n",
-              "      \n",
-              "      <progress value='3125' max='3125' style='width:300px; height:20px; vertical-align: middle;'></progress>\n",
-              "      [3125/3125 00:12]\n",
-              "    </div>\n",
-              "    "
-            ],
-            "text/plain": [
-              "<IPython.core.display.HTML object>"
-            ]
-          },
-          "metadata": {},
-          "output_type": "display_data"
-        },
-        {
-          "name": "stderr",
-          "output_type": "stream",
-          "text": [
-            "/usr/local/lib/python3.10/dist-packages/transformers/training_args.py:1525: FutureWarning: `evaluation_strategy` is deprecated and will be removed in version 4.46 of 🤗 Transformers. Use `eval_strategy` instead\n",
-            "  warnings.warn(\n",
-            "max_steps is given, it will override any value given in num_train_epochs\n"
-          ]
-        },
-        {
-          "name": "stdout",
-          "output_type": "stream",
-          "text": [
-            "Learning rate: 0.0005 Accuracy: 0.65312\n"
-          ]
-        },
-        {
-          "data": {
-            "text/html": [
-              "\n",
-              "    <div>\n",
-              "      \n",
-              "      <progress value='2500' max='2500' style='width:300px; height:20px; vertical-align: middle;'></progress>\n",
-              "      [2500/2500 02:25, Epoch 0/1]\n",
-              "    </div>\n",
-              "    <table border=\"1\" class=\"dataframe\">\n",
-              "  <thead>\n",
-              " <tr style=\"text-align: left;\">\n",
-              "      <th>Step</th>\n",
-              "      <th>Training Loss</th>\n",
-              "      <th>Validation Loss</th>\n",
-              "      <th>Accuracy</th>\n",
-              "    </tr>\n",
-              "  </thead>\n",
-              "  <tbody>\n",
-              "    <tr>\n",
-              "      <td>500</td>\n",
-              "      <td>0.686500</td>\n",
-              "      <td>0.637228</td>\n",
-              "      <td>0.626240</td>\n",
-              "    </tr>\n",
-              "    <tr>\n",
-              "      <td>1000</td>\n",
-              "      <td>0.615900</td>\n",
-              "      <td>0.664938</td>\n",
-              "      <td>0.622760</td>\n",
-              "    </tr>\n",
-              "    <tr>\n",
-              "      <td>1500</td>\n",
-              "      <td>0.582600</td>\n",
-              "      <td>0.574918</td>\n",
-              "      <td>0.701320</td>\n",
-              "    </tr>\n",
-              "    <tr>\n",
-              "      <td>2000</td>\n",
-              "      <td>0.549100</td>\n",
-              "      <td>0.561285</td>\n",
-              "      <td>0.706720</td>\n",
-              "    </tr>\n",
-              "    <tr>\n",
-              "      <td>2500</td>\n",
-              "      <td>0.549200</td>\n",
-              "      <td>0.534774</td>\n",
-              "      <td>0.726720</td>\n",
-              "    </tr>\n",
-              "  </tbody>\n",
-              "</table><p>"
-            ],
-            "text/plain": [
-              "<IPython.core.display.HTML object>"
-            ]
-          },
-          "metadata": {},
-          "output_type": "display_data"
-        },
-        {
-          "data": {
-            "text/html": [
-              "\n",
-              "    <div>\n",
-              "      \n",
-              "      <progress value='3125' max='3125' style='width:300px; height:20px; vertical-align: middle;'></progress>\n",
-              "      [3125/3125 00:12]\n",
-              "    </div>\n",
-              "    "
-            ],
-            "text/plain": [
-              "<IPython.core.display.HTML object>"
-            ]
-          },
-          "metadata": {},
-          "output_type": "display_data"
-        },
-        {
-          "name": "stderr",
-          "output_type": "stream",
-          "text": [
-            "/usr/local/lib/python3.10/dist-packages/transformers/training_args.py:1525: FutureWarning: `evaluation_strategy` is deprecated and will be removed in version 4.46 of 🤗 Transformers. Use `eval_strategy` instead\n",
-            "  warnings.warn(\n",
-            "max_steps is given, it will override any value given in num_train_epochs\n"
-          ]
-        },
-        {
-          "name": "stdout",
-          "output_type": "stream",
-          "text": [
-            "Learning rate: 0.005 Accuracy: 0.72672\n"
-          ]
-        },
-        {
-          "data": {
-            "text/html": [
-              "\n",
-              "    <div>\n",
-              "      \n",
-              "      <progress value='2500' max='2500' style='width:300px; height:20px; vertical-align: middle;'></progress>\n",
-              "      [2500/2500 03:04, Epoch 0/1]\n",
-              "    </div>\n",
-              "    <table border=\"1\" class=\"dataframe\">\n",
-              "  <thead>\n",
-              " <tr style=\"text-align: left;\">\n",
-              "      <th>Step</th>\n",
-              "      <th>Training Loss</th>\n",
-              "      <th>Validation Loss</th>\n",
-              "      <th>Accuracy</th>\n",
-              "    </tr>\n",
-              "  </thead>\n",
-              "  <tbody>\n",
-              "    <tr>\n",
-              "      <td>500</td>\n",
-              "      <td>0.686500</td>\n",
-              "      <td>0.637228</td>\n",
-              "      <td>0.626240</td>\n",
-              "    </tr>\n",
-              "    <tr>\n",
-              "      <td>1000</td>\n",
-              "      <td>0.615900</td>\n",
-              "      <td>0.664938</td>\n",
-              "      <td>0.622760</td>\n",
-              "    </tr>\n",
-              "    <tr>\n",
-              "      <td>1500</td>\n",
-              "      <td>0.582600</td>\n",
-              "      <td>0.574918</td>\n",
-              "      <td>0.701320</td>\n",
-              "    </tr>\n",
-              "    <tr>\n",
-              "      <td>2000</td>\n",
-              "      <td>0.549100</td>\n",
-              "      <td>0.561285</td>\n",
-              "      <td>0.706720</td>\n",
-              "    </tr>\n",
-              "    <tr>\n",
-              "      <td>2500</td>\n",
-              "      <td>0.549200</td>\n",
-              "      <td>0.534774</td>\n",
-              "      <td>0.726720</td>\n",
-              "    </tr>\n",
-              "  </tbody>\n",
-              "</table><p>"
-            ],
-            "text/plain": [
-              "<IPython.core.display.HTML object>"
-            ]
-          },
-          "metadata": {},
-          "output_type": "display_data"
-        },
-        {
-          "data": {
-            "text/html": [
-              "\n",
-              "    <div>\n",
-              "      \n",
-              "      <progress value='3125' max='3125' style='width:300px; height:20px; vertical-align: middle;'></progress>\n",
-              "      [3125/3125 00:12]\n",
-              "    </div>\n",
-              "    "
-            ],
-            "text/plain": [
-              "<IPython.core.display.HTML object>"
-            ]
-          },
-          "metadata": {},
-          "output_type": "display_data"
-        },
-        {
-          "name": "stderr",
-          "output_type": "stream",
-          "text": [
-            "/usr/local/lib/python3.10/dist-packages/transformers/training_args.py:1525: FutureWarning: `evaluation_strategy` is deprecated and will be removed in version 4.46 of 🤗 Transformers. Use `eval_strategy` instead\n",
-            "  warnings.warn(\n",
-            "max_steps is given, it will override any value given in num_train_epochs\n"
-          ]
-        },
-        {
-          "name": "stdout",
-          "output_type": "stream",
-          "text": [
-            "Learning rate: 0.005 Accuracy: 0.72672\n"
-          ]
-        },
-        {
-          "data": {
-            "text/html": [
-              "\n",
-              "    <div>\n",
-              "      \n",
-              "      <progress value='2500' max='2500' style='width:300px; height:20px; vertical-align: middle;'></progress>\n",
-              "      [2500/2500 02:27, Epoch 0/1]\n",
-              "    </div>\n",
-              "    <table border=\"1\" class=\"dataframe\">\n",
-              "  <thead>\n",
-              " <tr style=\"text-align: left;\">\n",
-              "      <th>Step</th>\n",
-              "      <th>Training Loss</th>\n",
-              "      <th>Validation Loss</th>\n",
-              "      <th>Accuracy</th>\n",
-              "    </tr>\n",
-              "  </thead>\n",
-              "  <tbody>\n",
-              "    <tr>\n",
-              "      <td>500</td>\n",
-              "      <td>1.332000</td>\n",
-              "      <td>1.047678</td>\n",
-              "      <td>0.551920</td>\n",
-              "    </tr>\n",
-              "    <tr>\n",
-              "      <td>1000</td>\n",
-              "      <td>1.246800</td>\n",
-              "      <td>0.884091</td>\n",
-              "      <td>0.574600</td>\n",
-              "    </tr>\n",
-              "    <tr>\n",
-              "      <td>1500</td>\n",
-              "      <td>0.988700</td>\n",
-              "      <td>0.730061</td>\n",
-              "      <td>0.610720</td>\n",
-              "    </tr>\n",
-              "    <tr>\n",
-              "      <td>2000</td>\n",
-              "      <td>0.837300</td>\n",
-              "      <td>0.830893</td>\n",
-              "      <td>0.609360</td>\n",
-              "    </tr>\n",
-              "    <tr>\n",
-              "      <td>2500</td>\n",
-              "      <td>0.614600</td>\n",
-              "      <td>0.571852</td>\n",
-              "      <td>0.713400</td>\n",
-              "    </tr>\n",
-              "  </tbody>\n",
-              "</table><p>"
-            ],
-            "text/plain": [
-              "<IPython.core.display.HTML object>"
-            ]
-          },
-          "metadata": {},
-          "output_type": "display_data"
-        },
-        {
-          "data": {
-            "text/html": [
-              "\n",
-              "    <div>\n",
-              "      \n",
-              "      <progress value='3125' max='3125' style='width:300px; height:20px; vertical-align: middle;'></progress>\n",
-              "      [3125/3125 00:12]\n",
-              "    </div>\n",
-              "    "
-            ],
-            "text/plain": [
-              "<IPython.core.display.HTML object>"
-            ]
-          },
-          "metadata": {},
-          "output_type": "display_data"
-        },
-        {
-          "name": "stderr",
-          "output_type": "stream",
-          "text": [
-            "/usr/local/lib/python3.10/dist-packages/transformers/training_args.py:1525: FutureWarning: `evaluation_strategy` is deprecated and will be removed in version 4.46 of 🤗 Transformers. Use `eval_strategy` instead\n",
-            "  warnings.warn(\n",
-            "max_steps is given, it will override any value given in num_train_epochs\n"
-          ]
-        },
-        {
-          "name": "stdout",
-          "output_type": "stream",
-          "text": [
-            "Learning rate: 0.05 Accuracy: 0.7134\n"
-          ]
-        },
-        {
-          "data": {
-            "text/html": [
-              "\n",
-              "    <div>\n",
-              "      \n",
-              "      <progress value='2500' max='2500' style='width:300px; height:20px; vertical-align: middle;'></progress>\n",
-              "      [2500/2500 02:33, Epoch 0/1]\n",
-              "    </div>\n",
-              "    <table border=\"1\" class=\"dataframe\">\n",
-              "  <thead>\n",
-              " <tr style=\"text-align: left;\">\n",
-              "      <th>Step</th>\n",
-              "      <th>Training Loss</th>\n",
-              "      <th>Validation Loss</th>\n",
-              "      <th>Accuracy</th>\n",
-              "    </tr>\n",
-              "  </thead>\n",
-              "  <tbody>\n",
-              "    <tr>\n",
-              "      <td>500</td>\n",
-              "      <td>373.884900</td>\n",
-              "      <td>464.389679</td>\n",
-              "      <td>0.501080</td>\n",
-              "    </tr>\n",
-              "    <tr>\n",
-              "      <td>1000</td>\n",
-              "      <td>434.300200</td>\n",
-              "      <td>237.639359</td>\n",
-              "      <td>0.523120</td>\n",
-              "    </tr>\n",
-              "    <tr>\n",
-              "      <td>1500</td>\n",
-              "      <td>248.097100</td>\n",
-              "      <td>204.130096</td>\n",
-              "      <td>0.504400</td>\n",
-              "    </tr>\n",
-              "    <tr>\n",
-              "      <td>2000</td>\n",
-              "      <td>118.084200</td>\n",
-              "      <td>35.678982</td>\n",
-              "      <td>0.493760</td>\n",
-              "    </tr>\n",
-              "    <tr>\n",
-              "      <td>2500</td>\n",
-              "      <td>30.116600</td>\n",
-              "      <td>1.654839</td>\n",
-              "      <td>0.519800</td>\n",
-              "    </tr>\n",
-              "  </tbody>\n",
-              "</table><p>"
-            ],
-            "text/plain": [
-              "<IPython.core.display.HTML object>"
-            ]
-          },
-          "metadata": {},
-          "output_type": "display_data"
-        },
-        {
-          "data": {
-            "text/html": [
-              "\n",
-              "    <div>\n",
-              "      \n",
-              "      <progress value='3125' max='3125' style='width:300px; height:20px; vertical-align: middle;'></progress>\n",
-              "      [3125/3125 00:12]\n",
-              "    </div>\n",
-              "    "
-            ],
-            "text/plain": [
-              "<IPython.core.display.HTML object>"
-            ]
-          },
-          "metadata": {},
-          "output_type": "display_data"
-        },
-        {
-          "name": "stdout",
-          "output_type": "stream",
-          "text": [
-            "Learning rate: 0.5 Accuracy: 0.5198\n"
-          ]
-        }
-      ],
+      "outputs": [],
       "source": [
         "for lr in [0.000005, 0.00005, 0.0005, 0.005, 0.05, 0.5]:\n",
         "\n",
@@ -1074,7 +370,7 @@
     },
     {
       "cell_type": "code",
-      "execution_count": 13,
+      "execution_count": null,
       "metadata": {
         "colab": {
           "base_uri": "https://localhost:8080/"
@@ -1082,39 +378,7 @@
         "id": "Hes_HBvOmrKD",
         "outputId": "1e5f78e0-116b-4a95-f731-a4efcfd2a353"
       },
-      "outputs": [
-        {
-          "name": "stdout",
-          "output_type": "stream",
-          "text": [
-            "Collecting optuna\n",
-            "  Downloading optuna-4.0.0-py3-none-any.whl.metadata (16 kB)\n",
-            "Collecting alembic>=1.5.0 (from optuna)\n",
-            "  Downloading alembic-1.13.2-py3-none-any.whl.metadata (7.4 kB)\n",
-            "Collecting colorlog (from optuna)\n",
-            "  Downloading colorlog-6.8.2-py3-none-any.whl.metadata (10 kB)\n",
-            "Requirement already satisfied: numpy in /usr/local/lib/python3.10/dist-packages (from optuna) (1.26.4)\n",
-            "Requirement already satisfied: packaging>=20.0 in /usr/local/lib/python3.10/dist-packages (from optuna) (24.1)\n",
-            "Requirement already satisfied: sqlalchemy>=1.3.0 in /usr/local/lib/python3.10/dist-packages (from optuna) (2.0.34)\n",
-            "Requirement already satisfied: tqdm in /usr/local/lib/python3.10/dist-packages (from optuna) (4.66.5)\n",
-            "Requirement already satisfied: PyYAML in /usr/local/lib/python3.10/dist-packages (from optuna) (6.0.2)\n",
-            "Collecting Mako (from alembic>=1.5.0->optuna)\n",
-            "  Downloading Mako-1.3.5-py3-none-any.whl.metadata (2.9 kB)\n",
-            "Requirement already satisfied: typing-extensions>=4 in /usr/local/lib/python3.10/dist-packages (from alembic>=1.5.0->optuna) (4.12.2)\n",
-            "Requirement already satisfied: greenlet!=0.4.17 in /usr/local/lib/python3.10/dist-packages (from sqlalchemy>=1.3.0->optuna) (3.1.0)\n",
-            "Requirement already satisfied: MarkupSafe>=0.9.2 in /usr/local/lib/python3.10/dist-packages (from Mako->alembic>=1.5.0->optuna) (2.1.5)\n",
-            "Downloading optuna-4.0.0-py3-none-any.whl (362 kB)\n",
-            "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m362.8/362.8 kB\u001b[0m \u001b[31m6.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
-            "\u001b[?25hDownloading alembic-1.13.2-py3-none-any.whl (232 kB)\n",
-            "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m233.0/233.0 kB\u001b[0m \u001b[31m16.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
-            "\u001b[?25hDownloading colorlog-6.8.2-py3-none-any.whl (11 kB)\n",
-            "Downloading Mako-1.3.5-py3-none-any.whl (78 kB)\n",
-            "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m78.6/78.6 kB\u001b[0m \u001b[31m5.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
-            "\u001b[?25hInstalling collected packages: Mako, colorlog, alembic, optuna\n",
-            "Successfully installed Mako-1.3.5 alembic-1.13.2 colorlog-6.8.2 optuna-4.0.0\n"
-          ]
-        }
-      ],
+      "outputs": [],
       "source": [
         "!pip install optuna"
       ]
@@ -1130,287 +394,7 @@
         "id": "Ag66TkGumvSU",
         "outputId": "1d0ad2ac-4874-41ef-db7f-c873b23468f1"
       },
-      "outputs": [
-        {
-          "name": "stderr",
-          "output_type": "stream",
-          "text": [
-            "/usr/local/lib/python3.10/dist-packages/transformers/training_args.py:1525: FutureWarning: `evaluation_strategy` is deprecated and will be removed in version 4.46 of 🤗 Transformers. Use `eval_strategy` instead\n",
-            "  warnings.warn(\n",
-            "max_steps is given, it will override any value given in num_train_epochs\n"
-          ]
-        },
-        {
-          "data": {
-            "text/html": [
-              "\n",
-              "    <div>\n",
-              "      \n",
-              "      <progress value='2500' max='2500' style='width:300px; height:20px; vertical-align: middle;'></progress>\n",
-              "      [2500/2500 03:21, Epoch 0/1]\n",
-              "    </div>\n",
-              "    <table border=\"1\" class=\"dataframe\">\n",
-              "  <thead>\n",
-              " <tr style=\"text-align: left;\">\n",
-              "      <th>Step</th>\n",
-              "      <th>Training Loss</th>\n",
-              "      <th>Validation Loss</th>\n",
-              "      <th>Accuracy</th>\n",
-              "    </tr>\n",
-              "  </thead>\n",
-              "  <tbody>\n",
-              "    <tr>\n",
-              "      <td>500</td>\n",
-              "      <td>0.672200</td>\n",
-              "      <td>0.611683</td>\n",
-              "      <td>0.656320</td>\n",
-              "    </tr>\n",
-              "    <tr>\n",
-              "      <td>1000</td>\n",
-              "      <td>0.609000</td>\n",
-              "      <td>0.637774</td>\n",
-              "      <td>0.634880</td>\n",
-              "    </tr>\n",
-              "    <tr>\n",
-              "      <td>1500</td>\n",
-              "      <td>0.575700</td>\n",
-              "      <td>0.579635</td>\n",
-              "      <td>0.694880</td>\n",
-              "    </tr>\n",
-              "    <tr>\n",
-              "      <td>2000</td>\n",
-              "      <td>0.552900</td>\n",
-              "      <td>0.561009</td>\n",
-              "      <td>0.709440</td>\n",
-              "    </tr>\n",
-              "    <tr>\n",
-              "      <td>2500</td>\n",
-              "      <td>0.543300</td>\n",
-              "      <td>0.546472</td>\n",
-              "      <td>0.720720</td>\n",
-              "    </tr>\n",
-              "  </tbody>\n",
-              "</table><p>"
-            ],
-            "text/plain": [
-              "<IPython.core.display.HTML object>"
-            ]
-          },
-          "metadata": {},
-          "output_type": "display_data"
-        },
-        {
-          "data": {
-            "text/html": [
-              "\n",
-              "    <div>\n",
-              "      \n",
-              "      <progress value='3125' max='3125' style='width:300px; height:20px; vertical-align: middle;'></progress>\n",
-              "      [3125/3125 00:13]\n",
-              "    </div>\n",
-              "    "
-            ],
-            "text/plain": [
-              "<IPython.core.display.HTML object>"
-            ]
-          },
-          "metadata": {},
-          "output_type": "display_data"
-        },
-        {
-          "name": "stderr",
-          "output_type": "stream",
-          "text": [
-            "/usr/local/lib/python3.10/dist-packages/transformers/training_args.py:1525: FutureWarning: `evaluation_strategy` is deprecated and will be removed in version 4.46 of 🤗 Transformers. Use `eval_strategy` instead\n",
-            "  warnings.warn(\n",
-            "max_steps is given, it will override any value given in num_train_epochs\n"
-          ]
-        },
-        {
-          "name": "stdout",
-          "output_type": "stream",
-          "text": [
-            "Learning rate: 0.0032891344286570525 Filters: 10 Accuracy: 0.72072\n"
-          ]
-        },
-        {
-          "data": {
-            "text/html": [
-              "\n",
-              "    <div>\n",
-              "      \n",
-              "      <progress value='2500' max='2500' style='width:300px; height:20px; vertical-align: middle;'></progress>\n",
-              "      [2500/2500 03:13, Epoch 0/1]\n",
-              "    </div>\n",
-              "    <table border=\"1\" class=\"dataframe\">\n",
-              "  <thead>\n",
-              " <tr style=\"text-align: left;\">\n",
-              "      <th>Step</th>\n",
-              "      <th>Training Loss</th>\n",
-              "      <th>Validation Loss</th>\n",
-              "      <th>Accuracy</th>\n",
-              "    </tr>\n",
-              "  </thead>\n",
-              "  <tbody>\n",
-              "    <tr>\n",
-              "      <td>500</td>\n",
-              "      <td>0.685100</td>\n",
-              "      <td>0.636767</td>\n",
-              "      <td>0.627360</td>\n",
-              "    </tr>\n",
-              "    <tr>\n",
-              "      <td>1000</td>\n",
-              "      <td>0.606900</td>\n",
-              "      <td>0.614509</td>\n",
-              "      <td>0.656960</td>\n",
-              "    </tr>\n",
-              "    <tr>\n",
-              "      <td>1500</td>\n",
-              "      <td>0.565400</td>\n",
-              "      <td>0.556262</td>\n",
-              "      <td>0.711400</td>\n",
-              "    </tr>\n",
-              "    <tr>\n",
-              "      <td>2000</td>\n",
-              "      <td>0.521800</td>\n",
-              "      <td>0.534269</td>\n",
-              "      <td>0.724560</td>\n",
-              "    </tr>\n",
-              "    <tr>\n",
-              "      <td>2500</td>\n",
-              "      <td>0.521600</td>\n",
-              "      <td>0.522814</td>\n",
-              "      <td>0.732280</td>\n",
-              "    </tr>\n",
-              "  </tbody>\n",
-              "</table><p>"
-            ],
-            "text/plain": [
-              "<IPython.core.display.HTML object>"
-            ]
-          },
-          "metadata": {},
-          "output_type": "display_data"
-        },
-        {
-          "data": {
-            "text/html": [
-              "\n",
-              "    <div>\n",
-              "      \n",
-              "      <progress value='3125' max='3125' style='width:300px; height:20px; vertical-align: middle;'></progress>\n",
-              "      [3125/3125 00:12]\n",
-              "    </div>\n",
-              "    "
-            ],
-            "text/plain": [
-              "<IPython.core.display.HTML object>"
-            ]
-          },
-          "metadata": {},
-          "output_type": "display_data"
-        },
-        {
-          "name": "stderr",
-          "output_type": "stream",
-          "text": [
-            "/usr/local/lib/python3.10/dist-packages/transformers/training_args.py:1525: FutureWarning: `evaluation_strategy` is deprecated and will be removed in version 4.46 of 🤗 Transformers. Use `eval_strategy` instead\n",
-            "  warnings.warn(\n",
-            "max_steps is given, it will override any value given in num_train_epochs\n"
-          ]
-        },
-        {
-          "name": "stdout",
-          "output_type": "stream",
-          "text": [
-            "Learning rate: 0.002434791974958158 Filters: 16 Accuracy: 0.73228\n"
-          ]
-        },
-        {
-          "data": {
-            "text/html": [
-              "\n",
-              "    <div>\n",
-              "      \n",
-              "      <progress value='2500' max='2500' style='width:300px; height:20px; vertical-align: middle;'></progress>\n",
-              "      [2500/2500 03:04, Epoch 0/1]\n",
-              "    </div>\n",
-              "    <table border=\"1\" class=\"dataframe\">\n",
-              "  <thead>\n",
-              " <tr style=\"text-align: left;\">\n",
-              "      <th>Step</th>\n",
-              "      <th>Training Loss</th>\n",
-              "      <th>Validation Loss</th>\n",
-              "      <th>Accuracy</th>\n",
-              "    </tr>\n",
-              "  </thead>\n",
-              "  <tbody>\n",
-              "    <tr>\n",
-              "      <td>500</td>\n",
-              "      <td>1.604300</td>\n",
-              "      <td>0.883112</td>\n",
-              "      <td>0.592320</td>\n",
-              "    </tr>\n",
-              "    <tr>\n",
-              "      <td>1000</td>\n",
-              "      <td>1.514200</td>\n",
-              "      <td>1.527417</td>\n",
-              "      <td>0.613640</td>\n",
-              "    </tr>\n",
-              "    <tr>\n",
-              "      <td>1500</td>\n",
-              "      <td>1.391500</td>\n",
-              "      <td>0.802837</td>\n",
-              "      <td>0.654840</td>\n",
-              "    </tr>\n",
-              "    <tr>\n",
-              "      <td>2000</td>\n",
-              "      <td>0.769500</td>\n",
-              "      <td>0.660751</td>\n",
-              "      <td>0.682480</td>\n",
-              "    </tr>\n",
-              "    <tr>\n",
-              "      <td>2500</td>\n",
-              "      <td>0.618300</td>\n",
-              "      <td>0.536751</td>\n",
-              "      <td>0.733200</td>\n",
-              "    </tr>\n",
-              "  </tbody>\n",
-              "</table><p>"
-            ],
-            "text/plain": [
-              "<IPython.core.display.HTML object>"
-            ]
-          },
-          "metadata": {},
-          "output_type": "display_data"
-        },
-        {
-          "data": {
-            "text/html": [
-              "\n",
-              "    <div>\n",
-              "      \n",
-              "      <progress value='3125' max='3125' style='width:300px; height:20px; vertical-align: middle;'></progress>\n",
-              "      [3125/3125 00:13]\n",
-              "    </div>\n",
-              "    "
-            ],
-            "text/plain": [
-              "<IPython.core.display.HTML object>"
-            ]
-          },
-          "metadata": {},
-          "output_type": "display_data"
-        },
-        {
-          "name": "stdout",
-          "output_type": "stream",
-          "text": [
-            "Learning rate: 0.03601436163120907 Filters: 24 Accuracy: 0.7332\n"
-          ]
-        }
-      ],
+      "outputs": [],
       "source": [
         "import optuna\n",
         "\n",

Step	Training Loss	Validation Loss	Accuracy
500	0.964600	0.964276	0.500000
1000	0.960000	0.937627	0.500000
1500	0.949200	0.919602	0.500000
2000	0.916000	0.909444	0.500000
2500	0.906600	0.906230	0.500000
Step	Training Loss	Validation Loss	Accuracy
500	0.696700	0.695665	0.509920
1000	0.695600	0.693803	0.518040
1500	0.693400	0.692734	0.523360
2000	0.691800	0.691876	0.525560
2500	0.691800	0.691665	0.526120
Step	Training Loss	Validation Loss	Accuracy
500	0.688100	0.673649	0.578840
1000	0.664200	0.652278	0.599360
1500	0.638000	0.629875	0.639240
2000	0.619400	0.619752	0.647800
2500	0.616800	0.616634	0.653120
Step	Training Loss	Validation Loss	Accuracy
500	0.686500	0.637228	0.626240
1000	0.615900	0.664938	0.622760
1500	0.582600	0.574918	0.701320
2000	0.549100	0.561285	0.706720
2500	0.549200	0.534774	0.726720
Step	Training Loss	Validation Loss	Accuracy
500	1.332000	1.047678	0.551920
1000	1.246800	0.884091	0.574600
1500	0.988700	0.730061	0.610720
2000	0.837300	0.830893	0.609360
2500	0.614600	0.571852	0.713400
Step	Training Loss	Validation Loss	Accuracy
500	373.884900	464.389679	0.501080
1000	434.300200	237.639359	0.523120
1500	248.097100	204.130096	0.504400
2000	118.084200	35.678982	0.493760
2500	30.116600	1.654839	0.519800
Step	Training Loss	Validation Loss	Accuracy
500	0.672200	0.611683	0.656320
1000	0.609000	0.637774	0.634880
1500	0.575700	0.579635	0.694880
2000	0.552900	0.561009	0.709440
2500	0.543300	0.546472	0.720720
Step	Training Loss	Validation Loss	Accuracy
500	0.685100	0.636767	0.627360
1000	0.606900	0.614509	0.656960
1500	0.565400	0.556262	0.711400
2000	0.521800	0.534269	0.724560
2500	0.521600	0.522814	0.732280
Step	Training Loss	Validation Loss	Accuracy
500	1.604300	0.883112	0.592320
1000	1.514200	1.527417	0.613640
1500	1.391500	0.802837	0.654840
2000	0.769500	0.660751	0.682480
2500	0.618300	0.536751	0.733200