diff --git a/notebook-tutorials/introduction_to_optimum_graphcore.ipynb b/notebook-tutorials/introduction_to_optimum_graphcore.ipynb index 1fdf835..b71f193 100644 --- a/notebook-tutorials/introduction_to_optimum_graphcore.ipynb +++ b/notebook-tutorials/introduction_to_optimum_graphcore.ipynb @@ -59,52 +59,15 @@ }, { "cell_type": "code", - "execution_count": 3, - "id": "75e68205-8fdb-4e5c-a14b-23f000a90cbe", - "metadata": { - "execution": { - "iopub.execute_input": "2022-08-03T22:01:26.633078Z", - "iopub.status.busy": "2022-08-03T22:01:26.632737Z", - "iopub.status.idle": "2022-08-03T22:01:29.749125Z", - "shell.execute_reply": "2022-08-03T22:01:29.748057Z", - "shell.execute_reply.started": "2022-08-03T22:01:26.633053Z" - } - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Requirement already satisfied: pip in /usr/local/lib/python3.8/dist-packages (22.2.1)\n", - "Collecting pip\n", - " Downloading pip-22.2.2-py3-none-any.whl (2.0 MB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m2.0/2.0 MB\u001b[0m \u001b[31m43.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0ma \u001b[36m0:00:01\u001b[0m\n", - "\u001b[?25hInstalling collected packages: pip\n", - " Attempting uninstall: pip\n", - " Found existing installation: pip 22.2.1\n", - " Uninstalling pip-22.2.1:\n", - " Successfully uninstalled pip-22.2.1\n", - "Successfully installed pip-22.2.2\n", - "\u001b[33mWARNING: Running pip as the 'root' user can result in broken permissions and conflicting behaviour with the system package manager. It is recommended to use a virtual environment instead: https://pip.pypa.io/warnings/venv\u001b[0m\u001b[33m\n", - "\u001b[0mNote: you may need to restart the kernel to use updated packages.\n" - ] - } - ], - "source": [ - "%pip install --upgrade pip" - ] - }, - { - "cell_type": "code", - "execution_count": 4, + "execution_count": 1, "id": "e087c904", "metadata": { "execution": { - "iopub.execute_input": "2022-08-03T22:01:33.210191Z", - "iopub.status.busy": "2022-08-03T22:01:33.209978Z", - "iopub.status.idle": "2022-08-03T22:01:35.134933Z", - "shell.execute_reply": "2022-08-03T22:01:35.133939Z", - "shell.execute_reply.started": "2022-08-03T22:01:33.210168Z" + "iopub.execute_input": "2022-08-09T23:49:49.117204Z", + "iopub.status.busy": "2022-08-09T23:49:49.116854Z", + "iopub.status.idle": "2022-08-09T23:50:18.671513Z", + "shell.execute_reply": "2022-08-09T23:50:18.670415Z", + "shell.execute_reply.started": "2022-08-09T23:49:49.117136Z" } }, "outputs": [ @@ -112,50 +75,123 @@ "name": "stdout", "output_type": "stream", "text": [ - "Requirement already satisfied: optimum[graphcore] in /usr/local/lib/python3.8/dist-packages (1.3.0)\n", - "Requirement already satisfied: sympy in /usr/local/lib/python3.8/dist-packages (from optimum[graphcore]) (1.10.1)\n", - "Requirement already satisfied: torch>=1.9 in /usr/local/lib/python3.8/dist-packages (from optimum[graphcore]) (1.10.0+cpu)\n", + "Collecting optimum[graphcore]\n", + " Downloading optimum-1.3.0.tar.gz (93 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m93.4/93.4 kB\u001b[0m \u001b[31m8.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25h Installing build dependencies ... \u001b[?25ldone\n", + "\u001b[?25h Getting requirements to build wheel ... \u001b[?25ldone\n", + "\u001b[?25h Preparing metadata (pyproject.toml) ... \u001b[?25ldone\n", + "\u001b[?25hCollecting huggingface-hub>=0.4.0\n", + " Downloading huggingface_hub-0.8.1-py3-none-any.whl (101 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m101.5/101.5 kB\u001b[0m \u001b[31m34.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hCollecting numpy\n", + " Downloading numpy-1.23.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (17.1 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m17.1/17.1 MB\u001b[0m \u001b[31m77.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m00:01\u001b[0m\n", + "\u001b[?25hCollecting coloredlogs\n", + " Downloading coloredlogs-15.0.1-py2.py3-none-any.whl (46 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m46.0/46.0 kB\u001b[0m \u001b[31m14.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hCollecting sympy\n", + " Downloading sympy-1.10.1-py3-none-any.whl (6.4 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m6.4/6.4 MB\u001b[0m \u001b[31m78.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m:00:01\u001b[0m00:01\u001b[0m\n", + "\u001b[?25hCollecting transformers[sentencepiece]>=4.18.0\n", + " Downloading transformers-4.21.1-py3-none-any.whl (4.7 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m4.7/4.7 MB\u001b[0m \u001b[31m80.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0mta \u001b[36m0:00:01\u001b[0m\n", + "\u001b[?25hRequirement already satisfied: torch>=1.9 in /usr/local/lib/python3.8/dist-packages (from optimum[graphcore]) (1.10.0+cpu)\n", "Requirement already satisfied: packaging in /usr/local/lib/python3.8/dist-packages (from optimum[graphcore]) (21.3)\n", - "Requirement already satisfied: coloredlogs in /usr/local/lib/python3.8/dist-packages (from optimum[graphcore]) (15.0.1)\n", - "Requirement already satisfied: huggingface-hub>=0.4.0 in /usr/local/lib/python3.8/dist-packages (from optimum[graphcore]) (0.8.1)\n", - "Requirement already satisfied: numpy in /usr/local/lib/python3.8/dist-packages (from optimum[graphcore]) (1.23.1)\n", - "Requirement already satisfied: transformers[sentencepiece]>=4.18.0 in /usr/local/lib/python3.8/dist-packages (from optimum[graphcore]) (4.21.0)\n", - "Requirement already satisfied: optimum-graphcore in /usr/local/lib/python3.8/dist-packages (from optimum[graphcore]) (0.3.1)\n", - "Requirement already satisfied: requests in /usr/lib/python3/dist-packages (from huggingface-hub>=0.4.0->optimum[graphcore]) (2.22.0)\n", - "Requirement already satisfied: pyyaml>=5.1 in /usr/local/lib/python3.8/dist-packages (from huggingface-hub>=0.4.0->optimum[graphcore]) (5.4.1)\n", - "Requirement already satisfied: filelock in /usr/local/lib/python3.8/dist-packages (from huggingface-hub>=0.4.0->optimum[graphcore]) (3.7.1)\n", + "Collecting optimum-graphcore\n", + " Downloading optimum_graphcore-0.3.1-py3-none-any.whl (150 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m150.8/150.8 kB\u001b[0m \u001b[31m54.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hCollecting filelock\n", + " Downloading filelock-3.7.1-py3-none-any.whl (10 kB)\n", "Requirement already satisfied: tqdm in /usr/local/lib/python3.8/dist-packages (from huggingface-hub>=0.4.0->optimum[graphcore]) (4.64.0)\n", + "Requirement already satisfied: requests in /usr/lib/python3/dist-packages (from huggingface-hub>=0.4.0->optimum[graphcore]) (2.22.0)\n", "Requirement already satisfied: typing-extensions>=3.7.4.3 in /usr/local/lib/python3.8/dist-packages (from huggingface-hub>=0.4.0->optimum[graphcore]) (4.3.0)\n", + "Requirement already satisfied: pyyaml>=5.1 in /usr/local/lib/python3.8/dist-packages (from huggingface-hub>=0.4.0->optimum[graphcore]) (5.4.1)\n", "Requirement already satisfied: pyparsing!=3.0.5,>=2.0.2 in /usr/local/lib/python3.8/dist-packages (from packaging->optimum[graphcore]) (3.0.9)\n", - "Requirement already satisfied: regex!=2019.12.17 in /usr/local/lib/python3.8/dist-packages (from transformers[sentencepiece]>=4.18.0->optimum[graphcore]) (2022.7.25)\n", - "Requirement already satisfied: tokenizers!=0.11.3,<0.13,>=0.11.1 in /usr/local/lib/python3.8/dist-packages (from transformers[sentencepiece]>=4.18.0->optimum[graphcore]) (0.12.1)\n", - "Requirement already satisfied: protobuf<=3.20.1 in /usr/local/lib/python3.8/dist-packages (from transformers[sentencepiece]>=4.18.0->optimum[graphcore]) (3.20.1)\n", - "Requirement already satisfied: sentencepiece!=0.1.92,>=0.1.91 in /usr/local/lib/python3.8/dist-packages (from transformers[sentencepiece]>=4.18.0->optimum[graphcore]) (0.1.96)\n", - "Requirement already satisfied: humanfriendly>=9.1 in /usr/local/lib/python3.8/dist-packages (from coloredlogs->optimum[graphcore]) (10.0)\n", - "Requirement already satisfied: scipy in /usr/local/lib/python3.8/dist-packages (from optimum-graphcore->optimum[graphcore]) (1.9.0)\n", - "Requirement already satisfied: pillow in /usr/local/lib/python3.8/dist-packages (from optimum-graphcore->optimum[graphcore]) (9.2.0)\n", - "Requirement already satisfied: datasets in /usr/local/lib/python3.8/dist-packages (from optimum-graphcore->optimum[graphcore]) (2.4.0)\n", - "Requirement already satisfied: mpmath>=0.19 in /usr/local/lib/python3.8/dist-packages (from sympy->optimum[graphcore]) (1.2.1)\n", - "Requirement already satisfied: fsspec[http]>=2021.11.1 in /usr/local/lib/python3.8/dist-packages (from datasets->optimum-graphcore->optimum[graphcore]) (2022.7.1)\n", - "Requirement already satisfied: dill<0.3.6 in /usr/local/lib/python3.8/dist-packages (from datasets->optimum-graphcore->optimum[graphcore]) (0.3.5.1)\n", - "Requirement already satisfied: responses<0.19 in /usr/local/lib/python3.8/dist-packages (from datasets->optimum-graphcore->optimum[graphcore]) (0.18.0)\n", - "Requirement already satisfied: xxhash in /usr/local/lib/python3.8/dist-packages (from datasets->optimum-graphcore->optimum[graphcore]) (3.0.0)\n", - "Requirement already satisfied: aiohttp in /usr/local/lib/python3.8/dist-packages (from datasets->optimum-graphcore->optimum[graphcore]) (3.8.1)\n", - "Requirement already satisfied: pyarrow>=6.0.0 in /usr/local/lib/python3.8/dist-packages (from datasets->optimum-graphcore->optimum[graphcore]) (9.0.0)\n", - "Requirement already satisfied: multiprocess in /usr/local/lib/python3.8/dist-packages (from datasets->optimum-graphcore->optimum[graphcore]) (0.70.13)\n", - "Requirement already satisfied: pandas in /usr/local/lib/python3.8/dist-packages (from datasets->optimum-graphcore->optimum[graphcore]) (1.4.3)\n", - "Requirement already satisfied: urllib3>=1.25.10 in /usr/local/lib/python3.8/dist-packages (from responses<0.19->datasets->optimum-graphcore->optimum[graphcore]) (1.26.11)\n", - "Requirement already satisfied: yarl<2.0,>=1.0 in /usr/local/lib/python3.8/dist-packages (from aiohttp->datasets->optimum-graphcore->optimum[graphcore]) (1.8.1)\n", - "Requirement already satisfied: charset-normalizer<3.0,>=2.0 in /usr/local/lib/python3.8/dist-packages (from aiohttp->datasets->optimum-graphcore->optimum[graphcore]) (2.1.0)\n", - "Requirement already satisfied: aiosignal>=1.1.2 in /usr/local/lib/python3.8/dist-packages (from aiohttp->datasets->optimum-graphcore->optimum[graphcore]) (1.2.0)\n", - "Requirement already satisfied: multidict<7.0,>=4.5 in /usr/local/lib/python3.8/dist-packages (from aiohttp->datasets->optimum-graphcore->optimum[graphcore]) (6.0.2)\n", - "Requirement already satisfied: attrs>=17.3.0 in /usr/local/lib/python3.8/dist-packages (from aiohttp->datasets->optimum-graphcore->optimum[graphcore]) (22.1.0)\n", - "Requirement already satisfied: async-timeout<5.0,>=4.0.0a3 in /usr/local/lib/python3.8/dist-packages (from aiohttp->datasets->optimum-graphcore->optimum[graphcore]) (4.0.2)\n", - "Requirement already satisfied: frozenlist>=1.1.1 in /usr/local/lib/python3.8/dist-packages (from aiohttp->datasets->optimum-graphcore->optimum[graphcore]) (1.3.1)\n", + "Collecting regex!=2019.12.17\n", + " Downloading regex-2022.7.25-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (768 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m768.2/768.2 kB\u001b[0m \u001b[31m89.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hCollecting tokenizers!=0.11.3,<0.13,>=0.11.1\n", + " Downloading tokenizers-0.12.1-cp38-cp38-manylinux_2_12_x86_64.manylinux2010_x86_64.whl (6.6 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m6.6/6.6 MB\u001b[0m \u001b[31m21.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0ma \u001b[36m0:00:01\u001b[0m\n", + "\u001b[?25hCollecting sentencepiece!=0.1.92,>=0.1.91\n", + " Downloading sentencepiece-0.1.97-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.3 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.3/1.3 MB\u001b[0m \u001b[31m74.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hCollecting protobuf<=3.20.1\n", + " Downloading protobuf-3.20.1-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.whl (1.0 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.0/1.0 MB\u001b[0m \u001b[31m87.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hCollecting humanfriendly>=9.1\n", + " Downloading humanfriendly-10.0-py2.py3-none-any.whl (86 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m86.8/86.8 kB\u001b[0m \u001b[31m36.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hCollecting pillow\n", + " Downloading Pillow-9.2.0-cp38-cp38-manylinux_2_28_x86_64.whl (3.2 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m3.2/3.2 MB\u001b[0m \u001b[31m82.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m:00:01\u001b[0m\n", + "\u001b[?25hCollecting datasets\n", + " Downloading datasets-2.4.0-py3-none-any.whl (365 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m365.7/365.7 kB\u001b[0m \u001b[31m83.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hCollecting scipy\n", + " Downloading scipy-1.9.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (43.4 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m43.4/43.4 MB\u001b[0m \u001b[31m52.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m00:01\u001b[0m\n", + "\u001b[?25hCollecting mpmath>=0.19\n", + " Downloading mpmath-1.2.1-py3-none-any.whl (532 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m532.6/532.6 kB\u001b[0m \u001b[31m94.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hCollecting aiohttp\n", + " Downloading aiohttp-3.8.1-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl (1.3 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.3/1.3 MB\u001b[0m \u001b[31m88.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hCollecting pandas\n", + " Downloading pandas-1.4.3-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (11.7 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m11.7/11.7 MB\u001b[0m \u001b[31m81.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0ma \u001b[36m0:00:01\u001b[0m\n", + "\u001b[?25hCollecting dill<0.3.6\n", + " Downloading dill-0.3.5.1-py2.py3-none-any.whl (95 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m95.8/95.8 kB\u001b[0m \u001b[31m38.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hCollecting xxhash\n", + " Downloading xxhash-3.0.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (212 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m212.1/212.1 kB\u001b[0m \u001b[31m64.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hCollecting fsspec[http]>=2021.11.1\n", + " Downloading fsspec-2022.7.1-py3-none-any.whl (141 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m141.2/141.2 kB\u001b[0m \u001b[31m52.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hCollecting pyarrow>=6.0.0\n", + " Downloading pyarrow-9.0.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (35.3 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m35.3/35.3 MB\u001b[0m \u001b[31m49.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m00:01\u001b[0m\n", + "\u001b[?25hCollecting responses<0.19\n", + " Downloading responses-0.18.0-py3-none-any.whl (38 kB)\n", + "Collecting multiprocess\n", + " Downloading multiprocess-0.70.13-py38-none-any.whl (131 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m131.4/131.4 kB\u001b[0m \u001b[31m51.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hCollecting urllib3>=1.25.10\n", + " Downloading urllib3-1.26.11-py2.py3-none-any.whl (139 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m139.9/139.9 kB\u001b[0m \u001b[31m46.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hRequirement already satisfied: attrs>=17.3.0 in /usr/local/lib/python3.8/dist-packages (from aiohttp->datasets->optimum-graphcore->optimum[graphcore]) (22.1.0)\n", + "Collecting async-timeout<5.0,>=4.0.0a3\n", + " Downloading async_timeout-4.0.2-py3-none-any.whl (5.8 kB)\n", + "Collecting aiosignal>=1.1.2\n", + " Downloading aiosignal-1.2.0-py3-none-any.whl (8.2 kB)\n", + "Collecting yarl<2.0,>=1.0\n", + " Downloading yarl-1.8.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (262 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m262.1/262.1 kB\u001b[0m \u001b[31m67.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hCollecting multidict<7.0,>=4.5\n", + " Downloading multidict-6.0.2-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (121 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m121.3/121.3 kB\u001b[0m \u001b[31m37.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hCollecting charset-normalizer<3.0,>=2.0\n", + " Downloading charset_normalizer-2.1.0-py3-none-any.whl (39 kB)\n", + "Collecting frozenlist>=1.1.1\n", + " Downloading frozenlist-1.3.1-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl (161 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m161.3/161.3 kB\u001b[0m \u001b[31m55.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hRequirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.8/dist-packages (from pandas->datasets->optimum-graphcore->optimum[graphcore]) (2022.1)\n", "Requirement already satisfied: python-dateutil>=2.8.1 in /usr/local/lib/python3.8/dist-packages (from pandas->datasets->optimum-graphcore->optimum[graphcore]) (2.8.2)\n", - "Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.8/dist-packages (from pandas->datasets->optimum-graphcore->optimum[graphcore]) (2022.1)\n", "Requirement already satisfied: six>=1.5 in /usr/lib/python3/dist-packages (from python-dateutil>=2.8.1->pandas->datasets->optimum-graphcore->optimum[graphcore]) (1.14.0)\n", "Requirement already satisfied: idna>=2.0 in /usr/lib/python3/dist-packages (from yarl<2.0,>=1.0->aiohttp->datasets->optimum-graphcore->optimum[graphcore]) (2.8)\n", + "Building wheels for collected packages: optimum\n", + " Building wheel for optimum (pyproject.toml) ... \u001b[?25ldone\n", + "\u001b[?25h Created wheel for optimum: filename=optimum-1.3.0-py3-none-any.whl size=116103 sha256=b04891910b1c1fcc4f90dc845905c05a3cf1c411edbaf66a51a7385dc8010e04\n", + " Stored in directory: /root/.cache/pip/wheels/a8/49/a9/ab524a92fe1551a7bb40122388362379df296e7997819e1d76\n", + "Successfully built optimum\n", + "Installing collected packages: tokenizers, sentencepiece, mpmath, xxhash, urllib3, sympy, regex, protobuf, pillow, numpy, multidict, humanfriendly, fsspec, frozenlist, filelock, dill, charset-normalizer, async-timeout, yarl, scipy, responses, pyarrow, pandas, multiprocess, huggingface-hub, coloredlogs, aiosignal, transformers, aiohttp, optimum, datasets, optimum-graphcore\n", + " Attempting uninstall: urllib3\n", + " Found existing installation: urllib3 1.25.8\n", + " Uninstalling urllib3-1.25.8:\n", + " Successfully uninstalled urllib3-1.25.8\n", + "Successfully installed aiohttp-3.8.1 aiosignal-1.2.0 async-timeout-4.0.2 charset-normalizer-2.1.0 coloredlogs-15.0.1 datasets-2.4.0 dill-0.3.5.1 filelock-3.7.1 frozenlist-1.3.1 fsspec-2022.7.1 huggingface-hub-0.8.1 humanfriendly-10.0 mpmath-1.2.1 multidict-6.0.2 multiprocess-0.70.13 numpy-1.23.1 optimum-1.3.0 optimum-graphcore-0.3.1 pandas-1.4.3 pillow-9.2.0 protobuf-3.20.1 pyarrow-9.0.0 regex-2022.7.25 responses-0.18.0 scipy-1.9.0 sentencepiece-0.1.97 sympy-1.10.1 tokenizers-0.12.1 transformers-4.21.1 urllib3-1.26.11 xxhash-3.0.0 yarl-1.8.1\n", "\u001b[33mWARNING: Running pip as the 'root' user can result in broken permissions and conflicting behaviour with the system package manager. It is recommended to use a virtual environment instead: https://pip.pypa.io/warnings/venv\u001b[0m\u001b[33m\n", "\u001b[0mNote: you may need to restart the kernel to use updated packages.\n" ] @@ -175,15 +211,15 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 2, "id": "57c2e987-a031-4278-9bba-2916ef535583", "metadata": { "execution": { - "iopub.execute_input": "2022-08-03T22:03:38.969839Z", - "iopub.status.busy": "2022-08-03T22:03:38.969475Z", - "iopub.status.idle": "2022-08-03T22:03:48.741584Z", - "shell.execute_reply": "2022-08-03T22:03:48.740684Z", - "shell.execute_reply.started": "2022-08-03T22:03:38.969813Z" + "iopub.execute_input": "2022-08-09T23:50:18.673247Z", + "iopub.status.busy": "2022-08-09T23:50:18.673051Z", + "iopub.status.idle": "2022-08-09T23:50:28.354984Z", + "shell.execute_reply": "2022-08-09T23:50:28.354194Z", + "shell.execute_reply.started": "2022-08-09T23:50:18.673224Z" } }, "outputs": [ @@ -193,45 +229,45 @@ "text": [ "Collecting transformers==4.20.0\n", " Downloading transformers-4.20.0-py3-none-any.whl (4.4 MB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m4.4/4.4 MB\u001b[0m \u001b[31m41.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m00:01\u001b[0m\n", - "\u001b[?25hCollecting pyyaml>=5.1\n", - " Downloading PyYAML-6.0-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl (701 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m701.2/701.2 kB\u001b[0m \u001b[31m50.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m4.4/4.4 MB\u001b[0m \u001b[31m50.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0ma \u001b[36m0:00:01\u001b[0m\n", "\u001b[?25hCollecting numpy>=1.17\n", " Using cached numpy-1.23.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (17.1 MB)\n", - "Collecting requests\n", - " Downloading requests-2.28.1-py3-none-any.whl (62 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m62.8/62.8 kB\u001b[0m \u001b[31m23.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hCollecting packaging>=20.0\n", + "Collecting packaging>=20.0\n", " Downloading packaging-21.3-py3-none-any.whl (40 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m40.8/40.8 kB\u001b[0m \u001b[31m10.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m40.8/40.8 kB\u001b[0m \u001b[31m12.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hCollecting pyyaml>=5.1\n", + " Downloading PyYAML-6.0-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl (701 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m701.2/701.2 kB\u001b[0m \u001b[31m86.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hCollecting tokenizers!=0.11.3,<0.13,>=0.11.1\n", " Using cached tokenizers-0.12.1-cp38-cp38-manylinux_2_12_x86_64.manylinux2010_x86_64.whl (6.6 MB)\n", - "Collecting tqdm>=4.27\n", - " Downloading tqdm-4.64.0-py2.py3-none-any.whl (78 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m78.4/78.4 kB\u001b[0m \u001b[31m29.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hCollecting filelock\n", - " Using cached filelock-3.7.1-py3-none-any.whl (10 kB)\n", "Collecting regex!=2019.12.17\n", " Using cached regex-2022.7.25-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (768 kB)\n", "Collecting huggingface-hub<1.0,>=0.1.0\n", " Using cached huggingface_hub-0.8.1-py3-none-any.whl (101 kB)\n", - "Collecting typing-extensions>=3.7.4.3\n", + "Collecting filelock\n", + " Using cached filelock-3.7.1-py3-none-any.whl (10 kB)\n", + "Collecting tqdm>=4.27\n", + " Downloading tqdm-4.64.0-py2.py3-none-any.whl (78 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m78.4/78.4 kB\u001b[0m \u001b[31m27.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hCollecting requests\n", + " Downloading requests-2.28.1-py3-none-any.whl (62 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m62.8/62.8 kB\u001b[0m \u001b[31m19.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hCollecting typing-extensions>=3.7.4.3\n", " Downloading typing_extensions-4.3.0-py3-none-any.whl (25 kB)\n", "Collecting pyparsing!=3.0.5,>=2.0.2\n", " Downloading pyparsing-3.0.9-py3-none-any.whl (98 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m98.3/98.3 kB\u001b[0m \u001b[31m37.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hCollecting charset-normalizer<3,>=2\n", - " Using cached charset_normalizer-2.1.0-py3-none-any.whl (39 kB)\n", - "Collecting certifi>=2017.4.17\n", - " Downloading certifi-2022.6.15-py3-none-any.whl (160 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m160.2/160.2 kB\u001b[0m \u001b[31m53.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m98.3/98.3 kB\u001b[0m \u001b[31m3.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hCollecting urllib3<1.27,>=1.21.1\n", " Using cached urllib3-1.26.11-py2.py3-none-any.whl (139 kB)\n", - "Collecting idna<4,>=2.5\n", + "Collecting certifi>=2017.4.17\n", + " Downloading certifi-2022.6.15-py3-none-any.whl (160 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m160.2/160.2 kB\u001b[0m \u001b[31m48.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hCollecting idna<4,>=2.5\n", " Downloading idna-3.3-py3-none-any.whl (61 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m61.2/61.2 kB\u001b[0m \u001b[31m19.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hInstalling collected packages: tokenizers, urllib3, typing-extensions, tqdm, regex, pyyaml, pyparsing, numpy, idna, filelock, charset-normalizer, certifi, requests, packaging, huggingface-hub, transformers\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m61.2/61.2 kB\u001b[0m \u001b[31m22.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hCollecting charset-normalizer<3,>=2\n", + " Using cached charset_normalizer-2.1.0-py3-none-any.whl (39 kB)\n", + "Installing collected packages: tokenizers, urllib3, typing-extensions, tqdm, regex, pyyaml, pyparsing, numpy, idna, filelock, charset-normalizer, certifi, requests, packaging, huggingface-hub, transformers\n", " Attempting uninstall: tokenizers\n", " Found existing installation: tokenizers 0.12.1\n", " Uninstalling tokenizers-0.12.1:\n", @@ -293,11 +329,11 @@ " Uninstalling huggingface-hub-0.8.1:\n", " Successfully uninstalled huggingface-hub-0.8.1\n", " Attempting uninstall: transformers\n", - " Found existing installation: transformers 4.21.0\n", - " Uninstalling transformers-4.21.0:\n", - " Successfully uninstalled transformers-4.21.0\n", + " Found existing installation: transformers 4.21.1\n", + " Uninstalling transformers-4.21.1:\n", + " Successfully uninstalled transformers-4.21.1\n", "\u001b[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.\n", - "awscli 1.25.40 requires PyYAML<5.5,>=3.10, but you have pyyaml 6.0 which is incompatible.\u001b[0m\u001b[31m\n", + "awscli 1.25.45 requires PyYAML<5.5,>=3.10, but you have pyyaml 6.0 which is incompatible.\u001b[0m\u001b[31m\n", "\u001b[0mSuccessfully installed certifi-2022.6.15 charset-normalizer-2.1.0 filelock-3.7.1 huggingface-hub-0.8.1 idna-3.3 numpy-1.23.1 packaging-21.3 pyparsing-3.0.9 pyyaml-6.0 regex-2022.7.25 requests-2.28.1 tokenizers-0.12.1 tqdm-4.64.0 transformers-4.20.0 typing-extensions-4.3.0 urllib3-1.26.11\n", "\u001b[33mWARNING: Running pip as the 'root' user can result in broken permissions and conflicting behaviour with the system package manager. It is recommended to use a virtual environment instead: https://pip.pypa.io/warnings/venv\u001b[0m\u001b[33m\n", "\u001b[0mNote: you may need to restart the kernel to use updated packages.\n" @@ -310,15 +346,15 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 3, "id": "e81f8d2a", "metadata": { "execution": { - "iopub.execute_input": "2022-08-03T22:04:13.057906Z", - "iopub.status.busy": "2022-08-03T22:04:13.057548Z", - "iopub.status.idle": "2022-08-03T22:04:14.962239Z", - "shell.execute_reply": "2022-08-03T22:04:14.961325Z", - "shell.execute_reply.started": "2022-08-03T22:04:13.057879Z" + "iopub.execute_input": "2022-08-09T23:50:28.356923Z", + "iopub.status.busy": "2022-08-09T23:50:28.356711Z", + "iopub.status.idle": "2022-08-09T23:50:30.123723Z", + "shell.execute_reply": "2022-08-09T23:50:30.123103Z", + "shell.execute_reply.started": "2022-08-09T23:50:28.356903Z" } }, "outputs": [ @@ -357,15 +393,15 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 4, "id": "cefc3902", "metadata": { "execution": { - "iopub.execute_input": "2022-08-03T22:04:22.242620Z", - "iopub.status.busy": "2022-08-03T22:04:22.242227Z", - "iopub.status.idle": "2022-08-03T22:04:22.246945Z", - "shell.execute_reply": "2022-08-03T22:04:22.246324Z", - "shell.execute_reply.started": "2022-08-03T22:04:22.242599Z" + "iopub.execute_input": "2022-08-09T23:50:30.125241Z", + "iopub.status.busy": "2022-08-09T23:50:30.124952Z", + "iopub.status.idle": "2022-08-09T23:50:30.129030Z", + "shell.execute_reply": "2022-08-09T23:50:30.128424Z", + "shell.execute_reply.started": "2022-08-09T23:50:30.125222Z" } }, "outputs": [], @@ -386,15 +422,15 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 5, "id": "ac4440fd", "metadata": { "execution": { - "iopub.execute_input": "2022-08-03T22:04:24.950808Z", - "iopub.status.busy": "2022-08-03T22:04:24.950501Z", - "iopub.status.idle": "2022-08-03T22:04:24.954508Z", - "shell.execute_reply": "2022-08-03T22:04:24.953814Z", - "shell.execute_reply.started": "2022-08-03T22:04:24.950789Z" + "iopub.execute_input": "2022-08-09T23:50:30.129906Z", + "iopub.status.busy": "2022-08-09T23:50:30.129717Z", + "iopub.status.idle": "2022-08-09T23:50:30.134553Z", + "shell.execute_reply": "2022-08-09T23:50:30.133891Z", + "shell.execute_reply.started": "2022-08-09T23:50:30.129882Z" } }, "outputs": [], @@ -436,22 +472,22 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 6, "id": "53501b52", "metadata": { "execution": { - "iopub.execute_input": "2022-08-03T22:04:28.025954Z", - "iopub.status.busy": "2022-08-03T22:04:28.025763Z", - "iopub.status.idle": "2022-08-03T22:04:37.714390Z", - "shell.execute_reply": "2022-08-03T22:04:37.713451Z", - "shell.execute_reply.started": "2022-08-03T22:04:28.025938Z" + "iopub.execute_input": "2022-08-09T23:50:30.135442Z", + "iopub.status.busy": "2022-08-09T23:50:30.135264Z", + "iopub.status.idle": "2022-08-09T23:50:39.949252Z", + "shell.execute_reply": "2022-08-09T23:50:39.948440Z", + "shell.execute_reply.started": "2022-08-09T23:50:30.135425Z" } }, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "2df4f83dc0624c6b98c0119876e9c996", + "model_id": "dbbd676384d449f993908710fb627ec0", "version_major": 2, "version_minor": 0 }, @@ -465,7 +501,7 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "76d0004062d24f9a93c5cd49eac73731", + "model_id": "eabf1bcdb696404599e16ace5ba28e75", "version_major": 2, "version_minor": 0 }, @@ -486,7 +522,7 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "862c926165114b828b95350eb548423d", + "model_id": "94e48b3a91134ff4aceb6eea25b7cc97", "version_major": 2, "version_minor": 0 }, @@ -500,7 +536,7 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "6ec71509f3e24fe5870211fd2fb11161", + "model_id": "0c5c809262c646fca66a0b88a7dfd4fd", "version_major": 2, "version_minor": 0 }, @@ -514,7 +550,7 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "4f845d8c0bb94a65b4657da213d0f57b", + "model_id": "38251faabe0941f7bb6bcf3d65b8dd0c", "version_major": 2, "version_minor": 0 }, @@ -528,7 +564,7 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "20f855dd2de44c3e980368330649cba1", + "model_id": "ca401b264912461d8fafb9cf21a8dbef", "version_major": 2, "version_minor": 0 }, @@ -542,7 +578,7 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "be6a9e2db93c4e1d9366088bbaa65b7e", + "model_id": "768d7468c2f14e1da3712dc868003dbb", "version_major": 2, "version_minor": 0 }, @@ -556,7 +592,7 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "8b4a3367aaaf4a18aec372b0039d26c3", + "model_id": "05359e88dd164a598c09d3d0aef0699e", "version_major": 2, "version_minor": 0 }, @@ -577,7 +613,7 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "bbb6a3187bec4e279d0fc76765e2ff70", + "model_id": "9d1dbd46285644269fc6541b64aca072", "version_major": 2, "version_minor": 0 }, @@ -603,15 +639,15 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 7, "id": "a8269c84", "metadata": { "execution": { - "iopub.execute_input": "2022-08-03T22:04:46.593028Z", - "iopub.status.busy": "2022-08-03T22:04:46.592742Z", - "iopub.status.idle": "2022-08-03T22:04:46.602712Z", - "shell.execute_reply": "2022-08-03T22:04:46.601904Z", - "shell.execute_reply.started": "2022-08-03T22:04:46.592999Z" + "iopub.execute_input": "2022-08-09T23:50:39.950525Z", + "iopub.status.busy": "2022-08-09T23:50:39.950353Z", + "iopub.status.idle": "2022-08-09T23:50:39.958794Z", + "shell.execute_reply": "2022-08-09T23:50:39.958199Z", + "shell.execute_reply.started": "2022-08-09T23:50:39.950508Z" } }, "outputs": [ @@ -630,7 +666,7 @@ "})" ] }, - "execution_count": 10, + "execution_count": 7, "metadata": {}, "output_type": "execute_result" } @@ -651,15 +687,15 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 8, "id": "35d928f2", "metadata": { "execution": { - "iopub.execute_input": "2022-08-03T22:04:50.213827Z", - "iopub.status.busy": "2022-08-03T22:04:50.213626Z", - "iopub.status.idle": "2022-08-03T22:04:50.219972Z", - "shell.execute_reply": "2022-08-03T22:04:50.219202Z", - "shell.execute_reply.started": "2022-08-03T22:04:50.213810Z" + "iopub.execute_input": "2022-08-09T23:50:39.961568Z", + "iopub.status.busy": "2022-08-09T23:50:39.961373Z", + "iopub.status.idle": "2022-08-09T23:50:39.965800Z", + "shell.execute_reply": "2022-08-09T23:50:39.965249Z", + "shell.execute_reply.started": "2022-08-09T23:50:39.961551Z" } }, "outputs": [ @@ -673,7 +709,7 @@ " 'answers': {'text': ['middle class'], 'answer_start': [274]}}" ] }, - "execution_count": 11, + "execution_count": 8, "metadata": {}, "output_type": "execute_result" } @@ -694,22 +730,22 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 9, "id": "05b9d67d", "metadata": { "execution": { - "iopub.execute_input": "2022-08-03T22:04:53.481567Z", - "iopub.status.busy": "2022-08-03T22:04:53.481382Z", - "iopub.status.idle": "2022-08-03T22:04:54.688922Z", - "shell.execute_reply": "2022-08-03T22:04:54.687960Z", - "shell.execute_reply.started": "2022-08-03T22:04:53.481550Z" + "iopub.execute_input": "2022-08-09T23:50:39.968582Z", + "iopub.status.busy": "2022-08-09T23:50:39.968423Z", + "iopub.status.idle": "2022-08-09T23:50:41.029453Z", + "shell.execute_reply": "2022-08-09T23:50:41.028692Z", + "shell.execute_reply.started": "2022-08-09T23:50:39.968566Z" } }, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "64e7254faa4741cc8f49016d425b98bc", + "model_id": "79e8fd41adf540edb4df20d5ea298a3f", "version_major": 2, "version_minor": 0 }, @@ -723,7 +759,7 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "a1235610a0ad49908a88736962951fba", + "model_id": "44cf75be3ecd492f9200fe168f3b8923", "version_major": 2, "version_minor": 0 }, @@ -737,7 +773,7 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "a60c494f33804006bf53d1b2a06f8c8d", + "model_id": "81a66f4638d849dfbb55de1bf038d206", "version_major": 2, "version_minor": 0 }, @@ -751,7 +787,7 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "3106465384344ab39035bd62fe034f47", + "model_id": "564e7a86595343ef9c8eaff951c591b8", "version_major": 2, "version_minor": 0 }, @@ -769,15 +805,15 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 10, "id": "dce8edb5", "metadata": { "execution": { - "iopub.execute_input": "2022-08-03T22:04:57.025252Z", - "iopub.status.busy": "2022-08-03T22:04:57.024904Z", - "iopub.status.idle": "2022-08-03T22:04:57.047832Z", - "shell.execute_reply": "2022-08-03T22:04:57.046904Z", - "shell.execute_reply.started": "2022-08-03T22:04:57.025232Z" + "iopub.execute_input": "2022-08-09T23:50:41.032062Z", + "iopub.status.busy": "2022-08-09T23:50:41.031865Z", + "iopub.status.idle": "2022-08-09T23:50:41.048898Z", + "shell.execute_reply": "2022-08-09T23:50:41.048275Z", + "shell.execute_reply.started": "2022-08-09T23:50:41.032044Z" } }, "outputs": [], @@ -798,15 +834,15 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 11, "id": "aa946365", "metadata": { "execution": { - "iopub.execute_input": "2022-08-03T22:05:01.159401Z", - "iopub.status.busy": "2022-08-03T22:05:01.159093Z", - "iopub.status.idle": "2022-08-03T22:05:01.164347Z", - "shell.execute_reply": "2022-08-03T22:05:01.163417Z", - "shell.execute_reply.started": "2022-08-03T22:05:01.159382Z" + "iopub.execute_input": "2022-08-09T23:50:41.051069Z", + "iopub.status.busy": "2022-08-09T23:50:41.050904Z", + "iopub.status.idle": "2022-08-09T23:50:41.054752Z", + "shell.execute_reply": "2022-08-09T23:50:41.054184Z", + "shell.execute_reply.started": "2022-08-09T23:50:41.051051Z" }, "scrolled": true }, @@ -817,7 +853,7 @@ "dict_keys(['input_ids', 'token_type_ids', 'attention_mask', 'offset_mapping', 'overflow_to_sample_mapping'])" ] }, - "execution_count": 14, + "execution_count": 11, "metadata": {}, "output_type": "execute_result" } @@ -836,15 +872,15 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": 12, "id": "e2c1babc", "metadata": { "execution": { - "iopub.execute_input": "2022-08-03T22:05:03.427294Z", - "iopub.status.busy": "2022-08-03T22:05:03.427093Z", - "iopub.status.idle": "2022-08-03T22:05:03.432865Z", - "shell.execute_reply": "2022-08-03T22:05:03.432206Z", - "shell.execute_reply.started": "2022-08-03T22:05:03.427277Z" + "iopub.execute_input": "2022-08-09T23:50:41.056695Z", + "iopub.status.busy": "2022-08-09T23:50:41.056537Z", + "iopub.status.idle": "2022-08-09T23:50:41.060439Z", + "shell.execute_reply": "2022-08-09T23:50:41.059851Z", + "shell.execute_reply.started": "2022-08-09T23:50:41.056679Z" } }, "outputs": [ @@ -885,7 +921,7 @@ " 0]" ] }, - "execution_count": 15, + "execution_count": 12, "metadata": {}, "output_type": "execute_result" } @@ -896,15 +932,15 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": 13, "id": "803b71d1", "metadata": { "execution": { - "iopub.execute_input": "2022-08-03T22:05:06.411553Z", - "iopub.status.busy": "2022-08-03T22:05:06.411277Z", - "iopub.status.idle": "2022-08-03T22:05:06.416664Z", - "shell.execute_reply": "2022-08-03T22:05:06.415638Z", - "shell.execute_reply.started": "2022-08-03T22:05:06.411534Z" + "iopub.execute_input": "2022-08-09T23:50:41.062391Z", + "iopub.status.busy": "2022-08-09T23:50:41.062236Z", + "iopub.status.idle": "2022-08-09T23:50:41.065805Z", + "shell.execute_reply": "2022-08-09T23:50:41.065263Z", + "shell.execute_reply.started": "2022-08-09T23:50:41.062376Z" } }, "outputs": [ @@ -914,7 +950,7 @@ "'[CLS] when were institutes of technology developed? [SEP] institutes of technology in venezuela were developed in the 1950s [SEP] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD]'" ] }, - "execution_count": 16, + "execution_count": 13, "metadata": {}, "output_type": "execute_result" } @@ -933,15 +969,15 @@ }, { "cell_type": "code", - "execution_count": 17, + "execution_count": 14, "id": "ba329d1e", "metadata": { "execution": { - "iopub.execute_input": "2022-08-03T22:05:08.972723Z", - "iopub.status.busy": "2022-08-03T22:05:08.972370Z", - "iopub.status.idle": "2022-08-03T22:05:08.976164Z", - "shell.execute_reply": "2022-08-03T22:05:08.975409Z", - "shell.execute_reply.started": "2022-08-03T22:05:08.972702Z" + "iopub.execute_input": "2022-08-09T23:50:41.067637Z", + "iopub.status.busy": "2022-08-09T23:50:41.067481Z", + "iopub.status.idle": "2022-08-09T23:50:41.070148Z", + "shell.execute_reply": "2022-08-09T23:50:41.069596Z", + "shell.execute_reply.started": "2022-08-09T23:50:41.067622Z" } }, "outputs": [], @@ -951,22 +987,22 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": 15, "id": "176caef7", "metadata": { "execution": { - "iopub.execute_input": "2022-08-03T22:05:10.990655Z", - "iopub.status.busy": "2022-08-03T22:05:10.990366Z", - "iopub.status.idle": "2022-08-03T22:06:18.326563Z", - "shell.execute_reply": "2022-08-03T22:06:18.325642Z", - "shell.execute_reply.started": "2022-08-03T22:05:10.990636Z" + "iopub.execute_input": "2022-08-09T23:50:41.072072Z", + "iopub.status.busy": "2022-08-09T23:50:41.071916Z", + "iopub.status.idle": "2022-08-09T23:51:45.475466Z", + "shell.execute_reply": "2022-08-09T23:51:45.474592Z", + "shell.execute_reply.started": "2022-08-09T23:50:41.072056Z" } }, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "009029d424d346acbb6932a92b2a36f8", + "model_id": "596ac67f50b3409cb1210caa4162a59a", "version_major": 2, "version_minor": 0 }, @@ -980,7 +1016,7 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "f503372bad034255bd49dad91dc37a78", + "model_id": "50e7f589c8fa4082b437ae17cf27585d", "version_major": 2, "version_minor": 0 }, @@ -1029,22 +1065,22 @@ }, { "cell_type": "code", - "execution_count": 19, + "execution_count": 16, "id": "61f65ec9", "metadata": { "execution": { - "iopub.execute_input": "2022-08-03T22:20:57.330909Z", - "iopub.status.busy": "2022-08-03T22:20:57.330631Z", - "iopub.status.idle": "2022-08-03T22:21:19.265639Z", - "shell.execute_reply": "2022-08-03T22:21:19.264788Z", - "shell.execute_reply.started": "2022-08-03T22:20:57.330890Z" + "iopub.execute_input": "2022-08-09T23:51:45.483369Z", + "iopub.status.busy": "2022-08-09T23:51:45.483180Z", + "iopub.status.idle": "2022-08-09T23:51:57.803522Z", + "shell.execute_reply": "2022-08-09T23:51:57.802903Z", + "shell.execute_reply.started": "2022-08-09T23:51:45.483348Z" } }, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "5fdb14e675754dd4a31153624fe6bf50", + "model_id": "15a7a9cc322e4ab4a79a47649eb33a73", "version_major": 2, "version_minor": 0 }, @@ -1058,7 +1094,7 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "34da621d16154a34b04d0834567bfe3c", + "model_id": "abd291e071e84c57b89013befe725bd7", "version_major": 2, "version_minor": 0 }, @@ -1073,7 +1109,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "Some weights of the model checkpoint at Graphcore/bert-large-uncased were not used when initializing BertForQuestionAnswering: ['cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.bias', 'cls.predictions.decoder.bias']\n", + "Some weights of the model checkpoint at Graphcore/bert-large-uncased were not used when initializing BertForQuestionAnswering: ['cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.bias', 'cls.predictions.transform.dense.bias']\n", "- This IS expected if you are initializing BertForQuestionAnswering from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).\n", "- This IS NOT expected if you are initializing BertForQuestionAnswering from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).\n", "Some weights of BertForQuestionAnswering were not initialized from the model checkpoint at Graphcore/bert-large-uncased and are newly initialized: ['qa_outputs.bias', 'qa_outputs.weight']\n", @@ -1166,22 +1202,22 @@ }, { "cell_type": "code", - "execution_count": 20, + "execution_count": 17, "id": "f06484c4", "metadata": { "execution": { - "iopub.execute_input": "2022-08-03T22:21:28.423906Z", - "iopub.status.busy": "2022-08-03T22:21:28.423675Z", - "iopub.status.idle": "2022-08-03T22:21:28.739224Z", - "shell.execute_reply": "2022-08-03T22:21:28.738491Z", - "shell.execute_reply.started": "2022-08-03T22:21:28.423884Z" + "iopub.execute_input": "2022-08-09T23:51:57.806519Z", + "iopub.status.busy": "2022-08-09T23:51:57.806312Z", + "iopub.status.idle": "2022-08-09T23:51:58.058693Z", + "shell.execute_reply": "2022-08-09T23:51:58.058128Z", + "shell.execute_reply.started": "2022-08-09T23:51:57.806498Z" } }, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "e162b7946a034b1fb0d5c28c1f041e12", + "model_id": "5bbfce95e8eb4660b9c35f9cd3f2639e", "version_major": 2, "version_minor": 0 }, @@ -1199,15 +1235,15 @@ }, { "cell_type": "code", - "execution_count": 21, + "execution_count": 18, "id": "c6e31859", "metadata": { "execution": { - "iopub.execute_input": "2022-08-03T22:21:32.864701Z", - "iopub.status.busy": "2022-08-03T22:21:32.864482Z", - "iopub.status.idle": "2022-08-03T22:21:32.870140Z", - "shell.execute_reply": "2022-08-03T22:21:32.869240Z", - "shell.execute_reply.started": "2022-08-03T22:21:32.864683Z" + "iopub.execute_input": "2022-08-09T23:51:58.060730Z", + "iopub.status.busy": "2022-08-09T23:51:58.060551Z", + "iopub.status.idle": "2022-08-09T23:51:58.065412Z", + "shell.execute_reply": "2022-08-09T23:51:58.064937Z", + "shell.execute_reply.started": "2022-08-09T23:51:58.060713Z" } }, "outputs": [ @@ -1220,7 +1256,7 @@ " \"embedding_serialization_factor\": 2,\n", " \"enable_half_first_order_momentum\": true,\n", " \"enable_half_partials\": true,\n", - " \"executable_cache_dir\": \"./exe_cache\",\n", + " \"executable_cache_dir\": \"/tmp/exe_cache/\",\n", " \"execute_encoder_on_cpu_for_generation\": false,\n", " \"gradient_accumulation_steps\": 16,\n", " \"inference_device_iterations\": 4,\n", @@ -1266,7 +1302,7 @@ "}" ] }, - "execution_count": 21, + "execution_count": 18, "metadata": {}, "output_type": "execute_result" } @@ -1285,15 +1321,15 @@ }, { "cell_type": "code", - "execution_count": 22, + "execution_count": 19, "id": "12936f32", "metadata": { "execution": { - "iopub.execute_input": "2022-08-03T22:21:38.149579Z", - "iopub.status.busy": "2022-08-03T22:21:38.149356Z", - "iopub.status.idle": "2022-08-03T22:21:38.153335Z", - "shell.execute_reply": "2022-08-03T22:21:38.152710Z", - "shell.execute_reply.started": "2022-08-03T22:21:38.149561Z" + "iopub.execute_input": "2022-08-09T23:51:58.067591Z", + "iopub.status.busy": "2022-08-09T23:51:58.067430Z", + "iopub.status.idle": "2022-08-09T23:51:58.070499Z", + "shell.execute_reply": "2022-08-09T23:51:58.070005Z", + "shell.execute_reply.started": "2022-08-09T23:51:58.067575Z" } }, "outputs": [], @@ -1307,15 +1343,15 @@ }, { "cell_type": "code", - "execution_count": 23, + "execution_count": 20, "id": "d9193054", "metadata": { "execution": { - "iopub.execute_input": "2022-08-03T22:21:45.645244Z", - "iopub.status.busy": "2022-08-03T22:21:45.644945Z", - "iopub.status.idle": "2022-08-03T22:21:45.650344Z", - "shell.execute_reply": "2022-08-03T22:21:45.649610Z", - "shell.execute_reply.started": "2022-08-03T22:21:45.645224Z" + "iopub.execute_input": "2022-08-09T23:51:58.072539Z", + "iopub.status.busy": "2022-08-09T23:51:58.072378Z", + "iopub.status.idle": "2022-08-09T23:51:58.076643Z", + "shell.execute_reply": "2022-08-09T23:51:58.076106Z", + "shell.execute_reply.started": "2022-08-09T23:51:58.072519Z" } }, "outputs": [], @@ -1348,15 +1384,15 @@ }, { "cell_type": "code", - "execution_count": 24, + "execution_count": 21, "id": "d041a185", "metadata": { "execution": { - "iopub.execute_input": "2022-08-03T22:21:50.738312Z", - "iopub.status.busy": "2022-08-03T22:21:50.738105Z", - "iopub.status.idle": "2022-08-03T22:21:50.741741Z", - "shell.execute_reply": "2022-08-03T22:21:50.740936Z", - "shell.execute_reply.started": "2022-08-03T22:21:50.738295Z" + "iopub.execute_input": "2022-08-09T23:51:58.078721Z", + "iopub.status.busy": "2022-08-09T23:51:58.078440Z", + "iopub.status.idle": "2022-08-09T23:51:58.081638Z", + "shell.execute_reply": "2022-08-09T23:51:58.080665Z", + "shell.execute_reply.started": "2022-08-09T23:51:58.078704Z" } }, "outputs": [], @@ -1374,15 +1410,15 @@ }, { "cell_type": "code", - "execution_count": 25, + "execution_count": 22, "id": "d4abf64f", "metadata": { "execution": { - "iopub.execute_input": "2022-08-03T22:21:54.841569Z", - "iopub.status.busy": "2022-08-03T22:21:54.841305Z", - "iopub.status.idle": "2022-08-03T22:22:02.412103Z", - "shell.execute_reply": "2022-08-03T22:22:02.410949Z", - "shell.execute_reply.started": "2022-08-03T22:21:54.841549Z" + "iopub.execute_input": "2022-08-09T23:51:58.084107Z", + "iopub.status.busy": "2022-08-09T23:51:58.083920Z", + "iopub.status.idle": "2022-08-09T23:52:03.729749Z", + "shell.execute_reply": "2022-08-09T23:52:03.728827Z", + "shell.execute_reply.started": "2022-08-09T23:51:58.084090Z" } }, "outputs": [ @@ -1433,15 +1469,15 @@ }, { "cell_type": "code", - "execution_count": 26, + "execution_count": 23, "id": "336f77b2", "metadata": { "execution": { - "iopub.execute_input": "2022-08-03T22:22:21.681476Z", - "iopub.status.busy": "2022-08-03T22:22:21.681198Z", - "iopub.status.idle": "2022-08-03T22:42:07.601548Z", - "shell.execute_reply": "2022-08-03T22:42:07.600603Z", - "shell.execute_reply.started": "2022-08-03T22:22:21.681455Z" + "iopub.execute_input": "2022-08-09T23:52:03.732843Z", + "iopub.status.busy": "2022-08-09T23:52:03.732625Z", + "iopub.status.idle": "2022-08-10T00:09:58.578624Z", + "shell.execute_reply": "2022-08-10T00:09:58.577773Z", + "shell.execute_reply.started": "2022-08-09T23:52:03.732821Z" }, "scrolled": true }, @@ -1451,8 +1487,8 @@ "output_type": "stream", "text": [ "Compiling Model...\n", - "Graph compilation: 100%|██████████| 100/100 [08:03<00:00]\n", - "Compiled/Loaded model in 630.04055990302 secs\n", + "Graph compilation: 100%|██████████| 100/100 [07:44<00:00]\n", + "Compiled/Loaded model in 567.0647092440631 secs\n", "***** Running training *****\n", " Num examples = 88524\n", " Num Epochs = 2\n", @@ -1467,7 +1503,7 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "6153426a72434866ac5efaa761334949", + "model_id": "2c89324758d04b32b4ec2ee5d0127073", "version_major": 2, "version_minor": 0 }, @@ -1482,31 +1518,31 @@ "name": "stdout", "output_type": "stream", "text": [ - "{'loss': 2.9382, 'learning_rate': 0.0001927536231884058, 'epoch': 0.07}\n", - "{'loss': 1.4743, 'learning_rate': 0.0001855072463768116, 'epoch': 0.14}\n", - "{'loss': 1.2007, 'learning_rate': 0.0001782608695652174, 'epoch': 0.22}\n", - "{'loss': 1.1122, 'learning_rate': 0.0001710144927536232, 'epoch': 0.29}\n", - "{'loss': 0.9775, 'learning_rate': 0.000163768115942029, 'epoch': 0.36}\n", - "{'loss': 0.9398, 'learning_rate': 0.0001565217391304348, 'epoch': 0.43}\n", - "{'loss': 1.0378, 'learning_rate': 0.00014927536231884058, 'epoch': 0.51}\n", - "{'loss': 1.1629, 'learning_rate': 0.00014202898550724638, 'epoch': 0.58}\n", - "{'loss': 0.9239, 'learning_rate': 0.0001347826086956522, 'epoch': 0.65}\n", - "{'loss': 1.0644, 'learning_rate': 0.00012753623188405797, 'epoch': 0.72}\n", - "{'loss': 0.9864, 'learning_rate': 0.00012028985507246378, 'epoch': 0.8}\n" + "{'loss': 3.1793, 'learning_rate': 0.0001927536231884058, 'epoch': 0.07}\n", + "{'loss': 1.4808, 'learning_rate': 0.0001855072463768116, 'epoch': 0.14}\n", + "{'loss': 1.1683, 'learning_rate': 0.0001782608695652174, 'epoch': 0.22}\n", + "{'loss': 1.0593, 'learning_rate': 0.0001710144927536232, 'epoch': 0.29}\n", + "{'loss': 0.9102, 'learning_rate': 0.000163768115942029, 'epoch': 0.36}\n", + "{'loss': 0.9987, 'learning_rate': 0.0001565217391304348, 'epoch': 0.43}\n", + "{'loss': 0.9337, 'learning_rate': 0.00014927536231884058, 'epoch': 0.51}\n", + "{'loss': 1.2123, 'learning_rate': 0.00014202898550724638, 'epoch': 0.58}\n", + "{'loss': 0.9466, 'learning_rate': 0.0001347826086956522, 'epoch': 0.65}\n", + "{'loss': 1.0442, 'learning_rate': 0.00012753623188405797, 'epoch': 0.72}\n", + "{'loss': 0.9601, 'learning_rate': 0.00012028985507246378, 'epoch': 0.8}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "Saving model checkpoint to ./outputs/checkpoint-300\n" + "Saving model checkpoint to /tmp/outputs/checkpoint-300\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ - "{'loss': 0.897, 'learning_rate': 0.00011304347826086956, 'epoch': 0.87}\n" + "{'loss': 0.8503, 'learning_rate': 0.00011304347826086956, 'epoch': 0.87}\n" ] }, { @@ -1541,38 +1577,38 @@ "Encoder 23 --> IPU 3\n", "QA Outputs --> IPU 3\n", "-----------------------------------------------------------\n", - "Configuration saved in ./outputs/checkpoint-300/ipu_config.json\n" + "Configuration saved in /tmp/outputs/checkpoint-300/ipu_config.json\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ - "{'loss': 0.8295, 'learning_rate': 0.00010579710144927538, 'epoch': 0.94}\n", - "{'loss': 0.9868, 'learning_rate': 9.855072463768117e-05, 'epoch': 1.01}\n", - "{'loss': 0.6338, 'learning_rate': 9.130434782608696e-05, 'epoch': 1.09}\n", - "{'loss': 0.6879, 'learning_rate': 8.405797101449276e-05, 'epoch': 1.16}\n", - "{'loss': 0.5351, 'learning_rate': 7.681159420289855e-05, 'epoch': 1.23}\n", - "{'loss': 0.5708, 'learning_rate': 6.956521739130436e-05, 'epoch': 1.3}\n", - "{'loss': 0.6664, 'learning_rate': 6.231884057971015e-05, 'epoch': 1.38}\n", - "{'loss': 0.6121, 'learning_rate': 5.507246376811594e-05, 'epoch': 1.45}\n", - "{'loss': 0.6671, 'learning_rate': 4.782608695652174e-05, 'epoch': 1.52}\n", - "{'loss': 0.6459, 'learning_rate': 4.057971014492754e-05, 'epoch': 1.59}\n", - "{'loss': 0.6245, 'learning_rate': 3.3333333333333335e-05, 'epoch': 1.67}\n" + "{'loss': 0.7496, 'learning_rate': 0.00010579710144927538, 'epoch': 0.94}\n", + "{'loss': 0.8778, 'learning_rate': 9.855072463768117e-05, 'epoch': 1.01}\n", + "{'loss': 0.6298, 'learning_rate': 9.130434782608696e-05, 'epoch': 1.09}\n", + "{'loss': 0.7537, 'learning_rate': 8.405797101449276e-05, 'epoch': 1.16}\n", + "{'loss': 0.5141, 'learning_rate': 7.681159420289855e-05, 'epoch': 1.23}\n", + "{'loss': 0.5786, 'learning_rate': 6.956521739130436e-05, 'epoch': 1.3}\n", + "{'loss': 0.5782, 'learning_rate': 6.231884057971015e-05, 'epoch': 1.38}\n", + "{'loss': 0.577, 'learning_rate': 5.507246376811594e-05, 'epoch': 1.45}\n", + "{'loss': 0.6481, 'learning_rate': 4.782608695652174e-05, 'epoch': 1.52}\n", + "{'loss': 0.5955, 'learning_rate': 4.057971014492754e-05, 'epoch': 1.59}\n", + "{'loss': 0.6576, 'learning_rate': 3.3333333333333335e-05, 'epoch': 1.67}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "Saving model checkpoint to ./outputs/checkpoint-600\n" + "Saving model checkpoint to /tmp/outputs/checkpoint-600\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ - "{'loss': 0.5686, 'learning_rate': 2.608695652173913e-05, 'epoch': 1.74}\n" + "{'loss': 0.5979, 'learning_rate': 2.608695652173913e-05, 'epoch': 1.74}\n" ] }, { @@ -1607,16 +1643,16 @@ "Encoder 23 --> IPU 3\n", "QA Outputs --> IPU 3\n", "-----------------------------------------------------------\n", - "Configuration saved in ./outputs/checkpoint-600/ipu_config.json\n" + "Configuration saved in /tmp/outputs/checkpoint-600/ipu_config.json\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ - "{'loss': 0.6963, 'learning_rate': 1.8840579710144928e-05, 'epoch': 1.81}\n", - "{'loss': 0.4872, 'learning_rate': 1.1594202898550725e-05, 'epoch': 1.88}\n", - "{'loss': 0.9261, 'learning_rate': 4.347826086956522e-06, 'epoch': 1.96}\n" + "{'loss': 0.7306, 'learning_rate': 1.8840579710144928e-05, 'epoch': 1.81}\n", + "{'loss': 0.5083, 'learning_rate': 1.1594202898550725e-05, 'epoch': 1.88}\n", + "{'loss': 0.7656, 'learning_rate': 4.347826086956522e-06, 'epoch': 1.96}\n" ] }, { @@ -1634,16 +1670,16 @@ "name": "stdout", "output_type": "stream", "text": [ - "{'train_runtime': 536.9026, 'train_samples_per_second': 328.998, 'train_steps_per_second': 1.285, 'train_loss': 0.9179919698963994, 'epoch': 2.0}\n" + "{'train_runtime': 488.6573, 'train_samples_per_second': 361.48, 'train_steps_per_second': 1.412, 'train_loss': 0.9048264213230299, 'epoch': 2.0}\n" ] }, { "data": { "text/plain": [ - "TrainOutput(global_step=690, training_loss=0.9179919698963994, metrics={'train_runtime': 536.9026, 'train_samples_per_second': 328.998, 'train_steps_per_second': 1.285, 'train_loss': 0.9179919698963994, 'epoch': 2.0})" + "TrainOutput(global_step=690, training_loss=0.9048264213230299, metrics={'train_runtime': 488.6573, 'train_samples_per_second': 361.48, 'train_steps_per_second': 1.412, 'train_loss': 0.9048264213230299, 'epoch': 2.0})" ] }, - "execution_count": 26, + "execution_count": 23, "metadata": {}, "output_type": "execute_result" } @@ -1662,15 +1698,15 @@ }, { "cell_type": "code", - "execution_count": 27, + "execution_count": 24, "id": "9efa9c3b", "metadata": { "execution": { - "iopub.execute_input": "2022-08-03T22:46:25.051618Z", - "iopub.status.busy": "2022-08-03T22:46:25.051346Z", - "iopub.status.idle": "2022-08-03T22:46:32.400187Z", - "shell.execute_reply": "2022-08-03T22:46:32.399512Z", - "shell.execute_reply.started": "2022-08-03T22:46:25.051600Z" + "iopub.execute_input": "2022-08-10T00:09:58.581233Z", + "iopub.status.busy": "2022-08-10T00:09:58.581057Z", + "iopub.status.idle": "2022-08-10T00:09:59.276586Z", + "shell.execute_reply": "2022-08-10T00:09:59.275930Z", + "shell.execute_reply.started": "2022-08-10T00:09:58.581216Z" } }, "outputs": [ @@ -1678,7 +1714,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "Saving model checkpoint to ./outputs\n", + "Saving model checkpoint to /tmp/outputs\n", "-------------------- Device Allocation --------------------\n", "Embedding --> IPU 0\n", "Encoder 0 --> IPU 0\n", @@ -1707,7 +1743,7 @@ "Encoder 23 --> IPU 3\n", "QA Outputs --> IPU 3\n", "-----------------------------------------------------------\n", - "Configuration saved in ./outputs/ipu_config.json\n" + "Configuration saved in /tmp/outputs/ipu_config.json\n" ] } ], @@ -1735,15 +1771,15 @@ }, { "cell_type": "code", - "execution_count": 28, + "execution_count": 25, "id": "7d11df40", "metadata": { "execution": { - "iopub.execute_input": "2022-08-03T22:46:38.382431Z", - "iopub.status.busy": "2022-08-03T22:46:38.382055Z", - "iopub.status.idle": "2022-08-03T22:49:53.231799Z", - "shell.execute_reply": "2022-08-03T22:49:53.230686Z", - "shell.execute_reply.started": "2022-08-03T22:46:38.382411Z" + "iopub.execute_input": "2022-08-10T00:09:59.279150Z", + "iopub.status.busy": "2022-08-10T00:09:59.278964Z", + "iopub.status.idle": "2022-08-10T00:12:58.513096Z", + "shell.execute_reply": "2022-08-10T00:12:58.512029Z", + "shell.execute_reply.started": "2022-08-10T00:09:59.279131Z" } }, "outputs": [ @@ -1751,10 +1787,10 @@ "name": "stderr", "output_type": "stream", "text": [ - "The following columns in the test set don't have a corresponding argument in `PoptorchPipelinedBertForQuestionAnswering.forward` and have been ignored: example_id, offset_mapping.\n", + "The following columns in the test set don't have a corresponding argument in `PoptorchPipelinedBertForQuestionAnswering.forward` and have been ignored: offset_mapping, example_id.\n", "Compiling Model...\n", - "Graph compilation: 100%|██████████| 100/100 [01:49<00:00]\n", - "Compiled/Loaded model in 175.55460721894633 secs\n", + "Graph compilation: 100%|██████████| 100/100 [01:47<00:00]\n", + "Compiled/Loaded model in 160.53022746089846 secs\n", "***** Running Prediction *****\n", " Num examples = 10784\n", " Batch size = 32\n" @@ -1763,7 +1799,7 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "147f7635c8984579bc83d4cb788b7455", + "model_id": "712eda4b55c4426cb25d2fb98bdeee60", "version_major": 2, "version_minor": 0 }, @@ -1781,15 +1817,15 @@ }, { "cell_type": "code", - "execution_count": 29, + "execution_count": 26, "id": "8bc99abf", "metadata": { "execution": { - "iopub.execute_input": "2022-08-03T23:04:35.404016Z", - "iopub.status.busy": "2022-08-03T23:04:35.403731Z", - "iopub.status.idle": "2022-08-03T23:04:35.408842Z", - "shell.execute_reply": "2022-08-03T23:04:35.407839Z", - "shell.execute_reply.started": "2022-08-03T23:04:35.403989Z" + "iopub.execute_input": "2022-08-10T00:12:58.515673Z", + "iopub.status.busy": "2022-08-10T00:12:58.515504Z", + "iopub.status.idle": "2022-08-10T00:12:58.518602Z", + "shell.execute_reply": "2022-08-10T00:12:58.518034Z", + "shell.execute_reply.started": "2022-08-10T00:12:58.515655Z" } }, "outputs": [], @@ -1800,15 +1836,15 @@ }, { "cell_type": "code", - "execution_count": 30, + "execution_count": 27, "id": "24948756", "metadata": { "execution": { - "iopub.execute_input": "2022-08-03T23:04:38.002946Z", - "iopub.status.busy": "2022-08-03T23:04:38.002710Z", - "iopub.status.idle": "2022-08-03T23:04:38.041165Z", - "shell.execute_reply": "2022-08-03T23:04:38.040217Z", - "shell.execute_reply.started": "2022-08-03T23:04:38.002929Z" + "iopub.execute_input": "2022-08-10T00:12:58.521068Z", + "iopub.status.busy": "2022-08-10T00:12:58.520894Z", + "iopub.status.idle": "2022-08-10T00:12:58.561244Z", + "shell.execute_reply": "2022-08-10T00:12:58.560251Z", + "shell.execute_reply.started": "2022-08-10T00:12:58.521051Z" } }, "outputs": [], @@ -1820,15 +1856,15 @@ }, { "cell_type": "code", - "execution_count": 31, + "execution_count": 28, "id": "3addecf3", "metadata": { "execution": { - "iopub.execute_input": "2022-08-03T23:04:40.664170Z", - "iopub.status.busy": "2022-08-03T23:04:40.663916Z", - "iopub.status.idle": "2022-08-03T23:04:40.669267Z", - "shell.execute_reply": "2022-08-03T23:04:40.668485Z", - "shell.execute_reply.started": "2022-08-03T23:04:40.664152Z" + "iopub.execute_input": "2022-08-10T00:12:58.563960Z", + "iopub.status.busy": "2022-08-10T00:12:58.563788Z", + "iopub.status.idle": "2022-08-10T00:12:58.567923Z", + "shell.execute_reply": "2022-08-10T00:12:58.567264Z", + "shell.execute_reply.started": "2022-08-10T00:12:58.563943Z" } }, "outputs": [ @@ -1838,7 +1874,7 @@ "(10784, 384)" ] }, - "execution_count": 31, + "execution_count": 28, "metadata": {}, "output_type": "execute_result" } @@ -1849,15 +1885,15 @@ }, { "cell_type": "code", - "execution_count": 32, + "execution_count": 29, "id": "867850e7", "metadata": { "execution": { - "iopub.execute_input": "2022-08-03T23:04:42.590450Z", - "iopub.status.busy": "2022-08-03T23:04:42.590256Z", - "iopub.status.idle": "2022-08-03T23:04:42.595329Z", - "shell.execute_reply": "2022-08-03T23:04:42.594584Z", - "shell.execute_reply.started": "2022-08-03T23:04:42.590433Z" + "iopub.execute_input": "2022-08-10T00:12:58.570179Z", + "iopub.status.busy": "2022-08-10T00:12:58.570009Z", + "iopub.status.idle": "2022-08-10T00:12:58.573642Z", + "shell.execute_reply": "2022-08-10T00:12:58.573056Z", + "shell.execute_reply.started": "2022-08-10T00:12:58.570163Z" } }, "outputs": [ @@ -1870,7 +1906,7 @@ "})" ] }, - "execution_count": 32, + "execution_count": 29, "metadata": {}, "output_type": "execute_result" } @@ -1889,15 +1925,15 @@ }, { "cell_type": "code", - "execution_count": 33, + "execution_count": 30, "id": "ab0fde6f", "metadata": { "execution": { - "iopub.execute_input": "2022-08-03T23:04:44.902654Z", - "iopub.status.busy": "2022-08-03T23:04:44.902461Z", - "iopub.status.idle": "2022-08-03T23:05:25.651962Z", - "shell.execute_reply": "2022-08-03T23:05:25.651049Z", - "shell.execute_reply.started": "2022-08-03T23:04:44.902637Z" + "iopub.execute_input": "2022-08-10T00:12:58.575865Z", + "iopub.status.busy": "2022-08-10T00:12:58.575702Z", + "iopub.status.idle": "2022-08-10T00:13:38.332922Z", + "shell.execute_reply": "2022-08-10T00:13:38.332124Z", + "shell.execute_reply.started": "2022-08-10T00:12:58.575849Z" } }, "outputs": [ @@ -1911,7 +1947,7 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "1519569833a641f8aa9bd4802388afd3", + "model_id": "3ee98e7b8f5042efa9bdf92582ebb818", "version_major": 2, "version_minor": 0 }, @@ -1931,22 +1967,22 @@ }, { "cell_type": "code", - "execution_count": 34, + "execution_count": 31, "id": "b4193f0b", "metadata": { "execution": { - "iopub.execute_input": "2022-08-03T23:12:12.093699Z", - "iopub.status.busy": "2022-08-03T23:12:12.093404Z", - "iopub.status.idle": "2022-08-03T23:12:15.493057Z", - "shell.execute_reply": "2022-08-03T23:12:15.492213Z", - "shell.execute_reply.started": "2022-08-03T23:12:12.093679Z" + "iopub.execute_input": "2022-08-10T00:13:38.335589Z", + "iopub.status.busy": "2022-08-10T00:13:38.335407Z", + "iopub.status.idle": "2022-08-10T00:13:41.521266Z", + "shell.execute_reply": "2022-08-10T00:13:41.520132Z", + "shell.execute_reply.started": "2022-08-10T00:13:38.335570Z" } }, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "a837edbf764f494eb14e68062af913c0", + "model_id": "0924a542a66c4260bbfd95cb6a2e8879", "version_major": 2, "version_minor": 0 }, @@ -1960,7 +1996,7 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "f416daba7b2248d99b5ed5f7022e863c", + "model_id": "2f3cac302bc44bea82b895a67eb39ff0", "version_major": 2, "version_minor": 0 }, @@ -1975,7 +2011,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "{'exact_match': 84.46546830652791, 'f1': 90.89570672930324}\n" + "{'exact_match': 84.5127719962157, 'f1': 91.06852496947944}\n" ] } ], @@ -2014,15 +2050,15 @@ }, { "cell_type": "code", - "execution_count": 35, + "execution_count": 32, "id": "83d30fb9", "metadata": { "execution": { - "iopub.execute_input": "2022-08-03T23:12:19.935909Z", - "iopub.status.busy": "2022-08-03T23:12:19.935596Z", - "iopub.status.idle": "2022-08-03T23:12:19.939550Z", - "shell.execute_reply": "2022-08-03T23:12:19.938728Z", - "shell.execute_reply.started": "2022-08-03T23:12:19.935890Z" + "iopub.execute_input": "2022-08-10T00:13:41.523897Z", + "iopub.status.busy": "2022-08-10T00:13:41.523703Z", + "iopub.status.idle": "2022-08-10T00:13:41.526854Z", + "shell.execute_reply": "2022-08-10T00:13:41.526282Z", + "shell.execute_reply.started": "2022-08-10T00:13:41.523879Z" } }, "outputs": [], @@ -2046,15 +2082,15 @@ }, { "cell_type": "code", - "execution_count": 36, + "execution_count": 33, "id": "961bb39f", "metadata": { "execution": { - "iopub.execute_input": "2022-08-03T23:12:22.922236Z", - "iopub.status.busy": "2022-08-03T23:12:22.921854Z", - "iopub.status.idle": "2022-08-03T23:12:25.241924Z", - "shell.execute_reply": "2022-08-03T23:12:25.241158Z", - "shell.execute_reply.started": "2022-08-03T23:12:22.922212Z" + "iopub.execute_input": "2022-08-10T00:13:41.529316Z", + "iopub.status.busy": "2022-08-10T00:13:41.529142Z", + "iopub.status.idle": "2022-08-10T00:13:43.729375Z", + "shell.execute_reply": "2022-08-10T00:13:43.728559Z", + "shell.execute_reply.started": "2022-08-10T00:13:41.529301Z" } }, "outputs": [], @@ -2081,9 +2117,17 @@ }, { "cell_type": "code", - "execution_count": 32, + "execution_count": 34, "id": "bfc678bb", - "metadata": {}, + "metadata": { + "execution": { + "iopub.execute_input": "2022-08-10T00:13:43.731891Z", + "iopub.status.busy": "2022-08-10T00:13:43.731630Z", + "iopub.status.idle": "2022-08-10T00:13:43.736172Z", + "shell.execute_reply": "2022-08-10T00:13:43.734950Z", + "shell.execute_reply.started": "2022-08-10T00:13:43.731872Z" + } + }, "outputs": [], "source": [ "# model = transformers.BertForQuestionAnswering.from_pretrained(\"Graphcore/bert-large-uncased-squad11\")" @@ -2099,15 +2143,15 @@ }, { "cell_type": "code", - "execution_count": 37, + "execution_count": 35, "id": "69901089", "metadata": { "execution": { - "iopub.execute_input": "2022-08-03T23:12:31.808623Z", - "iopub.status.busy": "2022-08-03T23:12:31.808188Z", - "iopub.status.idle": "2022-08-03T23:12:32.346583Z", - "shell.execute_reply": "2022-08-03T23:12:32.345769Z", - "shell.execute_reply.started": "2022-08-03T23:12:31.808603Z" + "iopub.execute_input": "2022-08-10T00:13:43.738733Z", + "iopub.status.busy": "2022-08-10T00:13:43.738569Z", + "iopub.status.idle": "2022-08-10T00:13:44.166186Z", + "shell.execute_reply": "2022-08-10T00:13:44.165151Z", + "shell.execute_reply.started": "2022-08-10T00:13:43.738718Z" } }, "outputs": [ @@ -2153,9 +2197,17 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 36, "id": "ea7921a0", - "metadata": {}, + "metadata": { + "execution": { + "iopub.execute_input": "2022-08-10T00:13:44.169414Z", + "iopub.status.busy": "2022-08-10T00:13:44.169224Z", + "iopub.status.idle": "2022-08-10T00:13:44.172339Z", + "shell.execute_reply": "2022-08-10T00:13:44.171788Z", + "shell.execute_reply.started": "2022-08-10T00:13:44.169395Z" + } + }, "outputs": [], "source": [ "# Make sure you have git-lfs and huggingface-hub\n", @@ -2173,10 +2225,33 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 37, "id": "8793ce79", - "metadata": {}, - "outputs": [], + "metadata": { + "execution": { + "iopub.execute_input": "2022-08-10T00:13:44.175178Z", + "iopub.status.busy": "2022-08-10T00:13:44.174906Z", + "iopub.status.idle": "2022-08-10T00:13:44.200980Z", + "shell.execute_reply": "2022-08-10T00:13:44.200090Z", + "shell.execute_reply.started": "2022-08-10T00:13:44.175161Z" + } + }, + "outputs": [ + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "9705bac7802d4b868356454f539efbb4", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "VBox(children=(HTML(value='