diff --git "a/train.ipynb" "b/train.ipynb" new file mode 100644--- /dev/null +++ "b/train.ipynb" @@ -0,0 +1,1110 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 36, + "id": "452ee8e2-7d0f-4a52-97e6-7976bb21e4c1", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.\n", + "tensorflow 2.9.2 requires protobuf<3.20,>=3.9.2, but you have protobuf 3.20.2 which is incompatible.\n", + "tensorboard 2.9.1 requires protobuf<3.20,>=3.9.2, but you have protobuf 3.20.2 which is incompatible.\u001b[0m\u001b[31m\n", + "\u001b[0m\u001b[33mWARNING: Running pip as the 'root' user can result in broken permissions and conflicting behaviour with the system package manager. It is recommended to use a virtual environment instead: https://pip.pypa.io/warnings/venv\u001b[0m\u001b[33m\n", + "\u001b[0m" + ] + } + ], + "source": [ + "!pip install -q -U transformers huggingface_hub datasets accelerate peft onnx onnxruntime optimum\n", + "# !pip install -q transformers==4.28.0 " + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "7cabae93-a56e-48e6-bf6e-36f65b15700b", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Detected operating system as Ubuntu/focal.\n", + "Checking for curl...\n", + "Detected curl...\n", + "Checking for gpg...\n", + "Detected gpg...\n", + "Detected apt version as 2.0.9\n", + "Running apt-get update... done.\n", + "Installing apt-transport-https... done.\n", + "Installing /etc/apt/sources.list.d/github_git-lfs.list...done.\n", + "Importing packagecloud gpg key... Packagecloud gpg key imported to /etc/apt/keyrings/github_git-lfs-archive-keyring.gpg\n", + "done.\n", + "Running apt-get update... done.\n", + "\n", + "The repository is setup! You can now install packages.\n" + ] + } + ], + "source": [ + "!curl -s https://packagecloud.io/install/repositories/github/git-lfs/script.deb.sh | sudo bash" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "6428ceeb-9fd1-455d-9cb7-f4d4102d6d34", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Reading package lists... Done\n", + "Building dependency tree \n", + "Reading state information... Done\n", + "The following NEW packages will be installed:\n", + " git-lfs\n", + "0 upgraded, 1 newly installed, 0 to remove and 111 not upgraded.\n", + "Need to get 7419 kB of archives.\n", + "After this operation, 16.0 MB of additional disk space will be used.\n", + "Get:1 https://packagecloud.io/github/git-lfs/ubuntu focal/main amd64 git-lfs amd64 3.3.0 [7419 kB]\n", + "Fetched 7419 kB in 1s (11.9 MB/s)\n", + "Selecting previously unselected package git-lfs.\n", + "(Reading database ... 69943 files and directories currently installed.)\n", + "Preparing to unpack .../git-lfs_3.3.0_amd64.deb ...\n", + "Unpacking git-lfs (3.3.0) ...\n", + "Setting up git-lfs (3.3.0) ...\n", + "Git LFS initialized.\n", + "Processing triggers for man-db (2.9.1-1) ...\n" + ] + } + ], + "source": [ + "!sudo apt-get install git-lfs" + ] + }, + { + "cell_type": "code", + "execution_count": 34, + "id": "d522266b-71f2-4e92-a187-dfb8e497bd0d", + "metadata": { + "tags": [] + }, + "outputs": [ + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "a509e74171a046dea452a1c3281c17d2", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "VBox(children=(HTML(value='
1\u001b[0m \u001b[43mtrainer\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mpush_to_hub\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m/usr/local/lib/python3.9/dist-packages/transformers/trainer.py:3366\u001b[0m, in \u001b[0;36mpush_to_hub\u001b[0;34m(self, commit_message, blocking, **kwargs)\u001b[0m\n\u001b[1;32m 3363\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mtype\u001b[39m(tensor)({k: \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_pad_across_processes(v, pad_index\u001b[38;5;241m=\u001b[39mpad_index) \u001b[38;5;28;01mfor\u001b[39;00m k, v \u001b[38;5;129;01min\u001b[39;00m tensor\u001b[38;5;241m.\u001b[39mitems()})\n\u001b[1;32m 3364\u001b[0m \u001b[38;5;28;01melif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(tensor, torch\u001b[38;5;241m.\u001b[39mTensor):\n\u001b[1;32m 3365\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mTypeError\u001b[39;00m(\n\u001b[0;32m-> 3366\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mCan\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mt pad the values of type \u001b[39m\u001b[38;5;132;01m{\u001b[39;00m\u001b[38;5;28mtype\u001b[39m(tensor)\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m, only of nested list/tuple/dicts of tensors.\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 3367\u001b[0m )\n\u001b[1;32m 3369\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mlen\u001b[39m(tensor\u001b[38;5;241m.\u001b[39mshape) \u001b[38;5;241m<\u001b[39m \u001b[38;5;241m2\u001b[39m:\n\u001b[1;32m 3370\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m tensor\n", + "File \u001b[0;32m/usr/local/lib/python3.9/dist-packages/transformers/trainer.py:3244\u001b[0m, in \u001b[0;36mTrainer.init_git_repo\u001b[0;34m(self, at_init)\u001b[0m\n\u001b[1;32m 3241\u001b[0m repo_name \u001b[38;5;241m=\u001b[39m get_full_repo_name(repo_name, token\u001b[38;5;241m=\u001b[39m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39margs\u001b[38;5;241m.\u001b[39mhub_token)\n\u001b[1;32m 3243\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m-> 3244\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mrepo \u001b[38;5;241m=\u001b[39m \u001b[43mRepository\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 3245\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43margs\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43moutput_dir\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 3246\u001b[0m \u001b[43m \u001b[49m\u001b[43mclone_from\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mrepo_name\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 3247\u001b[0m \u001b[43m \u001b[49m\u001b[43muse_auth_token\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43muse_auth_token\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 3248\u001b[0m \u001b[43m \u001b[49m\u001b[43mprivate\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43margs\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mhub_private_repo\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 3249\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 3250\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mEnvironmentError\u001b[39;00m:\n\u001b[1;32m 3251\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39margs\u001b[38;5;241m.\u001b[39moverwrite_output_dir \u001b[38;5;129;01mand\u001b[39;00m at_init:\n\u001b[1;32m 3252\u001b[0m \u001b[38;5;66;03m# Try again after wiping output_dir\u001b[39;00m\n", + "File \u001b[0;32m/usr/local/lib/python3.9/dist-packages/huggingface_hub/utils/_validators.py:124\u001b[0m, in \u001b[0;36mvalidate_hf_hub_args.._inner_fn\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 119\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m check_use_auth_token:\n\u001b[1;32m 120\u001b[0m kwargs \u001b[38;5;241m=\u001b[39m smoothly_deprecate_use_auth_token(\n\u001b[1;32m 121\u001b[0m fn_name\u001b[38;5;241m=\u001b[39mfn\u001b[38;5;241m.\u001b[39m\u001b[38;5;18m__name__\u001b[39m, has_token\u001b[38;5;241m=\u001b[39mhas_token, kwargs\u001b[38;5;241m=\u001b[39mkwargs\n\u001b[1;32m 122\u001b[0m )\n\u001b[0;32m--> 124\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mfn\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n", + "\u001b[0;31mTypeError\u001b[0m: __init__() got an unexpected keyword argument 'private'" + ] + } + ], + "source": [ + "# trainer.push_to_hub()" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "eff6fbfb-1e36-44dc-8a3b-fba060eeca5b", + "metadata": { + "collapsed": true, + "jupyter": { + "outputs_hidden": true + }, + "tags": [] + }, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/usr/local/lib/python3.9/dist-packages/transformers/optimization.py:407: FutureWarning: This implementation of AdamW is deprecated and will be removed in a future version. Use the PyTorch implementation torch.optim.AdamW instead, or set `no_deprecation_warning=True` to disable this warning\n", + " warnings.warn(\n" + ] + }, + { + "data": { + "text/html": [ + "\n", + "
\n", + " \n", + " \n", + " [2000/2000 47:35, Epoch 11.35/12]\n", + "
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
StepTraining LossValidation Loss
15500.1283000.602472
16000.1251000.629285
16500.1155000.609703
17000.1231000.627464
17500.0971000.610019
18000.1048000.604064
18500.0875000.612238
19000.1042000.613567
19500.0976000.612580
20000.0692000.610577

" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "

╭─────────────────────────────── Traceback (most recent call last) ────────────────────────────────╮\n",
+       " /usr/local/lib/python3.9/dist-packages/huggingface_hub/repository.py:1032 in git_commit          \n",
+       "                                                                                                  \n",
+       "   1029 │   │   │   │   The message attributed to the commit.                                     \n",
+       "   1030 │   │   \"\"\"                                                                               \n",
+       "   1031 │   │   try:                                                                              \n",
+       " 1032 │   │   │   result = run_subprocess(\"git commit -v -m\".split() + [commit_message], self.  \n",
+       "   1033 │   │   │   logger.info(f\"Committed:\\n{result.stdout}\\n\")                                 \n",
+       "   1034 │   │   except subprocess.CalledProcessError as exc:                                      \n",
+       "   1035 │   │   │   if len(exc.stderr) > 0:                                                       \n",
+       "                                                                                                  \n",
+       " /usr/local/lib/python3.9/dist-packages/huggingface_hub/utils/_subprocess.py:83 in run_subprocess \n",
+       "                                                                                                  \n",
+       "    80 │   if isinstance(folder, Path):                                                           \n",
+       "    81 │   │   folder = str(folder)                                                               \n",
+       "    82 │                                                                                          \n",
+       "  83 return subprocess.run(                                                                 \n",
+       "    84 │   │   command,                                                                           \n",
+       "    85 │   │   stderr=subprocess.PIPE,                                                            \n",
+       "    86 │   │   stdout=subprocess.PIPE,                                                            \n",
+       "                                                                                                  \n",
+       " /usr/lib/python3.9/subprocess.py:528 in run                                                      \n",
+       "                                                                                                  \n",
+       "    525 │   │   │   raise                                                                         \n",
+       "    526 │   │   retcode = process.poll()                                                          \n",
+       "    527 │   │   if check and retcode:                                                             \n",
+       "  528 │   │   │   raise CalledProcessError(retcode, process.args,                               \n",
+       "    529 │   │   │   │   │   │   │   │   │    output=stdout, stderr=stderr)                        \n",
+       "    530 │   return CompletedProcess(process.args, retcode, stdout, stderr)                        \n",
+       "    531                                                                                           \n",
+       "╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n",
+       "CalledProcessError: Command '['git', 'commit', '-v', '-m', 'Training in progress, step 2000']' returned non-zero \n",
+       "exit status 128.\n",
+       "\n",
+       "During handling of the above exception, another exception occurred:\n",
+       "\n",
+       "╭─────────────────────────────── Traceback (most recent call last) ────────────────────────────────╮\n",
+       " in <module>                                                                                      \n",
+       "                                                                                                  \n",
+       " 1 trainer.train(\"ingbetic/checkpoint-1500\")                                                    \n",
+       "   2                                                                                              \n",
+       "                                                                                                  \n",
+       " /usr/local/lib/python3.9/dist-packages/transformers/trainer.py:1664 in train                     \n",
+       "                                                                                                  \n",
+       "   1661 │   │   inner_training_loop = find_executable_batch_size(                                 \n",
+       "   1662 │   │   │   self._inner_training_loop, self._train_batch_size, args.auto_find_batch_size  \n",
+       "   1663 │   │   )                                                                                 \n",
+       " 1664 │   │   return inner_training_loop(                                                       \n",
+       "   1665 │   │   │   args=args,                                                                    \n",
+       "   1666 │   │   │   resume_from_checkpoint=resume_from_checkpoint,                                \n",
+       "   1667 │   │   │   trial=trial,                                                                  \n",
+       "                                                                                                  \n",
+       " /usr/local/lib/python3.9/dist-packages/transformers/trainer.py:2019 in _inner_training_loop      \n",
+       "                                                                                                  \n",
+       "   2016 │   │   │   │   │   self.state.epoch = epoch + (step + 1 + steps_skipped) / steps_in_epo  \n",
+       "   2017 │   │   │   │   │   self.control = self.callback_handler.on_step_end(args, self.state, s  \n",
+       "   2018 │   │   │   │   │                                                                         \n",
+       " 2019 │   │   │   │   │   self._maybe_log_save_evaluate(tr_loss, model, trial, epoch, ignore_k  \n",
+       "   2020 │   │   │   │   else:                                                                     \n",
+       "   2021 │   │   │   │   │   self.control = self.callback_handler.on_substep_end(args, self.state  \n",
+       "   2022                                                                                           \n",
+       "                                                                                                  \n",
+       " /usr/local/lib/python3.9/dist-packages/transformers/trainer.py:2308 in _maybe_log_save_evaluate  \n",
+       "                                                                                                  \n",
+       "   2305 │   │   │   │   self.lr_scheduler.step(metrics[self.args.metric_for_best_model])          \n",
+       "   2306 │   │                                                                                     \n",
+       "   2307 │   │   if self.control.should_save:                                                      \n",
+       " 2308 │   │   │   self._save_checkpoint(model, trial, metrics=metrics)                          \n",
+       "   2309 │   │   │   self.control = self.callback_handler.on_save(self.args, self.state, self.con  \n",
+       "   2310 │                                                                                         \n",
+       "   2311 │   def _load_rng_state(self, checkpoint):                                                \n",
+       "                                                                                                  \n",
+       " /usr/local/lib/python3.9/dist-packages/transformers/trainer.py:2462 in _save_checkpoint          \n",
+       "                                                                                                  \n",
+       "   2459 │   │   │   torch.save(rng_states, os.path.join(output_dir, f\"rng_state_{self.args.proce  \n",
+       "   2460 │   │                                                                                     \n",
+       "   2461 │   │   if self.args.push_to_hub:                                                         \n",
+       " 2462 │   │   │   self._push_from_checkpoint(output_dir)                                        \n",
+       "   2463 │   │                                                                                     \n",
+       "   2464 │   │   # Maybe delete some older checkpoints.                                            \n",
+       "   2465 │   │   if self.args.should_save:                                                         \n",
+       "                                                                                                  \n",
+       " /usr/local/lib/python3.9/dist-packages/transformers/trainer.py:3649 in _push_from_checkpoint     \n",
+       "                                                                                                  \n",
+       "   3646 │   │   │   │   commit_message = f\"Training in progress, step {self.state.global_step}\"   \n",
+       "   3647 │   │   │   else:                                                                         \n",
+       "   3648 │   │   │   │   commit_message = f\"Training in progress, epoch {int(self.state.epoch)}\"   \n",
+       " 3649 │   │   │   _, self.push_in_progress = self.repo.push_to_hub(                             \n",
+       "   3650 │   │   │   │   commit_message=commit_message, blocking=False, auto_lfs_prune=True        \n",
+       "   3651 │   │   │   )                                                                             \n",
+       "   3652 │   │   finally:                                                                          \n",
+       "                                                                                                  \n",
+       " /usr/local/lib/python3.9/dist-packages/huggingface_hub/repository.py:1306 in push_to_hub         \n",
+       "                                                                                                  \n",
+       "   1303 │   │   │   logger.info(\"Repo currently clean. Ignoring push_to_hub\")                     \n",
+       "   1304 │   │   │   return None                                                                   \n",
+       "   1305 │   │   self.git_add(auto_lfs_track=True)                                                 \n",
+       " 1306 │   │   self.git_commit(commit_message)                                                   \n",
+       "   1307 │   │   return self.git_push(                                                             \n",
+       "   1308 │   │   │   upstream=f\"origin {self.current_branch}\",                                     \n",
+       "   1309 │   │   │   blocking=blocking,                                                            \n",
+       "                                                                                                  \n",
+       " /usr/local/lib/python3.9/dist-packages/huggingface_hub/repository.py:1036 in git_commit          \n",
+       "                                                                                                  \n",
+       "   1033 │   │   │   logger.info(f\"Committed:\\n{result.stdout}\\n\")                                 \n",
+       "   1034 │   │   except subprocess.CalledProcessError as exc:                                      \n",
+       "   1035 │   │   │   if len(exc.stderr) > 0:                                                       \n",
+       " 1036 │   │   │   │   raise EnvironmentError(exc.stderr)                                        \n",
+       "   1037 │   │   │   else:                                                                         \n",
+       "   1038 │   │   │   │   raise EnvironmentError(exc.stdout)                                        \n",
+       "   1039                                                                                           \n",
+       "╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n",
+       "OSError: fatal: Unable to create '/notebooks/ingbetic/.git/index.lock': File exists.\n",
+       "\n",
+       "Another git process seems to be running in this repository, e.g.\n",
+       "an editor opened by 'git commit'. Please make sure all processes\n",
+       "are terminated then try again. If it still fails, a git process\n",
+       "may have crashed in this repository earlier:\n",
+       "remove the file manually to continue.\n",
+       "\n",
+       "
\n" + ], + "text/plain": [ + "\u001b[31m╭─\u001b[0m\u001b[31m──────────────────────────────\u001b[0m\u001b[31m \u001b[0m\u001b[1;31mTraceback \u001b[0m\u001b[1;2;31m(most recent call last)\u001b[0m\u001b[31m \u001b[0m\u001b[31m───────────────────────────────\u001b[0m\u001b[31m─╮\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[2;33m/usr/local/lib/python3.9/dist-packages/huggingface_hub/\u001b[0m\u001b[1;33mrepository.py\u001b[0m:\u001b[94m1032\u001b[0m in \u001b[92mgit_commit\u001b[0m \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[2m1029 \u001b[0m\u001b[2;33m│ │ │ │ \u001b[0m\u001b[33mThe message attributed to the commit.\u001b[0m \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[2m1030 \u001b[0m\u001b[2;33m│ │ \u001b[0m\u001b[33m\"\"\"\u001b[0m \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[2m1031 \u001b[0m\u001b[2m│ │ \u001b[0m\u001b[94mtry\u001b[0m: \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[31m❱ \u001b[0m1032 \u001b[2m│ │ │ \u001b[0mresult = run_subprocess(\u001b[33m\"\u001b[0m\u001b[33mgit commit -v -m\u001b[0m\u001b[33m\"\u001b[0m.split() + [commit_message], \u001b[96mself\u001b[0m. \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[2m1033 \u001b[0m\u001b[2m│ │ │ \u001b[0mlogger.info(\u001b[33mf\u001b[0m\u001b[33m\"\u001b[0m\u001b[33mCommitted:\u001b[0m\u001b[33m\\n\u001b[0m\u001b[33m{\u001b[0mresult.stdout\u001b[33m}\u001b[0m\u001b[33m\\n\u001b[0m\u001b[33m\"\u001b[0m) \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[2m1034 \u001b[0m\u001b[2m│ │ \u001b[0m\u001b[94mexcept\u001b[0m subprocess.CalledProcessError \u001b[94mas\u001b[0m exc: \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[2m1035 \u001b[0m\u001b[2m│ │ │ \u001b[0m\u001b[94mif\u001b[0m \u001b[96mlen\u001b[0m(exc.stderr) > \u001b[94m0\u001b[0m: \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[2;33m/usr/local/lib/python3.9/dist-packages/huggingface_hub/utils/\u001b[0m\u001b[1;33m_subprocess.py\u001b[0m:\u001b[94m83\u001b[0m in \u001b[92mrun_subprocess\u001b[0m \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[2m 80 \u001b[0m\u001b[2m│ \u001b[0m\u001b[94mif\u001b[0m \u001b[96misinstance\u001b[0m(folder, Path): \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[2m 81 \u001b[0m\u001b[2m│ │ \u001b[0mfolder = \u001b[96mstr\u001b[0m(folder) \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[2m 82 \u001b[0m\u001b[2m│ \u001b[0m \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[31m❱ \u001b[0m 83 \u001b[2m│ \u001b[0m\u001b[94mreturn\u001b[0m subprocess.run( \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[2m 84 \u001b[0m\u001b[2m│ │ \u001b[0mcommand, \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[2m 85 \u001b[0m\u001b[2m│ │ \u001b[0mstderr=subprocess.PIPE, \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[2m 86 \u001b[0m\u001b[2m│ │ \u001b[0mstdout=subprocess.PIPE, \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[2;33m/usr/lib/python3.9/\u001b[0m\u001b[1;33msubprocess.py\u001b[0m:\u001b[94m528\u001b[0m in \u001b[92mrun\u001b[0m \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[2m 525 \u001b[0m\u001b[2m│ │ │ \u001b[0m\u001b[94mraise\u001b[0m \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[2m 526 \u001b[0m\u001b[2m│ │ \u001b[0mretcode = process.poll() \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[2m 527 \u001b[0m\u001b[2m│ │ \u001b[0m\u001b[94mif\u001b[0m check \u001b[95mand\u001b[0m retcode: \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[31m❱ \u001b[0m 528 \u001b[2m│ │ │ \u001b[0m\u001b[94mraise\u001b[0m CalledProcessError(retcode, process.args, \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[2m 529 \u001b[0m\u001b[2m│ │ │ │ │ │ │ │ │ \u001b[0moutput=stdout, stderr=stderr) \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[2m 530 \u001b[0m\u001b[2m│ \u001b[0m\u001b[94mreturn\u001b[0m CompletedProcess(process.args, retcode, stdout, stderr) \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[2m 531 \u001b[0m \u001b[31m│\u001b[0m\n", + "\u001b[31m╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\u001b[0m\n", + "\u001b[1;91mCalledProcessError: \u001b[0mCommand \u001b[32m'\u001b[0m\u001b[32m[\u001b[0m\u001b[32m'\u001b[0mgit', \u001b[32m'commit'\u001b[0m, \u001b[32m'-v'\u001b[0m, \u001b[32m'-m'\u001b[0m, \u001b[32m'Training in progress, step 2000'\u001b[0m\u001b[1m]\u001b[0m' returned non-zero \n", + "exit status \u001b[1;36m128\u001b[0m.\n", + "\n", + "\u001b[3mDuring handling of the above exception, another exception occurred:\u001b[0m\n", + "\n", + "\u001b[31m╭─\u001b[0m\u001b[31m──────────────────────────────\u001b[0m\u001b[31m \u001b[0m\u001b[1;31mTraceback \u001b[0m\u001b[1;2;31m(most recent call last)\u001b[0m\u001b[31m \u001b[0m\u001b[31m───────────────────────────────\u001b[0m\u001b[31m─╮\u001b[0m\n", + "\u001b[31m│\u001b[0m in \u001b[92m\u001b[0m \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[31m❱ \u001b[0m1 trainer.train(\u001b[33m\"\u001b[0m\u001b[33mingbetic/checkpoint-1500\u001b[0m\u001b[33m\"\u001b[0m) \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[2m2 \u001b[0m \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[2;33m/usr/local/lib/python3.9/dist-packages/transformers/\u001b[0m\u001b[1;33mtrainer.py\u001b[0m:\u001b[94m1664\u001b[0m in \u001b[92mtrain\u001b[0m \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[2m1661 \u001b[0m\u001b[2m│ │ \u001b[0minner_training_loop = find_executable_batch_size( \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[2m1662 \u001b[0m\u001b[2m│ │ │ \u001b[0m\u001b[96mself\u001b[0m._inner_training_loop, \u001b[96mself\u001b[0m._train_batch_size, args.auto_find_batch_size \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[2m1663 \u001b[0m\u001b[2m│ │ \u001b[0m) \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[31m❱ \u001b[0m1664 \u001b[2m│ │ \u001b[0m\u001b[94mreturn\u001b[0m inner_training_loop( \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[2m1665 \u001b[0m\u001b[2m│ │ │ \u001b[0margs=args, \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[2m1666 \u001b[0m\u001b[2m│ │ │ \u001b[0mresume_from_checkpoint=resume_from_checkpoint, \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[2m1667 \u001b[0m\u001b[2m│ │ │ \u001b[0mtrial=trial, \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[2;33m/usr/local/lib/python3.9/dist-packages/transformers/\u001b[0m\u001b[1;33mtrainer.py\u001b[0m:\u001b[94m2019\u001b[0m in \u001b[92m_inner_training_loop\u001b[0m \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[2m2016 \u001b[0m\u001b[2m│ │ │ │ │ \u001b[0m\u001b[96mself\u001b[0m.state.epoch = epoch + (step + \u001b[94m1\u001b[0m + steps_skipped) / steps_in_epo \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[2m2017 \u001b[0m\u001b[2m│ │ │ │ │ \u001b[0m\u001b[96mself\u001b[0m.control = \u001b[96mself\u001b[0m.callback_handler.on_step_end(args, \u001b[96mself\u001b[0m.state, \u001b[96ms\u001b[0m \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[2m2018 \u001b[0m\u001b[2m│ │ │ │ │ \u001b[0m \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[31m❱ \u001b[0m2019 \u001b[2m│ │ │ │ │ \u001b[0m\u001b[96mself\u001b[0m._maybe_log_save_evaluate(tr_loss, model, trial, epoch, ignore_k \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[2m2020 \u001b[0m\u001b[2m│ │ │ │ \u001b[0m\u001b[94melse\u001b[0m: \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[2m2021 \u001b[0m\u001b[2m│ │ │ │ │ \u001b[0m\u001b[96mself\u001b[0m.control = \u001b[96mself\u001b[0m.callback_handler.on_substep_end(args, \u001b[96mself\u001b[0m.state \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[2m2022 \u001b[0m \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[2;33m/usr/local/lib/python3.9/dist-packages/transformers/\u001b[0m\u001b[1;33mtrainer.py\u001b[0m:\u001b[94m2308\u001b[0m in \u001b[92m_maybe_log_save_evaluate\u001b[0m \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[2m2305 \u001b[0m\u001b[2m│ │ │ │ \u001b[0m\u001b[96mself\u001b[0m.lr_scheduler.step(metrics[\u001b[96mself\u001b[0m.args.metric_for_best_model]) \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[2m2306 \u001b[0m\u001b[2m│ │ \u001b[0m \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[2m2307 \u001b[0m\u001b[2m│ │ \u001b[0m\u001b[94mif\u001b[0m \u001b[96mself\u001b[0m.control.should_save: \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[31m❱ \u001b[0m2308 \u001b[2m│ │ │ \u001b[0m\u001b[96mself\u001b[0m._save_checkpoint(model, trial, metrics=metrics) \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[2m2309 \u001b[0m\u001b[2m�� │ │ \u001b[0m\u001b[96mself\u001b[0m.control = \u001b[96mself\u001b[0m.callback_handler.on_save(\u001b[96mself\u001b[0m.args, \u001b[96mself\u001b[0m.state, \u001b[96mself\u001b[0m.con \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[2m2310 \u001b[0m\u001b[2m│ \u001b[0m \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[2m2311 \u001b[0m\u001b[2m│ \u001b[0m\u001b[94mdef\u001b[0m \u001b[92m_load_rng_state\u001b[0m(\u001b[96mself\u001b[0m, checkpoint): \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[2;33m/usr/local/lib/python3.9/dist-packages/transformers/\u001b[0m\u001b[1;33mtrainer.py\u001b[0m:\u001b[94m2462\u001b[0m in \u001b[92m_save_checkpoint\u001b[0m \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[2m2459 \u001b[0m\u001b[2m│ │ │ \u001b[0mtorch.save(rng_states, os.path.join(output_dir, \u001b[33mf\u001b[0m\u001b[33m\"\u001b[0m\u001b[33mrng_state_\u001b[0m\u001b[33m{\u001b[0m\u001b[96mself\u001b[0m.args.proce \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[2m2460 \u001b[0m\u001b[2m│ │ \u001b[0m \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[2m2461 \u001b[0m\u001b[2m│ │ \u001b[0m\u001b[94mif\u001b[0m \u001b[96mself\u001b[0m.args.push_to_hub: \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[31m❱ \u001b[0m2462 \u001b[2m│ │ │ \u001b[0m\u001b[96mself\u001b[0m._push_from_checkpoint(output_dir) \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[2m2463 \u001b[0m\u001b[2m│ │ \u001b[0m \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[2m2464 \u001b[0m\u001b[2m│ │ \u001b[0m\u001b[2m# Maybe delete some older checkpoints.\u001b[0m \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[2m2465 \u001b[0m\u001b[2m│ │ \u001b[0m\u001b[94mif\u001b[0m \u001b[96mself\u001b[0m.args.should_save: \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[2;33m/usr/local/lib/python3.9/dist-packages/transformers/\u001b[0m\u001b[1;33mtrainer.py\u001b[0m:\u001b[94m3649\u001b[0m in \u001b[92m_push_from_checkpoint\u001b[0m \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[2m3646 \u001b[0m\u001b[2m│ │ │ │ \u001b[0mcommit_message = \u001b[33mf\u001b[0m\u001b[33m\"\u001b[0m\u001b[33mTraining in progress, step \u001b[0m\u001b[33m{\u001b[0m\u001b[96mself\u001b[0m.state.global_step\u001b[33m}\u001b[0m\u001b[33m\"\u001b[0m \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[2m3647 \u001b[0m\u001b[2m│ │ │ \u001b[0m\u001b[94melse\u001b[0m: \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[2m3648 \u001b[0m\u001b[2m│ │ │ │ \u001b[0mcommit_message = \u001b[33mf\u001b[0m\u001b[33m\"\u001b[0m\u001b[33mTraining in progress, epoch \u001b[0m\u001b[33m{\u001b[0m\u001b[96mint\u001b[0m(\u001b[96mself\u001b[0m.state.epoch)\u001b[33m}\u001b[0m\u001b[33m\"\u001b[0m \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[31m❱ \u001b[0m3649 \u001b[2m│ │ │ \u001b[0m_, \u001b[96mself\u001b[0m.push_in_progress = \u001b[96mself\u001b[0m.repo.push_to_hub( \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[2m3650 \u001b[0m\u001b[2m│ │ │ │ \u001b[0mcommit_message=commit_message, blocking=\u001b[94mFalse\u001b[0m, auto_lfs_prune=\u001b[94mTrue\u001b[0m \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[2m3651 \u001b[0m\u001b[2m│ │ │ \u001b[0m) \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[2m3652 \u001b[0m\u001b[2m│ │ \u001b[0m\u001b[94mfinally\u001b[0m: \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[2;33m/usr/local/lib/python3.9/dist-packages/huggingface_hub/\u001b[0m\u001b[1;33mrepository.py\u001b[0m:\u001b[94m1306\u001b[0m in \u001b[92mpush_to_hub\u001b[0m \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[2m1303 \u001b[0m\u001b[2m│ │ │ \u001b[0mlogger.info(\u001b[33m\"\u001b[0m\u001b[33mRepo currently clean. Ignoring push_to_hub\u001b[0m\u001b[33m\"\u001b[0m) \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[2m1304 \u001b[0m\u001b[2m│ │ │ \u001b[0m\u001b[94mreturn\u001b[0m \u001b[94mNone\u001b[0m \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[2m1305 \u001b[0m\u001b[2m│ │ \u001b[0m\u001b[96mself\u001b[0m.git_add(auto_lfs_track=\u001b[94mTrue\u001b[0m) \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[31m❱ \u001b[0m1306 \u001b[2m│ │ \u001b[0m\u001b[96mself\u001b[0m.git_commit(commit_message) \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[2m1307 \u001b[0m\u001b[2m│ │ \u001b[0m\u001b[94mreturn\u001b[0m \u001b[96mself\u001b[0m.git_push( \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[2m1308 \u001b[0m\u001b[2m│ │ │ \u001b[0mupstream=\u001b[33mf\u001b[0m\u001b[33m\"\u001b[0m\u001b[33morigin \u001b[0m\u001b[33m{\u001b[0m\u001b[96mself\u001b[0m.current_branch\u001b[33m}\u001b[0m\u001b[33m\"\u001b[0m, \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[2m1309 \u001b[0m\u001b[2m│ │ │ \u001b[0mblocking=blocking, \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[2;33m/usr/local/lib/python3.9/dist-packages/huggingface_hub/\u001b[0m\u001b[1;33mrepository.py\u001b[0m:\u001b[94m1036\u001b[0m in \u001b[92mgit_commit\u001b[0m \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[2m1033 \u001b[0m\u001b[2m│ │ │ \u001b[0mlogger.info(\u001b[33mf\u001b[0m\u001b[33m\"\u001b[0m\u001b[33mCommitted:\u001b[0m\u001b[33m\\n\u001b[0m\u001b[33m{\u001b[0mresult.stdout\u001b[33m}\u001b[0m\u001b[33m\\n\u001b[0m\u001b[33m\"\u001b[0m) \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[2m1034 \u001b[0m\u001b[2m│ │ \u001b[0m\u001b[94mexcept\u001b[0m subprocess.CalledProcessError \u001b[94mas\u001b[0m exc: \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[2m1035 \u001b[0m\u001b[2m│ │ │ \u001b[0m\u001b[94mif\u001b[0m \u001b[96mlen\u001b[0m(exc.stderr) > \u001b[94m0\u001b[0m: \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[31m❱ \u001b[0m1036 \u001b[2m│ │ │ │ \u001b[0m\u001b[94mraise\u001b[0m \u001b[96mEnvironmentError\u001b[0m(exc.stderr) \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[2m1037 \u001b[0m\u001b[2m│ │ │ \u001b[0m\u001b[94melse\u001b[0m: \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[2m1038 \u001b[0m\u001b[2m│ │ │ │ \u001b[0m\u001b[94mraise\u001b[0m \u001b[96mEnvironmentError\u001b[0m(exc.stdout) \u001b[31m│\u001b[0m\n", + "\u001b[31m│\u001b[0m \u001b[2m1039 \u001b[0m \u001b[31m│\u001b[0m\n", + "\u001b[31m╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\u001b[0m\n", + "\u001b[1;91mOSError: \u001b[0mfatal: Unable to create \u001b[32m'/notebooks/ingbetic/.git/index.lock'\u001b[0m: File exists.\n", + "\n", + "Another git process seems to be running in this repository, e.g.\n", + "an editor opened by \u001b[32m'git commit'\u001b[0m. Please make sure all processes\n", + "are terminated then try again. If it still fails, a git process\n", + "may have crashed in this repository earlier:\n", + "remove the file manually to continue.\n", + "\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "trainer.train(\"ingbetic/checkpoint-1500\")" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "e779574b-082f-4ac5-956e-33c174b1a8f0", + "metadata": {}, + "outputs": [ + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "972f5a97840c41b1a78506ac1da37bcd", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Upload file pytorch_model.bin: 0%| | 1.00/255M [00:00 main\n", + "\n", + "To https://huggingface.co/ziq/ingbetic\n", + " 4708bbb..60450a0 main -> main\n", + "\n" + ] + }, + { + "data": { + "text/plain": [ + "'https://huggingface.co/ziq/ingbetic/commit/4708bbb8b6ee82ff9514601c63b487ad9c8d93b5'" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "trainer.push_to_hub()" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "3f4a4763-00bd-4153-9807-77b73574ddaf", + "metadata": {}, + "outputs": [], + "source": [ + "from transformers import AutoModelForSequenceClassification\n", + "\n", + "model = AutoModelForSequenceClassification.from_pretrained(\"ziq/ingbetic\") " + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "id": "f68d53a6-674d-4355-94a9-311346a41fda", + "metadata": {}, + "outputs": [], + "source": [ + "dummy = tokenizer(\"1 pound ground beef, 1 tablespoon dehydrated onion, 1 teaspoon salt, 1/2 teaspoon pepper, 1 teaspoon garlic powder, 1/4 teaspoon oregano, 1 cup uncooked penne pasta, 1 cup water, 1 (15-ounce) can fire-roasted diced tomatoes, 1/4 cup Parmesan cheese, 1/2 cup shredded mozzarella cheese, 4-6 fresh basil leaves, torn\", padding='max_length', truncation=True)\n", + "input_ids = torch.tensor(dummy['input_ids']).unsqueeze(0)\n", + "attention_mask = torch.tensor(dummy['attention_mask']).unsqueeze(0)" + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "id": "df86f5be-2895-4224-8359-84b21aa48529", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "tensor([[-0.4866]], grad_fn=)" + ] + }, + "execution_count": 26, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "model(input_ids=input_ids, attention_mask=attention_mask)['logits']" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "237a2568-eb49-4b64-9d30-3dbb2e81f43d", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Xformers is not installed correctly. If you want to use memorry_efficient_attention to accelerate training use the following command to install Xformers\n", + "pip install xformers.\n" + ] + } + ], + "source": [ + "pipeline = transformers.pipeline(\"text-classification\",model=model,tokenizer=tokenizer)" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "920f7581-f6ff-450f-a400-b4b76fad8471", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[{'label': 'LABEL_0', 'score': 0.38068893551826477}]" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pipeline(\"1 pound ground beef, 1 tablespoon dehydrated onion, 1 teaspoon salt, 1/2 teaspoon pepper, 1 teaspoon garlic powder, 1/4 teaspoon oregano, 1 cup uncooked penne pasta, 1 cup water, 1 (15-ounce) can fire-roasted diced tomatoes, 1/4 cup Parmesan cheese, 1/2 cup shredded mozzarella cheese, 4-6 fresh basil leaves, torn\")" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "id": "ad7f824b-d599-47f0-bb30-ffd1049d973b", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "============= Diagnostic Run torch.onnx.export version 2.0.1+cu117 =============\n", + "verbose: False, log level: Level.ERROR\n", + "======================= 0 NONE 0 NOTE 0 WARNING 0 ERROR ========================\n", + "\n" + ] + } + ], + "source": [ + "torch.onnx.export(\n", + " model, \n", + " tuple((input_ids, attention_mask)),\n", + " f=\"sugar.onnx\", \n", + " input_names=['input_ids', 'attention_mask'], \n", + " output_names=['logits'], \n", + " dynamic_axes={'input_ids': {0: 'batch_size', 1: 'sequence'}, \n", + " 'attention_mask': {0: 'batch_size', 1: 'sequence'}, \n", + " 'logits': {0: 'batch_size', 1: 'sequence'}}, \n", + " do_constant_folding=True, \n", + " opset_version=13, \n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 33, + "id": "d23e2686-597a-4507-8bee-c53fa3854ec6", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Ignore MatMul due to non constant B: /[/distilbert/transformer/layer.0/attention/MatMul]\n", + "Ignore MatMul due to non constant B: /[/distilbert/transformer/layer.0/attention/MatMul_1]\n", + "Ignore MatMul due to non constant B: /[/distilbert/transformer/layer.1/attention/MatMul]\n", + "Ignore MatMul due to non constant B: /[/distilbert/transformer/layer.1/attention/MatMul_1]\n", + "Ignore MatMul due to non constant B: /[/distilbert/transformer/layer.2/attention/MatMul]\n", + "Ignore MatMul due to non constant B: /[/distilbert/transformer/layer.2/attention/MatMul_1]\n", + "Ignore MatMul due to non constant B: /[/distilbert/transformer/layer.3/attention/MatMul]\n", + "Ignore MatMul due to non constant B: /[/distilbert/transformer/layer.3/attention/MatMul_1]\n", + "Ignore MatMul due to non constant B: /[/distilbert/transformer/layer.4/attention/MatMul]\n", + "Ignore MatMul due to non constant B: /[/distilbert/transformer/layer.4/attention/MatMul_1]\n", + "Ignore MatMul due to non constant B: /[/distilbert/transformer/layer.5/attention/MatMul]\n", + "Ignore MatMul due to non constant B: /[/distilbert/transformer/layer.5/attention/MatMul_1]\n" + ] + } + ], + "source": [ + "from onnxruntime.quantization import quantize_dynamic, QuantType\n", + "quantize_dynamic(\"sugar.onnx\", \"sugar-int8.onnx\", \n", + " weight_type=QuantType.QUInt8)" + ] + }, + { + "cell_type": "code", + "execution_count": 39, + "id": "0a05890f-0e80-49d6-a14f-10987db30d00", + "metadata": {}, + "outputs": [], + "source": [ + "# from transformers import AutoTokenizer\n", + "# from optimum.onnxruntime import ORTModelForSequenceClassification\n", + "\n", + "# # load model from hub and convert\n", + "# model = ORTModelForSequenceClassification.from_pretrained(\"ziq/ingbetic\",from_transformers=True)\n", + "# tokenizer = AutoTokenizer.from_pretrained(\"distilbert-base-uncased-finetuned-sst-2-english\")\n", + "\n", + "# # save converted model\n", + "# model.save_pretrained(\"ingbetic\")\n", + "# tokenizer.save_pretrained(\"ingbetic\")" + ] + }, + { + "cell_type": "code", + "execution_count": 50, + "id": "d265c9fb-9455-46c7-afb3-0082c0403b61", + "metadata": {}, + "outputs": [], + "source": [ + "import onnx\n", + "onnx_model = onnx.load(\"sugar.onnx\")\n", + "onnx.checker.check_model(onnx_model)" + ] + }, + { + "cell_type": "code", + "execution_count": 55, + "id": "e0d43a59-0154-4be0-aae0-9f5e5cae94f6", + "metadata": {}, + "outputs": [], + "source": [ + "import onnxruntime as ort\n", + "import numpy as np\n", + "ort_sess = ort.InferenceSession('sugar-int8.onnx')" + ] + }, + { + "cell_type": "code", + "execution_count": 59, + "id": "27e582de-9e07-4554-8269-890d245e17a3", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([[4.4876623]], dtype=float32)" + ] + }, + "execution_count": 59, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "outputs = ort_sess.run(None, {'input_ids': input_ids.numpy(), 'attention_mask': attention_mask.numpy()})\n", + "\n", + "# Print Result \n", + "outputs[0] * 13.3627190349059 + 10.85810766787474" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "014f3519-fa8a-4bb2-bf89-c270b1b5beac", + "metadata": {}, + "outputs": [], + "source": [ + "from huggingface_hub import HfApi\n", + "api = HfApi()\n", + "\n", + "# api.upload_file(\n", + "# path_or_fileobj=\"sugar.onnx\",\n", + "# path_in_repo=\"model.onnx\",\n", + "# repo_id=\"ziq/ingbetic\",\n", + "# repo_type=\"model\",\n", + "# )\n", + "\n", + "# api.upload_file(\n", + "# path_or_fileobj=\"sugar-int8.onnx\",\n", + "# path_in_repo=\"model-int8.onnx\",\n", + "# repo_id=\"ziq/ingbetic\",\n", + "# repo_type=\"model\",\n", + "# )" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c043e57e-3a2f-4440-b774-06f1914891a4", + "metadata": {}, + "outputs": [], + "source": [ + "api.upload_file(\n", + " path_or_fileobj=\"main.ipynb\",\n", + " path_in_repo=\"train.ipynb\",\n", + " repo_id=\"ziq/ingbetic\",\n", + " repo_type=\"model\",\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "dca7ec74-acde-4e22-a4cb-d44fcf1fd029", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.16" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +}