{ "cells": [ { "cell_type": "code", "execution_count": 36, "id": "452ee8e2-7d0f-4a52-97e6-7976bb21e4c1", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\u001b[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.\n", "tensorflow 2.9.2 requires protobuf<3.20,>=3.9.2, but you have protobuf 3.20.2 which is incompatible.\n", "tensorboard 2.9.1 requires protobuf<3.20,>=3.9.2, but you have protobuf 3.20.2 which is incompatible.\u001b[0m\u001b[31m\n", "\u001b[0m\u001b[33mWARNING: Running pip as the 'root' user can result in broken permissions and conflicting behaviour with the system package manager. It is recommended to use a virtual environment instead: https://pip.pypa.io/warnings/venv\u001b[0m\u001b[33m\n", "\u001b[0m" ] } ], "source": [ "# Install/upgrade the Python packages used by this notebook.\n", "# %pip (rather than !pip) installs into the environment of the running kernel.\n", "# NOTE: the last recorded run left protobuf at 3.20.2, which pip reports as\n", "# incompatible with the preinstalled tensorflow 2.9.2 / tensorboard 2.9.1 (see output).\n", "%pip install -q -U transformers huggingface_hub datasets accelerate peft onnx onnxruntime optimum\n", "# Optional transformers pin, left disabled:\n", "# %pip install -q transformers==4.28.0" ] }, { "cell_type": "code", "execution_count": 2, "id": "7cabae93-a56e-48e6-bf6e-36f65b15700b", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Detected operating system as Ubuntu/focal.\n", "Checking for curl...\n", "Detected curl...\n", "Checking for gpg...\n", "Detected gpg...\n", "Detected apt version as 2.0.9\n", "Running apt-get update... done.\n", "Installing apt-transport-https... done.\n", "Installing /etc/apt/sources.list.d/github_git-lfs.list...done.\n", "Importing packagecloud gpg key... Packagecloud gpg key imported to /etc/apt/keyrings/github_git-lfs-archive-keyring.gpg\n", "done.\n", "Running apt-get update... done.\n", "\n", "The repository is setup! 
You can now install packages.\n" ] } ], "source": [ "# Register the packagecloud apt repository that hosts git-lfs.\n", "# -f: fail on HTTP errors instead of piping an error page into a root shell;\n", "# -S: still print curl errors while -s hides the progress meter; -L: follow redirects.\n", "!curl -fsSL https://packagecloud.io/install/repositories/github/git-lfs/script.deb.sh | sudo bash" ] }, { "cell_type": "code", "execution_count": 3, "id": "6428ceeb-9fd1-455d-9cb7-f4d4102d6d34", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Reading package lists... Done\n", "Building dependency tree \n", "Reading state information... Done\n", "The following NEW packages will be installed:\n", " git-lfs\n", "0 upgraded, 1 newly installed, 0 to remove and 111 not upgraded.\n", "Need to get 7419 kB of archives.\n", "After this operation, 16.0 MB of additional disk space will be used.\n", "Get:1 https://packagecloud.io/github/git-lfs/ubuntu focal/main amd64 git-lfs amd64 3.3.0 [7419 kB]\n", "Fetched 7419 kB in 1s (11.9 MB/s)\n", "Selecting previously unselected package git-lfs.\n", "(Reading database ... 69943 files and directories currently installed.)\n", "Preparing to unpack .../git-lfs_3.3.0_amd64.deb ...\n", "Unpacking git-lfs (3.3.0) ...\n", "Setting up git-lfs (3.3.0) ...\n", "Git LFS initialized.\n", "Processing triggers for man-db (2.9.1-1) ...\n" ] } ], "source": [ "# Install git-lfs from the repository registered by the packagecloud script.\n", "# -y answers apt's confirmation prompt, which a notebook cell cannot do interactively.\n", "!sudo apt-get install -y git-lfs" ] }, { "cell_type": "code", "execution_count": 34, "id": "d522266b-71f2-4e92-a187-dfb8e497bd0d", "metadata": { "tags": [] }, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "a509e74171a046dea452a1c3281c17d2", "version_major": 2, "version_minor": 0 }, "text/plain": [ "VBox(children=(HTML(value='
Step | \n", "Training Loss | \n", "Validation Loss | \n", "
---|---|---|
1550 | \n", "0.128300 | \n", "0.602472 | \n", "
1600 | \n", "0.125100 | \n", "0.629285 | \n", "
1650 | \n", "0.115500 | \n", "0.609703 | \n", "
1700 | \n", "0.123100 | \n", "0.627464 | \n", "
1750 | \n", "0.097100 | \n", "0.610019 | \n", "
1800 | \n", "0.104800 | \n", "0.604064 | \n", "
1850 | \n", "0.087500 | \n", "0.612238 | \n", "
1900 | \n", "0.104200 | \n", "0.613567 | \n", "
1950 | \n", "0.097600 | \n", "0.612580 | \n", "
2000 | \n", "0.069200 | \n", "0.610577 | \n", "
"
],
"text/plain": [
"╭─────────────────────────────── Traceback (most recent call last) ────────────────────────────────╮\n",
"│ /usr/local/lib/python3.9/dist-packages/huggingface_hub/repository.py:1032 in git_commit │\n",
"│ │\n",
"│ 1029 │ │ │ │ The message attributed to the commit. │\n",
"│ 1030 │ │ \"\"\" │\n",
"│ 1031 │ │ try: │\n",
"│ ❱ 1032 │ │ │ result = run_subprocess(\"git commit -v -m\".split() + [commit_message], self. │\n",
"│ 1033 │ │ │ logger.info(f\"Committed:\\n{result.stdout}\\n\") │\n",
"│ 1034 │ │ except subprocess.CalledProcessError as exc: │\n",
"│ 1035 │ │ │ if len(exc.stderr) > 0: │\n",
"│ │\n",
"│ /usr/local/lib/python3.9/dist-packages/huggingface_hub/utils/_subprocess.py:83 in run_subprocess │\n",
"│ │\n",
"│ 80 │ if isinstance(folder, Path): │\n",
"│ 81 │ │ folder = str(folder) │\n",
"│ 82 │ │\n",
"│ ❱ 83 │ return subprocess.run( │\n",
"│ 84 │ │ command, │\n",
"│ 85 │ │ stderr=subprocess.PIPE, │\n",
"│ 86 │ │ stdout=subprocess.PIPE, │\n",
"│ │\n",
"│ /usr/lib/python3.9/subprocess.py:528 in run │\n",
"│ │\n",
"│ 525 │ │ │ raise │\n",
"│ 526 │ │ retcode = process.poll() │\n",
"│ 527 │ │ if check and retcode: │\n",
"│ ❱ 528 │ │ │ raise CalledProcessError(retcode, process.args, │\n",
"│ 529 │ │ │ │ │ │ │ │ │ output=stdout, stderr=stderr) │\n",
"│ 530 │ return CompletedProcess(process.args, retcode, stdout, stderr) │\n",
"│ 531 │\n",
"╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n",
"CalledProcessError: Command '['git', 'commit', '-v', '-m', 'Training in progress, step 2000']' returned non-zero \n",
"exit status 128.\n",
"\n",
"During handling of the above exception, another exception occurred:\n",
"\n",
"╭─────────────────────────────── Traceback (most recent call last) ────────────────────────────────╮\n",
"│ in <module> │\n",
"│ │\n",
"│ ❱ 1 trainer.train(\"ingbetic/checkpoint-1500\") │\n",
"│ 2 │\n",
"│ │\n",
"│ /usr/local/lib/python3.9/dist-packages/transformers/trainer.py:1664 in train │\n",
"│ │\n",
"│ 1661 │ │ inner_training_loop = find_executable_batch_size( │\n",
"│ 1662 │ │ │ self._inner_training_loop, self._train_batch_size, args.auto_find_batch_size │\n",
"│ 1663 │ │ ) │\n",
"│ ❱ 1664 │ │ return inner_training_loop( │\n",
"│ 1665 │ │ │ args=args, │\n",
"│ 1666 │ │ │ resume_from_checkpoint=resume_from_checkpoint, │\n",
"│ 1667 │ │ │ trial=trial, │\n",
"│ │\n",
"│ /usr/local/lib/python3.9/dist-packages/transformers/trainer.py:2019 in _inner_training_loop │\n",
"│ │\n",
"│ 2016 │ │ │ │ │ self.state.epoch = epoch + (step + 1 + steps_skipped) / steps_in_epo │\n",
"│ 2017 │ │ │ │ │ self.control = self.callback_handler.on_step_end(args, self.state, s │\n",
"│ 2018 │ │ │ │ │ │\n",
"│ ❱ 2019 │ │ │ │ │ self._maybe_log_save_evaluate(tr_loss, model, trial, epoch, ignore_k │\n",
"│ 2020 │ │ │ │ else: │\n",
"│ 2021 │ │ │ │ │ self.control = self.callback_handler.on_substep_end(args, self.state │\n",
"│ 2022 │\n",
"│ │\n",
"│ /usr/local/lib/python3.9/dist-packages/transformers/trainer.py:2308 in _maybe_log_save_evaluate │\n",
"│ │\n",
"│ 2305 │ │ │ │ self.lr_scheduler.step(metrics[self.args.metric_for_best_model]) │\n",
"│ 2306 │ │ │\n",
"│ 2307 │ │ if self.control.should_save: │\n",
"│ ❱ 2308 │ │ │ self._save_checkpoint(model, trial, metrics=metrics) │\n",
"│ 2309 │ │ │ self.control = self.callback_handler.on_save(self.args, self.state, self.con │\n",
"│ 2310 │ │\n",
"│ 2311 │ def _load_rng_state(self, checkpoint): │\n",
"│ │\n",
"│ /usr/local/lib/python3.9/dist-packages/transformers/trainer.py:2462 in _save_checkpoint │\n",
"│ │\n",
"│ 2459 │ │ │ torch.save(rng_states, os.path.join(output_dir, f\"rng_state_{self.args.proce │\n",
"│ 2460 │ │ │\n",
"│ 2461 │ │ if self.args.push_to_hub: │\n",
"│ ❱ 2462 │ │ │ self._push_from_checkpoint(output_dir) │\n",
"│ 2463 │ │ │\n",
"│ 2464 │ │ # Maybe delete some older checkpoints. │\n",
"│ 2465 │ │ if self.args.should_save: │\n",
"│ │\n",
"│ /usr/local/lib/python3.9/dist-packages/transformers/trainer.py:3649 in _push_from_checkpoint │\n",
"│ │\n",
"│ 3646 │ │ │ │ commit_message = f\"Training in progress, step {self.state.global_step}\" │\n",
"│ 3647 │ │ │ else: │\n",
"│ 3648 │ │ │ │ commit_message = f\"Training in progress, epoch {int(self.state.epoch)}\" │\n",
"│ ❱ 3649 │ │ │ _, self.push_in_progress = self.repo.push_to_hub( │\n",
"│ 3650 │ │ │ │ commit_message=commit_message, blocking=False, auto_lfs_prune=True │\n",
"│ 3651 │ │ │ ) │\n",
"│ 3652 │ │ finally: │\n",
"│ │\n",
"│ /usr/local/lib/python3.9/dist-packages/huggingface_hub/repository.py:1306 in push_to_hub │\n",
"│ │\n",
"│ 1303 │ │ │ logger.info(\"Repo currently clean. Ignoring push_to_hub\") │\n",
"│ 1304 │ │ │ return None │\n",
"│ 1305 │ │ self.git_add(auto_lfs_track=True) │\n",
"│ ❱ 1306 │ │ self.git_commit(commit_message) │\n",
"│ 1307 │ │ return self.git_push( │\n",
"│ 1308 │ │ │ upstream=f\"origin {self.current_branch}\", │\n",
"│ 1309 │ │ │ blocking=blocking, │\n",
"│ │\n",
"│ /usr/local/lib/python3.9/dist-packages/huggingface_hub/repository.py:1036 in git_commit │\n",
"│ │\n",
"│ 1033 │ │ │ logger.info(f\"Committed:\\n{result.stdout}\\n\") │\n",
"│ 1034 │ │ except subprocess.CalledProcessError as exc: │\n",
"│ 1035 │ │ │ if len(exc.stderr) > 0: │\n",
"│ ❱ 1036 │ │ │ │ raise EnvironmentError(exc.stderr) │\n",
"│ 1037 │ │ │ else: │\n",
"│ 1038 │ │ │ │ raise EnvironmentError(exc.stdout) │\n",
"│ 1039 │\n",
"╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n",
"OSError: fatal: Unable to create '/notebooks/ingbetic/.git/index.lock': File exists.\n",
"\n",
"Another git process seems to be running in this repository, e.g.\n",
"an editor opened by 'git commit'. Please make sure all processes\n",
"are terminated then try again. If it still fails, a git process\n",
"may have crashed in this repository earlier:\n",
"remove the file manually to continue.\n",
"\n",
"
\n"
],
"text/plain": [
"\u001b[31m╭─\u001b[0m\u001b[31m──────────────────────────────\u001b[0m\u001b[31m \u001b[0m\u001b[1;31mTraceback \u001b[0m\u001b[1;2;31m(most recent call last)\u001b[0m\u001b[31m \u001b[0m\u001b[31m───────────────────────────────\u001b[0m\u001b[31m─╮\u001b[0m\n",
"\u001b[31m│\u001b[0m \u001b[2;33m/usr/local/lib/python3.9/dist-packages/huggingface_hub/\u001b[0m\u001b[1;33mrepository.py\u001b[0m:\u001b[94m1032\u001b[0m in \u001b[92mgit_commit\u001b[0m \u001b[31m│\u001b[0m\n",
"\u001b[31m│\u001b[0m \u001b[31m│\u001b[0m\n",
"\u001b[31m│\u001b[0m \u001b[2m1029 \u001b[0m\u001b[2;33m│ │ │ │ \u001b[0m\u001b[33mThe message attributed to the commit.\u001b[0m \u001b[31m│\u001b[0m\n",
"\u001b[31m│\u001b[0m \u001b[2m1030 \u001b[0m\u001b[2;33m│ │ \u001b[0m\u001b[33m\"\"\"\u001b[0m \u001b[31m│\u001b[0m\n",
"\u001b[31m│\u001b[0m \u001b[2m1031 \u001b[0m\u001b[2m│ │ \u001b[0m\u001b[94mtry\u001b[0m: \u001b[31m│\u001b[0m\n",
"\u001b[31m│\u001b[0m \u001b[31m❱ \u001b[0m1032 \u001b[2m│ │ │ \u001b[0mresult = run_subprocess(\u001b[33m\"\u001b[0m\u001b[33mgit commit -v -m\u001b[0m\u001b[33m\"\u001b[0m.split() + [commit_message], \u001b[96mself\u001b[0m. \u001b[31m│\u001b[0m\n",
"\u001b[31m│\u001b[0m \u001b[2m1033 \u001b[0m\u001b[2m│ │ │ \u001b[0mlogger.info(\u001b[33mf\u001b[0m\u001b[33m\"\u001b[0m\u001b[33mCommitted:\u001b[0m\u001b[33m\\n\u001b[0m\u001b[33m{\u001b[0mresult.stdout\u001b[33m}\u001b[0m\u001b[33m\\n\u001b[0m\u001b[33m\"\u001b[0m) \u001b[31m│\u001b[0m\n",
"\u001b[31m│\u001b[0m \u001b[2m1034 \u001b[0m\u001b[2m│ │ \u001b[0m\u001b[94mexcept\u001b[0m subprocess.CalledProcessError \u001b[94mas\u001b[0m exc: \u001b[31m│\u001b[0m\n",
"\u001b[31m│\u001b[0m \u001b[2m1035 \u001b[0m\u001b[2m│ │ │ \u001b[0m\u001b[94mif\u001b[0m \u001b[96mlen\u001b[0m(exc.stderr) > \u001b[94m0\u001b[0m: \u001b[31m│\u001b[0m\n",
"\u001b[31m│\u001b[0m \u001b[31m│\u001b[0m\n",
"\u001b[31m│\u001b[0m \u001b[2;33m/usr/local/lib/python3.9/dist-packages/huggingface_hub/utils/\u001b[0m\u001b[1;33m_subprocess.py\u001b[0m:\u001b[94m83\u001b[0m in \u001b[92mrun_subprocess\u001b[0m \u001b[31m│\u001b[0m\n",
"\u001b[31m│\u001b[0m \u001b[31m│\u001b[0m\n",
"\u001b[31m│\u001b[0m \u001b[2m 80 \u001b[0m\u001b[2m│ \u001b[0m\u001b[94mif\u001b[0m \u001b[96misinstance\u001b[0m(folder, Path): \u001b[31m│\u001b[0m\n",
"\u001b[31m│\u001b[0m \u001b[2m 81 \u001b[0m\u001b[2m│ │ \u001b[0mfolder = \u001b[96mstr\u001b[0m(folder) \u001b[31m│\u001b[0m\n",
"\u001b[31m│\u001b[0m \u001b[2m 82 \u001b[0m\u001b[2m│ \u001b[0m \u001b[31m│\u001b[0m\n",
"\u001b[31m│\u001b[0m \u001b[31m❱ \u001b[0m 83 \u001b[2m│ \u001b[0m\u001b[94mreturn\u001b[0m subprocess.run( \u001b[31m│\u001b[0m\n",
"\u001b[31m│\u001b[0m \u001b[2m 84 \u001b[0m\u001b[2m│ │ \u001b[0mcommand, \u001b[31m│\u001b[0m\n",
"\u001b[31m│\u001b[0m \u001b[2m 85 \u001b[0m\u001b[2m│ │ \u001b[0mstderr=subprocess.PIPE, \u001b[31m│\u001b[0m\n",
"\u001b[31m│\u001b[0m \u001b[2m 86 \u001b[0m\u001b[2m│ │ \u001b[0mstdout=subprocess.PIPE, \u001b[31m│\u001b[0m\n",
"\u001b[31m│\u001b[0m \u001b[31m│\u001b[0m\n",
"\u001b[31m│\u001b[0m \u001b[2;33m/usr/lib/python3.9/\u001b[0m\u001b[1;33msubprocess.py\u001b[0m:\u001b[94m528\u001b[0m in \u001b[92mrun\u001b[0m \u001b[31m│\u001b[0m\n",
"\u001b[31m│\u001b[0m \u001b[31m│\u001b[0m\n",
"\u001b[31m│\u001b[0m \u001b[2m 525 \u001b[0m\u001b[2m│ │ │ \u001b[0m\u001b[94mraise\u001b[0m \u001b[31m│\u001b[0m\n",
"\u001b[31m│\u001b[0m \u001b[2m 526 \u001b[0m\u001b[2m│ │ \u001b[0mretcode = process.poll() \u001b[31m│\u001b[0m\n",
"\u001b[31m│\u001b[0m \u001b[2m 527 \u001b[0m\u001b[2m│ │ \u001b[0m\u001b[94mif\u001b[0m check \u001b[95mand\u001b[0m retcode: \u001b[31m│\u001b[0m\n",
"\u001b[31m│\u001b[0m \u001b[31m❱ \u001b[0m 528 \u001b[2m│ │ │ \u001b[0m\u001b[94mraise\u001b[0m CalledProcessError(retcode, process.args, \u001b[31m│\u001b[0m\n",
"\u001b[31m│\u001b[0m \u001b[2m 529 \u001b[0m\u001b[2m│ │ │ │ │ │ │ │ │ \u001b[0moutput=stdout, stderr=stderr) \u001b[31m│\u001b[0m\n",
"\u001b[31m│\u001b[0m \u001b[2m 530 \u001b[0m\u001b[2m│ \u001b[0m\u001b[94mreturn\u001b[0m CompletedProcess(process.args, retcode, stdout, stderr) \u001b[31m│\u001b[0m\n",
"\u001b[31m│\u001b[0m \u001b[2m 531 \u001b[0m \u001b[31m│\u001b[0m\n",
"\u001b[31m╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\u001b[0m\n",
"\u001b[1;91mCalledProcessError: \u001b[0mCommand \u001b[32m'\u001b[0m\u001b[32m[\u001b[0m\u001b[32m'\u001b[0mgit', \u001b[32m'commit'\u001b[0m, \u001b[32m'-v'\u001b[0m, \u001b[32m'-m'\u001b[0m, \u001b[32m'Training in progress, step 2000'\u001b[0m\u001b[1m]\u001b[0m' returned non-zero \n",
"exit status \u001b[1;36m128\u001b[0m.\n",
"\n",
"\u001b[3mDuring handling of the above exception, another exception occurred:\u001b[0m\n",
"\n",
"\u001b[31m╭─\u001b[0m\u001b[31m──────────────────────────────\u001b[0m\u001b[31m \u001b[0m\u001b[1;31mTraceback \u001b[0m\u001b[1;2;31m(most recent call last)\u001b[0m\u001b[31m \u001b[0m\u001b[31m───────────────────────────────\u001b[0m\u001b[31m─╮\u001b[0m\n",
"\u001b[31m│\u001b[0m in \u001b[92m