diff --git "a/finetune.ipynb" "b/finetune.ipynb" --- "a/finetune.ipynb" +++ "b/finetune.ipynb" @@ -1,4022 +1,403 @@ { - "nbformat": 4, - "nbformat_minor": 0, - "metadata": { - "kernelspec": { - "name": "python3", - "display_name": "Python 3" - }, - "language_info": { - "name": "python" - }, - "accelerator": "GPU", - "widgets": { - "application/vnd.jupyter.widget-state+json": { - "76eba77b0fc9499e9b4015393156153e": { - "model_module": "@jupyter-widgets/controls", - "model_name": "HBoxModel", - "model_module_version": "1.5.0", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HBoxModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HBoxView", - "box_style": "", - "children": [ - "IPY_MODEL_c42123767d4849ffbcdd48f09580ea13", - "IPY_MODEL_e89f07c1f5744328b071f96a3bdc9532", - "IPY_MODEL_da36c71ba5bc469eb32bde730c5038d3" - ], - "layout": "IPY_MODEL_6cc6b7c564fe48f086152c78f8a9a915" - } - }, - "c42123767d4849ffbcdd48f09580ea13": { - "model_module": "@jupyter-widgets/controls", - "model_name": "HTMLModel", - "model_module_version": "1.5.0", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_c6a3adfa76dd4bbd8cc5e75ee28ef3e8", - "placeholder": "​", - "style": "IPY_MODEL_0c65f0e913b74de88ab712d65b9027f7", - "value": "Downloading data files: 100%" - } - }, - "e89f07c1f5744328b071f96a3bdc9532": { - "model_module": "@jupyter-widgets/controls", - "model_name": "FloatProgressModel", - "model_module_version": "1.5.0", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "FloatProgressModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "ProgressView", - "bar_style": "success", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_b07c7f3b6bc24f10821b88e254e88e43", - "max": 1, - "min": 0, - "orientation": "horizontal", - "style": "IPY_MODEL_42ea2834ee074029abe8cf8f9ff79a16", - "value": 1 - } - }, - "da36c71ba5bc469eb32bde730c5038d3": { - "model_module": "@jupyter-widgets/controls", - "model_name": "HTMLModel", - "model_module_version": "1.5.0", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_ec4205b0491a4f53ad1cba42689d59c4", - "placeholder": "​", - "style": "IPY_MODEL_f8b9df09c8c54d53a22310e64e30772f", - "value": " 1/1 [00:00<00:00, 39.39it/s]" - } - }, - "6cc6b7c564fe48f086152c78f8a9a915": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "model_module_version": "1.2.0", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "c6a3adfa76dd4bbd8cc5e75ee28ef3e8": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "model_module_version": "1.2.0", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "0c65f0e913b74de88ab712d65b9027f7": { - "model_module": "@jupyter-widgets/controls", - "model_name": "DescriptionStyleModel", - "model_module_version": "1.5.0", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "b07c7f3b6bc24f10821b88e254e88e43": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "model_module_version": "1.2.0", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "42ea2834ee074029abe8cf8f9ff79a16": { - "model_module": "@jupyter-widgets/controls", - "model_name": "ProgressStyleModel", - "model_module_version": "1.5.0", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "ProgressStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "bar_color": null, - "description_width": "" - } - }, - "ec4205b0491a4f53ad1cba42689d59c4": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "model_module_version": "1.2.0", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "f8b9df09c8c54d53a22310e64e30772f": { - "model_module": "@jupyter-widgets/controls", - "model_name": "DescriptionStyleModel", - "model_module_version": "1.5.0", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "e69b8912d3384180842ff0ead6096803": { - "model_module": "@jupyter-widgets/controls", - "model_name": "HBoxModel", - "model_module_version": "1.5.0", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HBoxModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HBoxView", - "box_style": "", - "children": [ - "IPY_MODEL_550a2ef0612e4d8396462d1b1c43608d", - "IPY_MODEL_f8d82b4d016f46ca8bab1a2fe591861e", - "IPY_MODEL_975d908a1874425198e0d62ae884a3ef" - ], - "layout": "IPY_MODEL_152aa8231b3f4b7db4e6f27bb61065b9" - } - }, - "550a2ef0612e4d8396462d1b1c43608d": { - "model_module": "@jupyter-widgets/controls", - "model_name": "HTMLModel", - "model_module_version": "1.5.0", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_27b48982ae244c66bccd855e7e33a00d", - "placeholder": "​", - "style": "IPY_MODEL_a896cb94d1fc4641ad224c7a9ca619b1", - "value": "Extracting data files: 100%" - } - }, - "f8d82b4d016f46ca8bab1a2fe591861e": { - "model_module": "@jupyter-widgets/controls", - "model_name": "FloatProgressModel", - "model_module_version": "1.5.0", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "FloatProgressModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "ProgressView", - "bar_style": "success", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_be5d02d7001a4a25a848d6b0c582b42b", - "max": 1, - "min": 0, - "orientation": "horizontal", - "style": "IPY_MODEL_609437910022418e83543fe6c3d16501", - "value": 1 - } - }, - "975d908a1874425198e0d62ae884a3ef": { - "model_module": "@jupyter-widgets/controls", - "model_name": "HTMLModel", - "model_module_version": "1.5.0", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_c3b5ef42dae64f95a2ef055db7e50fa8", - "placeholder": "​", - "style": "IPY_MODEL_ee0f0aff0c254b82854d1a6f4b367161", - "value": " 1/1 [00:01<00:00, 1.73s/it]" - } - }, - "152aa8231b3f4b7db4e6f27bb61065b9": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "model_module_version": "1.2.0", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "27b48982ae244c66bccd855e7e33a00d": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "model_module_version": "1.2.0", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "a896cb94d1fc4641ad224c7a9ca619b1": { - "model_module": "@jupyter-widgets/controls", - "model_name": "DescriptionStyleModel", - "model_module_version": "1.5.0", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "be5d02d7001a4a25a848d6b0c582b42b": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "model_module_version": "1.2.0", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "609437910022418e83543fe6c3d16501": { - "model_module": "@jupyter-widgets/controls", - "model_name": "ProgressStyleModel", - "model_module_version": "1.5.0", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "ProgressStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "bar_color": null, - "description_width": "" - } - }, - "c3b5ef42dae64f95a2ef055db7e50fa8": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "model_module_version": "1.2.0", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "ee0f0aff0c254b82854d1a6f4b367161": { - "model_module": "@jupyter-widgets/controls", - "model_name": "DescriptionStyleModel", - "model_module_version": "1.5.0", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "fd90085d416440579f409cdd9b29e053": { - "model_module": "@jupyter-widgets/controls", - "model_name": "HBoxModel", - "model_module_version": "1.5.0", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HBoxModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HBoxView", - "box_style": "", - "children": [ - "IPY_MODEL_fdd55d56775a4641925fc88e43f946b8", - "IPY_MODEL_a54ebe30a12b4c21b67b2f8d43aea2d3", - "IPY_MODEL_6f041f053ff240cbbb52f6d117ebfcf1" - ], - "layout": "IPY_MODEL_b7d5f8aaee0047178661e58aa59433ee" - } - }, - "fdd55d56775a4641925fc88e43f946b8": { - "model_module": "@jupyter-widgets/controls", - "model_name": "HTMLModel", - "model_module_version": "1.5.0", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_563ca6cac1594ba0a7c168ee31ccc982", - "placeholder": "​", - "style": "IPY_MODEL_e10e7830d1404baea72e81e0df295239", - "value": "Generating train split: " - } - }, - "a54ebe30a12b4c21b67b2f8d43aea2d3": { - "model_module": "@jupyter-widgets/controls", - "model_name": "FloatProgressModel", - "model_module_version": "1.5.0", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "FloatProgressModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "ProgressView", - "bar_style": "info", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_acc07aa077fd49d38d74e0e10fa3991a", - "max": 1, - "min": 0, - "orientation": "horizontal", - "style": "IPY_MODEL_441dfdd43ef0442894c19cad81a478db", - "value": 1 - } - }, - "6f041f053ff240cbbb52f6d117ebfcf1": { - "model_module": "@jupyter-widgets/controls", - "model_name": "HTMLModel", - "model_module_version": "1.5.0", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_9efe7ac9ee0946388eeb85f9a4603ffe", - "placeholder": "​", - "style": "IPY_MODEL_ae9996dcefcf4cfc8ea2f7488629c5ec", - "value": " 15011/0 [00:01<00:00, 14307.31 examples/s]" - } - }, - "b7d5f8aaee0047178661e58aa59433ee": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "model_module_version": "1.2.0", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": "hidden", - "width": null - } - }, - "563ca6cac1594ba0a7c168ee31ccc982": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "model_module_version": "1.2.0", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "e10e7830d1404baea72e81e0df295239": { - "model_module": "@jupyter-widgets/controls", - "model_name": "DescriptionStyleModel", - "model_module_version": "1.5.0", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "acc07aa077fd49d38d74e0e10fa3991a": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "model_module_version": "1.2.0", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": "20px" - } - }, - "441dfdd43ef0442894c19cad81a478db": { - "model_module": "@jupyter-widgets/controls", - "model_name": "ProgressStyleModel", - "model_module_version": "1.5.0", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "ProgressStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "bar_color": null, - "description_width": "" - } - }, - "9efe7ac9ee0946388eeb85f9a4603ffe": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "model_module_version": "1.2.0", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "ae9996dcefcf4cfc8ea2f7488629c5ec": { - "model_module": "@jupyter-widgets/controls", - "model_name": "DescriptionStyleModel", - "model_module_version": "1.5.0", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "db463b34f98c488e8282d7a59c421347": { - "model_module": "@jupyter-widgets/controls", - "model_name": "HBoxModel", - "model_module_version": "1.5.0", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HBoxModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HBoxView", - "box_style": "", - "children": [ - "IPY_MODEL_2ba524e48f35421c9afc9cc81e9ce3fb", - "IPY_MODEL_1829c96471c64a03a4872e1b7a16551c", - "IPY_MODEL_f639a121fab0407595ebdf04015d9267" - ], - "layout": "IPY_MODEL_fa5ff44ae5f64ce4bbf9b59e300b7ba3" - } - }, - "2ba524e48f35421c9afc9cc81e9ce3fb": { - "model_module": "@jupyter-widgets/controls", - "model_name": "HTMLModel", - "model_module_version": "1.5.0", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_e0d701b8c09744858db4fda6011b69df", - "placeholder": "​", - "style": "IPY_MODEL_7ef61952e0f2463bad3dd1a814bfb95a", - "value": "100%" - } - }, - "1829c96471c64a03a4872e1b7a16551c": { - "model_module": "@jupyter-widgets/controls", - "model_name": "FloatProgressModel", - "model_module_version": "1.5.0", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "FloatProgressModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "ProgressView", - "bar_style": "success", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_75263b1d19ce4cb0b95cacb6138b6fcb", - "max": 1, - "min": 0, - "orientation": "horizontal", - "style": "IPY_MODEL_c3f35ce818e34b05b247fae76200c02e", - "value": 1 - } - }, - "f639a121fab0407595ebdf04015d9267": { - "model_module": "@jupyter-widgets/controls", - "model_name": "HTMLModel", - "model_module_version": "1.5.0", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_be8dddb19dde441da1193d5f8738413d", - "placeholder": "​", - "style": "IPY_MODEL_4be8d304ddce49319488f20ab36e7f53", - "value": " 1/1 [00:00<00:00, 22.58it/s]" - } - }, - "fa5ff44ae5f64ce4bbf9b59e300b7ba3": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "model_module_version": "1.2.0", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "e0d701b8c09744858db4fda6011b69df": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "model_module_version": "1.2.0", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "7ef61952e0f2463bad3dd1a814bfb95a": { - "model_module": "@jupyter-widgets/controls", - "model_name": "DescriptionStyleModel", - "model_module_version": "1.5.0", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "75263b1d19ce4cb0b95cacb6138b6fcb": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "model_module_version": "1.2.0", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "c3f35ce818e34b05b247fae76200c02e": { - "model_module": "@jupyter-widgets/controls", - "model_name": "ProgressStyleModel", - "model_module_version": "1.5.0", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "ProgressStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "bar_color": null, - "description_width": "" - } - }, - "be8dddb19dde441da1193d5f8738413d": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "model_module_version": "1.2.0", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "4be8d304ddce49319488f20ab36e7f53": { - "model_module": "@jupyter-widgets/controls", - "model_name": "DescriptionStyleModel", - "model_module_version": "1.5.0", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "984896bacc39446091e2b6573facfa3e": { - "model_module": "@jupyter-widgets/controls", - "model_name": "HBoxModel", - "model_module_version": "1.5.0", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HBoxModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HBoxView", - "box_style": "", - "children": [ - "IPY_MODEL_f15179aad6ad44dfa2dfbd703dc7c319", - "IPY_MODEL_f92c68902b984e648337da7e63fcc775", - "IPY_MODEL_5ce279d7dca74e55acae1c70efae6cd5" - ], - "layout": "IPY_MODEL_b187d889a6654fd7ad948d66a8bce197" - } - }, - "f15179aad6ad44dfa2dfbd703dc7c319": { - "model_module": "@jupyter-widgets/controls", - "model_name": "HTMLModel", - "model_module_version": "1.5.0", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_e131a64ebaa84a809754b3c762525c37", - "placeholder": "​", - "style": "IPY_MODEL_72819a3c606c41f19d1493539d64aff5", - "value": "Downloading (…)lve/main/config.json: 100%" - } - }, - "f92c68902b984e648337da7e63fcc775": { - "model_module": "@jupyter-widgets/controls", - "model_name": "FloatProgressModel", - "model_module_version": "1.5.0", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "FloatProgressModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "ProgressView", - "bar_style": "success", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_6761867d6e2b415b932bebd33a3144f3", - "max": 506, - "min": 0, - "orientation": "horizontal", - "style": "IPY_MODEL_72c9aa3bb9614dfc8f316bc45a3455a0", - "value": 506 - } - }, - "5ce279d7dca74e55acae1c70efae6cd5": { - "model_module": "@jupyter-widgets/controls", - "model_name": "HTMLModel", - "model_module_version": "1.5.0", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_58e5e895215d4910b55d8cbe1417a111", - "placeholder": "​", - "style": "IPY_MODEL_b6bad7f690c54cb5814d09081e306393", - "value": " 506/506 [00:00<00:00, 31.8kB/s]" - } - }, - "b187d889a6654fd7ad948d66a8bce197": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "model_module_version": "1.2.0", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "e131a64ebaa84a809754b3c762525c37": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "model_module_version": "1.2.0", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "72819a3c606c41f19d1493539d64aff5": { - "model_module": "@jupyter-widgets/controls", - "model_name": "DescriptionStyleModel", - "model_module_version": "1.5.0", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "6761867d6e2b415b932bebd33a3144f3": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "model_module_version": "1.2.0", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "72c9aa3bb9614dfc8f316bc45a3455a0": { - "model_module": "@jupyter-widgets/controls", - "model_name": "ProgressStyleModel", - "model_module_version": "1.5.0", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "ProgressStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "bar_color": null, - "description_width": "" - } - }, - "58e5e895215d4910b55d8cbe1417a111": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "model_module_version": "1.2.0", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "b6bad7f690c54cb5814d09081e306393": { - "model_module": "@jupyter-widgets/controls", - "model_name": "DescriptionStyleModel", - "model_module_version": "1.5.0", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "ae3921fcb8e44862a42031f284f608c3": { - "model_module": "@jupyter-widgets/controls", - "model_name": "HBoxModel", - "model_module_version": "1.5.0", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HBoxModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HBoxView", - "box_style": "", - "children": [ - "IPY_MODEL_aaac19cd3f524821a656ceffa57fb337", - "IPY_MODEL_a77af2c0771b4a20b42dade07c40021c", - "IPY_MODEL_7e5ced10225544399add5e68dda80595" - ], - "layout": "IPY_MODEL_ccf73891ca1941ca8b77c87ba84058f8" - } - }, - "aaac19cd3f524821a656ceffa57fb337": { - "model_module": "@jupyter-widgets/controls", - "model_name": "HTMLModel", - "model_module_version": "1.5.0", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_bfc1e1979f0741a09e6c9f4181ae88e7", - "placeholder": "​", - "style": "IPY_MODEL_8ce25329fdf04a6e8c4ad8097975111e", - "value": "Downloading pytorch_model.bin: 100%" - } - }, - "a77af2c0771b4a20b42dade07c40021c": { - "model_module": "@jupyter-widgets/controls", - "model_name": "FloatProgressModel", - "model_module_version": "1.5.0", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "FloatProgressModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "ProgressView", - "bar_style": "success", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_479d18fc0f9448a49b07b93ac608ae1d", - "max": 6853038093, - "min": 0, - "orientation": "horizontal", - "style": "IPY_MODEL_85ffc3306cb4474aa5bfac22f6356352", - "value": 6853038093 - } - }, - "7e5ced10225544399add5e68dda80595": { - "model_module": "@jupyter-widgets/controls", - "model_name": "HTMLModel", - "model_module_version": "1.5.0", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_0d5765bde1fa470eb087df5fe1e45e56", - "placeholder": "​", - "style": "IPY_MODEL_6873f0a521cd4e4390563e27046c1aa6", - "value": " 6.85G/6.85G [00:35<00:00, 230MB/s]" - } - }, - "ccf73891ca1941ca8b77c87ba84058f8": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "model_module_version": "1.2.0", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "bfc1e1979f0741a09e6c9f4181ae88e7": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "model_module_version": "1.2.0", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "8ce25329fdf04a6e8c4ad8097975111e": { - "model_module": "@jupyter-widgets/controls", - "model_name": "DescriptionStyleModel", - "model_module_version": "1.5.0", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "479d18fc0f9448a49b07b93ac608ae1d": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "model_module_version": "1.2.0", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "85ffc3306cb4474aa5bfac22f6356352": { - "model_module": "@jupyter-widgets/controls", - "model_name": "ProgressStyleModel", - "model_module_version": "1.5.0", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "ProgressStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "bar_color": null, - "description_width": "" - } - }, - "0d5765bde1fa470eb087df5fe1e45e56": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "model_module_version": "1.2.0", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "6873f0a521cd4e4390563e27046c1aa6": { - "model_module": "@jupyter-widgets/controls", - "model_name": "DescriptionStyleModel", - "model_module_version": "1.5.0", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "0832a0aa65ac4f70ba2f303f9b867d25": { - "model_module": "@jupyter-widgets/controls", - "model_name": "HBoxModel", - "model_module_version": "1.5.0", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HBoxModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HBoxView", - "box_style": "", - "children": [ - "IPY_MODEL_436e9deafae04955a13f2ff0fc0e6348", - "IPY_MODEL_83427a912e3740008c0a7ffb9d20a1bd", - "IPY_MODEL_084f196c105c4c428253e0ce6dce8a21" - ], - "layout": "IPY_MODEL_403990397a4041abb8aff7bdcc5d8e8a" - } - }, - "436e9deafae04955a13f2ff0fc0e6348": { - "model_module": "@jupyter-widgets/controls", - "model_name": "HTMLModel", - "model_module_version": "1.5.0", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_f957482962954f03b383d7c4f93c7b04", - "placeholder": "​", - "style": "IPY_MODEL_19a731496867404b8888986f3d9c83ef", - "value": "Downloading (…)neration_config.json: 100%" - } - }, - "83427a912e3740008c0a7ffb9d20a1bd": { - "model_module": "@jupyter-widgets/controls", - "model_name": "FloatProgressModel", - "model_module_version": "1.5.0", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "FloatProgressModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "ProgressView", - "bar_style": "success", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_e0b09d3aa9ae4719bcdfad8e75ad86c5", - "max": 137, - "min": 0, - "orientation": "horizontal", - "style": "IPY_MODEL_9d41c197843644858919a396dc927936", - "value": 137 - } - }, - "084f196c105c4c428253e0ce6dce8a21": { - "model_module": "@jupyter-widgets/controls", - "model_name": "HTMLModel", - "model_module_version": "1.5.0", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_67f9f877dee04d538ffbe2de6c4dd181", - "placeholder": "​", - "style": "IPY_MODEL_001bb332cdb94a0c96dd2a322ff8718f", - "value": " 137/137 [00:00<00:00, 9.29kB/s]" - } - }, - "403990397a4041abb8aff7bdcc5d8e8a": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "model_module_version": "1.2.0", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "f957482962954f03b383d7c4f93c7b04": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "model_module_version": "1.2.0", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "19a731496867404b8888986f3d9c83ef": { - "model_module": "@jupyter-widgets/controls", - "model_name": "DescriptionStyleModel", - "model_module_version": "1.5.0", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "e0b09d3aa9ae4719bcdfad8e75ad86c5": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "model_module_version": "1.2.0", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "9d41c197843644858919a396dc927936": { - "model_module": "@jupyter-widgets/controls", - "model_name": "ProgressStyleModel", - "model_module_version": "1.5.0", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "ProgressStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "bar_color": null, - "description_width": "" - } - }, - "67f9f877dee04d538ffbe2de6c4dd181": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "model_module_version": "1.2.0", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "001bb332cdb94a0c96dd2a322ff8718f": { - "model_module": "@jupyter-widgets/controls", - "model_name": "DescriptionStyleModel", - "model_module_version": "1.5.0", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "310970764a4a4c9382e2334a4abbbd77": { - "model_module": "@jupyter-widgets/controls", - "model_name": "HBoxModel", - "model_module_version": "1.5.0", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HBoxModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HBoxView", - "box_style": "", - "children": [ - "IPY_MODEL_2cd59f5acc4948988814eb9b3d909aed", - "IPY_MODEL_8939309032ca4787a9d3c7b54b889926", - "IPY_MODEL_fcd31f8d1e5a482d9267ab63fd723e86" - ], - "layout": "IPY_MODEL_2d126272a4284ba9af39de65e9680fc8" - } - }, - "2cd59f5acc4948988814eb9b3d909aed": { - "model_module": "@jupyter-widgets/controls", - "model_name": "HTMLModel", - "model_module_version": "1.5.0", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_d8a0e85fa9fc4a9291f9303861fc9efc", - "placeholder": "​", - "style": "IPY_MODEL_d21f1cfb275b4bb38baf7022665e0208", - "value": "Map: 100%" - } - }, - "8939309032ca4787a9d3c7b54b889926": { - "model_module": "@jupyter-widgets/controls", - "model_name": "FloatProgressModel", - "model_module_version": "1.5.0", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "FloatProgressModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "ProgressView", - "bar_style": "", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_81e25933b9af43a492a6da4a5e3560f6", - "max": 15011, - "min": 0, - "orientation": "horizontal", - "style": "IPY_MODEL_5fe27494333b4e07a632769696744210", - "value": 15011 - } - }, - "fcd31f8d1e5a482d9267ab63fd723e86": { - "model_module": "@jupyter-widgets/controls", - "model_name": "HTMLModel", - "model_module_version": "1.5.0", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_35ca13104a3e4263a192889362dbc05e", - "placeholder": "​", - "style": "IPY_MODEL_85ae96c237754c4db014e5c09898e162", - "value": " 14994/15011 [00:44<00:00, 638.52 examples/s]" - } - }, - "2d126272a4284ba9af39de65e9680fc8": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "model_module_version": "1.2.0", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": "hidden", - "width": null - } - }, - "d8a0e85fa9fc4a9291f9303861fc9efc": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "model_module_version": "1.2.0", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "d21f1cfb275b4bb38baf7022665e0208": { - "model_module": "@jupyter-widgets/controls", - "model_name": "DescriptionStyleModel", - "model_module_version": "1.5.0", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "81e25933b9af43a492a6da4a5e3560f6": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "model_module_version": "1.2.0", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "5fe27494333b4e07a632769696744210": { - "model_module": "@jupyter-widgets/controls", - "model_name": "ProgressStyleModel", - "model_module_version": "1.5.0", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "ProgressStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "bar_color": null, - "description_width": "" - } - }, - "35ca13104a3e4263a192889362dbc05e": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "model_module_version": "1.2.0", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "85ae96c237754c4db014e5c09898e162": { - "model_module": "@jupyter-widgets/controls", - "model_name": "DescriptionStyleModel", - "model_module_version": "1.5.0", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - } - } - } + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "Gzg8SopX8EWH" + }, + "source": [ + "# Alpaca OpenLLaMa 3B LoRA" + ] }, - "cells": [ - { - "cell_type": "code", - "execution_count": 1, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "YSW4FUQPwIYu", - "outputId": "421da5b9-be5e-4661-bf98-6c57fbf1f6fd" - }, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "Cloning into 'alpaca-lora'...\n", - "remote: Enumerating objects: 607, done.\u001b[K\n", - "remote: Counting objects: 100% (51/51), done.\u001b[K\n", - "remote: Compressing objects: 100% (32/32), done.\u001b[K\n", - "remote: Total 607 (delta 28), reused 33 (delta 19), pack-reused 556\u001b[K\n", - "Receiving objects: 100% (607/607), 27.78 MiB | 5.67 MiB/s, done.\n", - "Resolving deltas: 100% (360/360), done.\n" - ] - } - ], - "source": [ - "!git clone https://github.com/tloen/alpaca-lora.git" - ] - }, - { - "cell_type": "markdown", - "source": [ - "# Alpaca OpenLLaMa 3B LoRA" - ], - "metadata": { - "id": "Gzg8SopX8EWH" - } - }, - { - "cell_type": "code", - "source": [ - "%cd alpaca-lora/" - ], - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "s1xm2uERx_st", - "outputId": "c77b42e1-202b-45ac-aa2c-3fdf2bcde155" - }, - "execution_count": 2, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "/content/alpaca-lora\n" - ] - } - ] - }, - { - "cell_type": "code", - "source": [ - "!pip install -q datasets loralib sentencepiece\n", - "\n", - "!pip install -q git+https://github.com/huggingface/transformers.git\n", - "!pip install -q git+https://github.com/huggingface/peft.git" - ], - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "JCB9UzMVwsSM", - "outputId": "7f0688e4-f360-4da6-a4d3-0d59d4135649" - }, - "execution_count": 5, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - " Installing build dependencies ... \u001b[?25l\u001b[?25hdone\n", - " Getting requirements to build wheel ... \u001b[?25l\u001b[?25hdone\n", - " Preparing metadata (pyproject.toml) ... \u001b[?25l\u001b[?25hdone\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.3/1.3 MB\u001b[0m \u001b[31m39.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25h Building wheel for transformers (pyproject.toml) ... \u001b[?25l\u001b[?25hdone\n", - " Installing build dependencies ... \u001b[?25l\u001b[?25hdone\n", - " Getting requirements to build wheel ... \u001b[?25l\u001b[?25hdone\n", - " Preparing metadata (pyproject.toml) ... \u001b[?25l\u001b[?25hdone\n" - ] - } - ] + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" }, - { - "cell_type": "code", - "source": [ - "!pip install bitsandbytes" - ], - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "qCnXTszZxE2T", - "outputId": "e619573c-3ac2-4ac3-bed5-caab57d00f9a" - }, - "execution_count": 6, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/\n", - "Collecting bitsandbytes\n", - " Downloading bitsandbytes-0.39.0-py3-none-any.whl (92.2 MB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m92.2/92.2 MB\u001b[0m \u001b[31m6.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hInstalling collected packages: bitsandbytes\n", - "Successfully installed bitsandbytes-0.39.0\n" - ] - } - ] - }, - { - "cell_type": "code", - "source": [ - "from google.colab import drive\n", - "drive.mount('/content/drive')" - ], - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "NyxvpDjcrUvv", - "outputId": "abe581a2-70ce-4956-b3dd-593fa4f3d8ef" - }, - "execution_count": 8, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "Mounted at /content/drive\n" - ] - } - ] + "id": "YSW4FUQPwIYu", + "outputId": "421da5b9-be5e-4661-bf98-6c57fbf1f6fd" + }, + "outputs": [], + "source": [ + "!git clone https://github.com/tloen/alpaca-lora.git" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" }, - { - "cell_type": "markdown", - "source": [ - "## Data Check" - ], - "metadata": { - "id": "9w0aSCzhxxQf" - } + "id": "s1xm2uERx_st", + "outputId": "c77b42e1-202b-45ac-aa2c-3fdf2bcde155" + }, + "outputs": [], + "source": [ + "%cd alpaca-lora/" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" }, - { - "cell_type": "code", - "source": [ - "from datasets import load_dataset\n", - "from transformers import LlamaTokenizer\n", - "\n", - "\n", - "tokenizer = LlamaTokenizer.from_pretrained(\"openlm-research/open_llama_3b_600bt_preview\", add_eos_token=True)\n", - "tokenizer.pad_token = tokenizer.eos_token\n", - "tokenizer.pad_token_id = tokenizer.eos_token_id\n", - "\n", - "data = load_dataset(\"json\", data_files=\"/content/drive/MyDrive/alpaca-data.json\")\n", - "\n", - "\n", - "def generate_prompt(data_point):\n", - " # sorry about the formatting disaster gotta move fast\n", - " if data_point[\"input\"]:\n", - " return f\"\"\"Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.\n", - "\n", - "### Instruction:\n", - "{data_point[\"instruction\"]}\n", - "\n", - "### Input:\n", - "{data_point[\"input\"]}\n", - "\n", - "### Response:\n", - "{data_point[\"output\"]}\"\"\"\n", - " else:\n", - " return f\"\"\"Below is an instruction that describes a task. Write a response that appropriately completes the request.\n", - "\n", - "### Instruction:\n", - "{data_point[\"instruction\"]}\n", - "\n", - "### Response:\n", - "{data_point[\"output\"]}\"\"\"" - ], - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/", - "height": 150, - "referenced_widgets": [ - "76eba77b0fc9499e9b4015393156153e", - "c42123767d4849ffbcdd48f09580ea13", - "e89f07c1f5744328b071f96a3bdc9532", - "da36c71ba5bc469eb32bde730c5038d3", - "6cc6b7c564fe48f086152c78f8a9a915", - "c6a3adfa76dd4bbd8cc5e75ee28ef3e8", - "0c65f0e913b74de88ab712d65b9027f7", - "b07c7f3b6bc24f10821b88e254e88e43", - "42ea2834ee074029abe8cf8f9ff79a16", - "ec4205b0491a4f53ad1cba42689d59c4", - "f8b9df09c8c54d53a22310e64e30772f", - "e69b8912d3384180842ff0ead6096803", - "550a2ef0612e4d8396462d1b1c43608d", - "f8d82b4d016f46ca8bab1a2fe591861e", - "975d908a1874425198e0d62ae884a3ef", - "152aa8231b3f4b7db4e6f27bb61065b9", - "27b48982ae244c66bccd855e7e33a00d", - "a896cb94d1fc4641ad224c7a9ca619b1", - "be5d02d7001a4a25a848d6b0c582b42b", - "609437910022418e83543fe6c3d16501", - "c3b5ef42dae64f95a2ef055db7e50fa8", - "ee0f0aff0c254b82854d1a6f4b367161", - "fd90085d416440579f409cdd9b29e053", - "fdd55d56775a4641925fc88e43f946b8", - "a54ebe30a12b4c21b67b2f8d43aea2d3", - "6f041f053ff240cbbb52f6d117ebfcf1", - "b7d5f8aaee0047178661e58aa59433ee", - "563ca6cac1594ba0a7c168ee31ccc982", - "e10e7830d1404baea72e81e0df295239", - "acc07aa077fd49d38d74e0e10fa3991a", - "441dfdd43ef0442894c19cad81a478db", - "9efe7ac9ee0946388eeb85f9a4603ffe", - "ae9996dcefcf4cfc8ea2f7488629c5ec", - "db463b34f98c488e8282d7a59c421347", - "2ba524e48f35421c9afc9cc81e9ce3fb", - "1829c96471c64a03a4872e1b7a16551c", - "f639a121fab0407595ebdf04015d9267", - "fa5ff44ae5f64ce4bbf9b59e300b7ba3", - "e0d701b8c09744858db4fda6011b69df", - "7ef61952e0f2463bad3dd1a814bfb95a", - "75263b1d19ce4cb0b95cacb6138b6fcb", - "c3f35ce818e34b05b247fae76200c02e", - "be8dddb19dde441da1193d5f8738413d", - "4be8d304ddce49319488f20ab36e7f53" - ] - }, - "id": "OdgRTo5YxyRL", - "outputId": "e6e64014-796b-4fa9-9635-3cb29f2dab7d" - }, - "execution_count": 9, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "Downloading and preparing dataset json/default to /root/.cache/huggingface/datasets/json/default-e726d3f1eee28f16/0.0.0/e347ab1c932092252e717ff3f949105a4dd28b27e842dd53157d2f72e276c2e4...\n" - ] - }, - { - "output_type": "display_data", - "data": { - "text/plain": [ - "Downloading data files: 0%| | 0/1 [00:00" - ], - "text/html": [ - "\n", - "
\n", - " \n", - " \n", - " [ 3/119 01:48 < 3:29:02, 0.01 it/s, Epoch 0.02/1]\n", - "
\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
StepTraining Loss
12.421700

" - ] - }, - "metadata": {} - }, - { - "output_type": "display_data", - "data": { - "text/plain": [ - "\u001b[31m╭─\u001b[0m\u001b[31m──────────────────────────────\u001b[0m\u001b[31m \u001b[0m\u001b[1;31mTraceback \u001b[0m\u001b[1;2;31m(most recent call last)\u001b[0m\u001b[31m \u001b[0m\u001b[31m───────────────────────────────\u001b[0m\u001b[31m─╮\u001b[0m\n", - "\u001b[31m│\u001b[0m in \u001b[92m\u001b[0m:\u001b[94m27\u001b[0m \u001b[31m│\u001b[0m\n", - "\u001b[31m│\u001b[0m \u001b[31m│\u001b[0m\n", - "\u001b[31m│\u001b[0m \u001b[2;33m/usr/local/lib/python3.10/dist-packages/transformers/\u001b[0m\u001b[1;33mtrainer.py\u001b[0m:\u001b[94m1696\u001b[0m in \u001b[92mtrain\u001b[0m \u001b[31m│\u001b[0m\n", - "\u001b[31m│\u001b[0m \u001b[31m│\u001b[0m\n", - "\u001b[31m│\u001b[0m \u001b[2m1693 \u001b[0m\u001b[2m│ │ \u001b[0minner_training_loop = find_executable_batch_size( \u001b[31m│\u001b[0m\n", - "\u001b[31m│\u001b[0m \u001b[2m1694 \u001b[0m\u001b[2m│ │ │ \u001b[0m\u001b[96mself\u001b[0m._inner_training_loop, \u001b[96mself\u001b[0m._train_batch_size, args.auto_find_batch_size \u001b[31m│\u001b[0m\n", - "\u001b[31m│\u001b[0m \u001b[2m1695 \u001b[0m\u001b[2m│ │ \u001b[0m) \u001b[31m│\u001b[0m\n", - "\u001b[31m│\u001b[0m \u001b[31m❱ \u001b[0m1696 \u001b[2m│ │ \u001b[0m\u001b[94mreturn\u001b[0m inner_training_loop( \u001b[31m│\u001b[0m\n", - "\u001b[31m│\u001b[0m \u001b[2m1697 \u001b[0m\u001b[2m│ │ │ \u001b[0margs=args, \u001b[31m│\u001b[0m\n", - "\u001b[31m│\u001b[0m \u001b[2m1698 \u001b[0m\u001b[2m│ │ │ \u001b[0mresume_from_checkpoint=resume_from_checkpoint, \u001b[31m│\u001b[0m\n", - "\u001b[31m│\u001b[0m \u001b[2m1699 \u001b[0m\u001b[2m│ │ │ \u001b[0mtrial=trial, \u001b[31m│\u001b[0m\n", - "\u001b[31m│\u001b[0m \u001b[31m│\u001b[0m\n", - "\u001b[31m│\u001b[0m \u001b[2;33m/usr/local/lib/python3.10/dist-packages/transformers/\u001b[0m\u001b[1;33mtrainer.py\u001b[0m:\u001b[94m1973\u001b[0m in \u001b[92m_inner_training_loop\u001b[0m \u001b[31m│\u001b[0m\n", - "\u001b[31m│\u001b[0m \u001b[31m│\u001b[0m\n", - "\u001b[31m│\u001b[0m \u001b[2m1970 \u001b[0m\u001b[2m│ │ │ │ │ \u001b[0m\u001b[94mwith\u001b[0m model.no_sync(): \u001b[31m│\u001b[0m\n", - "\u001b[31m│\u001b[0m \u001b[2m1971 \u001b[0m\u001b[2m│ │ │ │ │ │ \u001b[0mtr_loss_step = \u001b[96mself\u001b[0m.training_step(model, inputs) \u001b[31m│\u001b[0m\n", - "\u001b[31m│\u001b[0m \u001b[2m1972 \u001b[0m\u001b[2m│ │ │ │ \u001b[0m\u001b[94melse\u001b[0m: \u001b[31m│\u001b[0m\n", - "\u001b[31m│\u001b[0m \u001b[31m❱ \u001b[0m1973 \u001b[2m│ │ │ │ │ \u001b[0mtr_loss_step = \u001b[96mself\u001b[0m.training_step(model, inputs) \u001b[31m│\u001b[0m\n", - "\u001b[31m│\u001b[0m \u001b[2m1974 \u001b[0m\u001b[2m│ │ │ │ \u001b[0m \u001b[31m│\u001b[0m\n", - "\u001b[31m│\u001b[0m \u001b[2m1975 \u001b[0m\u001b[2m│ │ │ │ \u001b[0m\u001b[94mif\u001b[0m ( \u001b[31m│\u001b[0m\n", - "\u001b[31m│\u001b[0m \u001b[2m1976 \u001b[0m\u001b[2m│ │ │ │ │ \u001b[0margs.logging_nan_inf_filter \u001b[31m│\u001b[0m\n", - "\u001b[31m│\u001b[0m \u001b[31m│\u001b[0m\n", - "\u001b[31m│\u001b[0m \u001b[2;33m/usr/local/lib/python3.10/dist-packages/transformers/\u001b[0m\u001b[1;33mtrainer.py\u001b[0m:\u001b[94m2787\u001b[0m in \u001b[92mtraining_step\u001b[0m \u001b[31m│\u001b[0m\n", - "\u001b[31m│\u001b[0m \u001b[31m│\u001b[0m\n", - "\u001b[31m│\u001b[0m \u001b[2m2784 \u001b[0m\u001b[2m│ │ │ \u001b[0m\u001b[94mreturn\u001b[0m loss_mb.reduce_mean().detach().to(\u001b[96mself\u001b[0m.args.device) \u001b[31m│\u001b[0m\n", - "\u001b[31m│\u001b[0m \u001b[2m2785 \u001b[0m\u001b[2m│ │ \u001b[0m \u001b[31m│\u001b[0m\n", - "\u001b[31m│\u001b[0m \u001b[2m2786 \u001b[0m\u001b[2m│ │ \u001b[0m\u001b[94mwith\u001b[0m \u001b[96mself\u001b[0m.compute_loss_context_manager(): \u001b[31m│\u001b[0m\n", - "\u001b[31m│\u001b[0m \u001b[31m❱ \u001b[0m2787 \u001b[2m│ │ │ \u001b[0mloss = \u001b[96mself\u001b[0m.compute_loss(model, inputs) \u001b[31m│\u001b[0m\n", - "\u001b[31m│\u001b[0m \u001b[2m2788 \u001b[0m\u001b[2m│ │ \u001b[0m \u001b[31m│\u001b[0m\n", - "\u001b[31m│\u001b[0m \u001b[2m2789 \u001b[0m\u001b[2m│ │ \u001b[0m\u001b[94mif\u001b[0m \u001b[96mself\u001b[0m.args.n_gpu > \u001b[94m1\u001b[0m: \u001b[31m│\u001b[0m\n", - "\u001b[31m│\u001b[0m \u001b[2m2790 \u001b[0m\u001b[2m│ │ │ \u001b[0mloss = loss.mean() \u001b[2m# mean() to average on multi-gpu parallel training\u001b[0m \u001b[31m│\u001b[0m\n", - "\u001b[31m│\u001b[0m \u001b[31m│\u001b[0m\n", - "\u001b[31m│\u001b[0m \u001b[2;33m/usr/local/lib/python3.10/dist-packages/transformers/\u001b[0m\u001b[1;33mtrainer.py\u001b[0m:\u001b[94m2819\u001b[0m in \u001b[92mcompute_loss\u001b[0m \u001b[31m│\u001b[0m\n", - "\u001b[31m│\u001b[0m \u001b[31m│\u001b[0m\n", - "\u001b[31m│\u001b[0m \u001b[2m2816 \u001b[0m\u001b[2m│ │ │ \u001b[0mlabels = inputs.pop(\u001b[33m\"\u001b[0m\u001b[33mlabels\u001b[0m\u001b[33m\"\u001b[0m) \u001b[31m│\u001b[0m\n", - "\u001b[31m│\u001b[0m \u001b[2m2817 \u001b[0m\u001b[2m│ │ \u001b[0m\u001b[94melse\u001b[0m: \u001b[31m│\u001b[0m\n", - "\u001b[31m│\u001b[0m \u001b[2m2818 \u001b[0m\u001b[2m│ │ │ \u001b[0mlabels = \u001b[94mNone\u001b[0m \u001b[31m│\u001b[0m\n", - "\u001b[31m│\u001b[0m \u001b[31m❱ \u001b[0m2819 \u001b[2m│ │ \u001b[0moutputs = model(**inputs) \u001b[31m│\u001b[0m\n", - "\u001b[31m│\u001b[0m \u001b[2m2820 \u001b[0m\u001b[2m│ │ \u001b[0m\u001b[2m# Save past state if it exists\u001b[0m \u001b[31m│\u001b[0m\n", - "\u001b[31m│\u001b[0m \u001b[2m2821 \u001b[0m\u001b[2m│ │ \u001b[0m\u001b[2m# TODO: this needs to be fixed and made cleaner later.\u001b[0m \u001b[31m│\u001b[0m\n", - "\u001b[31m│\u001b[0m \u001b[2m2822 \u001b[0m\u001b[2m│ │ \u001b[0m\u001b[94mif\u001b[0m \u001b[96mself\u001b[0m.args.past_index >= \u001b[94m0\u001b[0m: \u001b[31m│\u001b[0m\n", - "\u001b[31m│\u001b[0m \u001b[31m│\u001b[0m\n", - "\u001b[31m│\u001b[0m \u001b[2;33m/usr/local/lib/python3.10/dist-packages/torch/nn/modules/\u001b[0m\u001b[1;33mmodule.py\u001b[0m:\u001b[94m1501\u001b[0m in \u001b[92m_call_impl\u001b[0m \u001b[31m│\u001b[0m\n", - "\u001b[31m│\u001b[0m \u001b[31m│\u001b[0m\n", - "\u001b[31m│\u001b[0m \u001b[2m1498 \u001b[0m\u001b[2m│ │ \u001b[0m\u001b[94mif\u001b[0m \u001b[95mnot\u001b[0m (\u001b[96mself\u001b[0m._backward_hooks \u001b[95mor\u001b[0m \u001b[96mself\u001b[0m._backward_pre_hooks \u001b[95mor\u001b[0m \u001b[96mself\u001b[0m._forward_hooks \u001b[31m│\u001b[0m\n", - "\u001b[31m│\u001b[0m \u001b[2m1499 \u001b[0m\u001b[2m│ │ │ │ \u001b[0m\u001b[95mor\u001b[0m _global_backward_pre_hooks \u001b[95mor\u001b[0m _global_backward_hooks \u001b[31m│\u001b[0m\n", - "\u001b[31m│\u001b[0m \u001b[2m1500 \u001b[0m\u001b[2m│ │ │ │ \u001b[0m\u001b[95mor\u001b[0m _global_forward_hooks \u001b[95mor\u001b[0m _global_forward_pre_hooks): \u001b[31m│\u001b[0m\n", - "\u001b[31m│\u001b[0m \u001b[31m❱ \u001b[0m1501 \u001b[2m│ │ │ \u001b[0m\u001b[94mreturn\u001b[0m forward_call(*args, **kwargs) \u001b[31m│\u001b[0m\n", - "\u001b[31m│\u001b[0m \u001b[2m1502 \u001b[0m\u001b[2m│ │ \u001b[0m\u001b[2m# Do not call functions when jit is used\u001b[0m \u001b[31m│\u001b[0m\n", - "\u001b[31m│\u001b[0m \u001b[2m1503 \u001b[0m\u001b[2m│ │ \u001b[0mfull_backward_hooks, non_full_backward_hooks = [], [] \u001b[31m│\u001b[0m\n", - "\u001b[31m│\u001b[0m \u001b[2m1504 \u001b[0m\u001b[2m│ │ \u001b[0mbackward_pre_hooks = [] \u001b[31m│\u001b[0m\n", - "\u001b[31m│\u001b[0m \u001b[31m│\u001b[0m\n", - "\u001b[31m│\u001b[0m \u001b[2;33m/usr/local/lib/python3.10/dist-packages/peft/\u001b[0m\u001b[1;33mpeft_model.py\u001b[0m:\u001b[94m686\u001b[0m in \u001b[92mforward\u001b[0m \u001b[31m│\u001b[0m\n", - "\u001b[31m│\u001b[0m \u001b[31m│\u001b[0m\n", - "\u001b[31m│\u001b[0m \u001b[2m 683 \u001b[0m\u001b[2m│ \u001b[0m): \u001b[31m│\u001b[0m\n", - "\u001b[31m│\u001b[0m \u001b[2m 684 \u001b[0m\u001b[2m│ │ \u001b[0mpeft_config = \u001b[96mself\u001b[0m.active_peft_config \u001b[31m│\u001b[0m\n", - "\u001b[31m│\u001b[0m \u001b[2m 685 \u001b[0m\u001b[2m│ │ \u001b[0m\u001b[94mif\u001b[0m \u001b[95mnot\u001b[0m \u001b[96misinstance\u001b[0m(peft_config, PromptLearningConfig): \u001b[31m│\u001b[0m\n", - "\u001b[31m│\u001b[0m \u001b[31m❱ \u001b[0m 686 \u001b[2m│ │ │ \u001b[0m\u001b[94mreturn\u001b[0m \u001b[96mself\u001b[0m.base_model( \u001b[31m│\u001b[0m\n", - "\u001b[31m│\u001b[0m \u001b[2m 687 \u001b[0m\u001b[2m│ │ │ │ \u001b[0minput_ids=input_ids, \u001b[31m│\u001b[0m\n", - "\u001b[31m│\u001b[0m \u001b[2m 688 \u001b[0m\u001b[2m│ │ │ │ \u001b[0mattention_mask=attention_mask, \u001b[31m│\u001b[0m\n", - "\u001b[31m│\u001b[0m \u001b[2m 689 \u001b[0m\u001b[2m│ │ │ │ \u001b[0minputs_embeds=inputs_embeds, \u001b[31m│\u001b[0m\n", - "\u001b[31m│\u001b[0m \u001b[31m│\u001b[0m\n", - "\u001b[31m│\u001b[0m \u001b[2;33m/usr/local/lib/python3.10/dist-packages/torch/nn/modules/\u001b[0m\u001b[1;33mmodule.py\u001b[0m:\u001b[94m1501\u001b[0m in \u001b[92m_call_impl\u001b[0m \u001b[31m│\u001b[0m\n", - "\u001b[31m│\u001b[0m \u001b[31m│\u001b[0m\n", - "\u001b[31m│\u001b[0m \u001b[2m1498 \u001b[0m\u001b[2m│ │ \u001b[0m\u001b[94mif\u001b[0m \u001b[95mnot\u001b[0m (\u001b[96mself\u001b[0m._backward_hooks \u001b[95mor\u001b[0m \u001b[96mself\u001b[0m._backward_pre_hooks \u001b[95mor\u001b[0m \u001b[96mself\u001b[0m._forward_hooks \u001b[31m│\u001b[0m\n", - "\u001b[31m│\u001b[0m \u001b[2m1499 \u001b[0m\u001b[2m│ │ │ │ \u001b[0m\u001b[95mor\u001b[0m _global_backward_pre_hooks \u001b[95mor\u001b[0m _global_backward_hooks \u001b[31m│\u001b[0m\n", - "\u001b[31m│\u001b[0m \u001b[2m1500 \u001b[0m\u001b[2m│ │ │ │ \u001b[0m\u001b[95mor\u001b[0m _global_forward_hooks \u001b[95mor\u001b[0m _global_forward_pre_hooks): \u001b[31m│\u001b[0m\n", - "\u001b[31m│\u001b[0m \u001b[31m❱ \u001b[0m1501 \u001b[2m│ │ │ \u001b[0m\u001b[94mreturn\u001b[0m forward_call(*args, **kwargs) \u001b[31m│\u001b[0m\n", - "\u001b[31m│\u001b[0m \u001b[2m1502 \u001b[0m\u001b[2m│ │ \u001b[0m\u001b[2m# Do not call functions when jit is used\u001b[0m \u001b[31m│\u001b[0m\n", - "\u001b[31m│\u001b[0m \u001b[2m1503 \u001b[0m\u001b[2m│ │ \u001b[0mfull_backward_hooks, non_full_backward_hooks = [], [] \u001b[31m│\u001b[0m\n", - "\u001b[31m│\u001b[0m \u001b[2m1504 \u001b[0m\u001b[2m│ │ \u001b[0mbackward_pre_hooks = [] \u001b[31m│\u001b[0m\n", - "\u001b[31m│\u001b[0m \u001b[31m│\u001b[0m\n", - "\u001b[31m│\u001b[0m \u001b[2;33m/usr/local/lib/python3.10/dist-packages/accelerate/\u001b[0m\u001b[1;33mhooks.py\u001b[0m:\u001b[94m165\u001b[0m in \u001b[92mnew_forward\u001b[0m \u001b[31m│\u001b[0m\n", - "\u001b[31m│\u001b[0m \u001b[31m│\u001b[0m\n", - "\u001b[31m│\u001b[0m \u001b[2m162 \u001b[0m\u001b[2m│ │ │ \u001b[0m\u001b[94mwith\u001b[0m torch.no_grad(): \u001b[31m│\u001b[0m\n", - "\u001b[31m│\u001b[0m \u001b[2m163 \u001b[0m\u001b[2m│ │ │ │ \u001b[0moutput = old_forward(*args, **kwargs) \u001b[31m│\u001b[0m\n", - "\u001b[31m│\u001b[0m \u001b[2m164 \u001b[0m\u001b[2m│ │ \u001b[0m\u001b[94melse\u001b[0m: \u001b[31m│\u001b[0m\n", - "\u001b[31m│\u001b[0m \u001b[31m❱ \u001b[0m165 \u001b[2m│ │ │ \u001b[0moutput = old_forward(*args, **kwargs) \u001b[31m│\u001b[0m\n", - "\u001b[31m│\u001b[0m \u001b[2m166 \u001b[0m\u001b[2m│ │ \u001b[0m\u001b[94mreturn\u001b[0m module._hf_hook.post_forward(module, output) \u001b[31m│\u001b[0m\n", - "\u001b[31m│\u001b[0m \u001b[2m167 \u001b[0m\u001b[2m│ \u001b[0m \u001b[31m│\u001b[0m\n", - "\u001b[31m│\u001b[0m \u001b[2m168 \u001b[0m\u001b[2m│ \u001b[0mmodule.forward = new_forward \u001b[31m│\u001b[0m\n", - "\u001b[31m│\u001b[0m \u001b[31m│\u001b[0m\n", - "\u001b[31m│\u001b[0m \u001b[2;33m/usr/local/lib/python3.10/dist-packages/transformers/models/llama/\u001b[0m\u001b[1;33mmodeling_llama.py\u001b[0m:\u001b[94m687\u001b[0m in \u001b[31m│\u001b[0m\n", - "\u001b[31m│\u001b[0m \u001b[92mforward\u001b[0m \u001b[31m│\u001b[0m\n", - "\u001b[31m│\u001b[0m \u001b[31m│\u001b[0m\n", - "\u001b[31m│\u001b[0m \u001b[2m684 \u001b[0m\u001b[2m│ │ \u001b[0mreturn_dict = return_dict \u001b[94mif\u001b[0m return_dict \u001b[95mis\u001b[0m \u001b[95mnot\u001b[0m \u001b[94mNone\u001b[0m \u001b[94melse\u001b[0m \u001b[96mself\u001b[0m.config.use_return \u001b[31m│\u001b[0m\n", - "\u001b[31m│\u001b[0m \u001b[2m685 \u001b[0m\u001b[2m│ │ \u001b[0m \u001b[31m│\u001b[0m\n", - "\u001b[31m│\u001b[0m \u001b[2m686 \u001b[0m\u001b[2m│ │ \u001b[0m\u001b[2m# decoder outputs consists of (dec_features, layer_state, dec_hidden, dec_attn)\u001b[0m \u001b[31m│\u001b[0m\n", - "\u001b[31m│\u001b[0m \u001b[31m❱ \u001b[0m687 \u001b[2m│ │ \u001b[0moutputs = \u001b[96mself\u001b[0m.model( \u001b[31m│\u001b[0m\n", - "\u001b[31m│\u001b[0m \u001b[2m688 \u001b[0m\u001b[2m│ │ │ \u001b[0minput_ids=input_ids, \u001b[31m│\u001b[0m\n", - "\u001b[31m│\u001b[0m \u001b[2m689 \u001b[0m\u001b[2m│ │ │ \u001b[0mattention_mask=attention_mask, \u001b[31m│\u001b[0m\n", - "\u001b[31m│\u001b[0m \u001b[2m690 \u001b[0m\u001b[2m│ │ │ \u001b[0mposition_ids=position_ids, \u001b[31m│\u001b[0m\n", - "\u001b[31m│\u001b[0m \u001b[31m│\u001b[0m\n", - "\u001b[31m│\u001b[0m \u001b[2;33m/usr/local/lib/python3.10/dist-packages/torch/nn/modules/\u001b[0m\u001b[1;33mmodule.py\u001b[0m:\u001b[94m1501\u001b[0m in \u001b[92m_call_impl\u001b[0m \u001b[31m│\u001b[0m\n", - "\u001b[31m│\u001b[0m \u001b[31m│\u001b[0m\n", - "\u001b[31m│\u001b[0m \u001b[2m1498 \u001b[0m\u001b[2m│ │ \u001b[0m\u001b[94mif\u001b[0m \u001b[95mnot\u001b[0m (\u001b[96mself\u001b[0m._backward_hooks \u001b[95mor\u001b[0m \u001b[96mself\u001b[0m._backward_pre_hooks \u001b[95mor\u001b[0m \u001b[96mself\u001b[0m._forward_hooks \u001b[31m│\u001b[0m\n", - "\u001b[31m│\u001b[0m \u001b[2m1499 \u001b[0m\u001b[2m│ │ │ │ \u001b[0m\u001b[95mor\u001b[0m _global_backward_pre_hooks \u001b[95mor\u001b[0m _global_backward_hooks \u001b[31m│\u001b[0m\n", - "\u001b[31m│\u001b[0m \u001b[2m1500 \u001b[0m\u001b[2m│ │ │ │ \u001b[0m\u001b[95mor\u001b[0m _global_forward_hooks \u001b[95mor\u001b[0m _global_forward_pre_hooks): \u001b[31m│\u001b[0m\n", - "\u001b[31m│\u001b[0m \u001b[31m❱ \u001b[0m1501 \u001b[2m│ │ │ \u001b[0m\u001b[94mreturn\u001b[0m forward_call(*args, **kwargs) \u001b[31m│\u001b[0m\n", - "\u001b[31m│\u001b[0m \u001b[2m1502 \u001b[0m\u001b[2m│ │ \u001b[0m\u001b[2m# Do not call functions when jit is used\u001b[0m \u001b[31m│\u001b[0m\n", - "\u001b[31m│\u001b[0m \u001b[2m1503 \u001b[0m\u001b[2m│ │ \u001b[0mfull_backward_hooks, non_full_backward_hooks = [], [] \u001b[31m│\u001b[0m\n", - "\u001b[31m│\u001b[0m \u001b[2m1504 \u001b[0m\u001b[2m│ │ \u001b[0mbackward_pre_hooks = [] \u001b[31m│\u001b[0m\n", - "\u001b[31m│\u001b[0m \u001b[31m│\u001b[0m\n", - "\u001b[31m│\u001b[0m \u001b[2;33m/usr/local/lib/python3.10/dist-packages/accelerate/\u001b[0m\u001b[1;33mhooks.py\u001b[0m:\u001b[94m165\u001b[0m in \u001b[92mnew_forward\u001b[0m \u001b[31m│\u001b[0m\n", - "\u001b[31m│\u001b[0m \u001b[31m│\u001b[0m\n", - "\u001b[31m│\u001b[0m \u001b[2m162 \u001b[0m\u001b[2m│ │ │ \u001b[0m\u001b[94mwith\u001b[0m torch.no_grad(): \u001b[31m│\u001b[0m\n", - "\u001b[31m│\u001b[0m \u001b[2m163 \u001b[0m\u001b[2m│ │ │ │ \u001b[0moutput = old_forward(*args, **kwargs) \u001b[31m│\u001b[0m\n", - "\u001b[31m│\u001b[0m \u001b[2m164 \u001b[0m\u001b[2m│ │ \u001b[0m\u001b[94melse\u001b[0m: \u001b[31m│\u001b[0m\n", - "\u001b[31m│\u001b[0m \u001b[31m❱ \u001b[0m165 \u001b[2m│ │ │ \u001b[0moutput = old_forward(*args, **kwargs) \u001b[31m│\u001b[0m\n", - "\u001b[31m│\u001b[0m \u001b[2m166 \u001b[0m\u001b[2m│ │ \u001b[0m\u001b[94mreturn\u001b[0m module._hf_hook.post_forward(module, output) \u001b[31m│\u001b[0m\n", - "\u001b[31m│\u001b[0m \u001b[2m167 \u001b[0m\u001b[2m│ \u001b[0m \u001b[31m│\u001b[0m\n", - "\u001b[31m│\u001b[0m \u001b[2m168 \u001b[0m\u001b[2m│ \u001b[0mmodule.forward = new_forward \u001b[31m│\u001b[0m\n", - "\u001b[31m│\u001b[0m \u001b[31m│\u001b[0m\n", - "\u001b[31m│\u001b[0m \u001b[2;33m/usr/local/lib/python3.10/dist-packages/transformers/models/llama/\u001b[0m\u001b[1;33mmodeling_llama.py\u001b[0m:\u001b[94m569\u001b[0m in \u001b[31m│\u001b[0m\n", - "\u001b[31m│\u001b[0m \u001b[92mforward\u001b[0m \u001b[31m│\u001b[0m\n", - "\u001b[31m│\u001b[0m \u001b[31m│\u001b[0m\n", - "\u001b[31m│\u001b[0m \u001b[2m566 \u001b[0m\u001b[2m│ │ │ │ │ \u001b[0m \u001b[31m│\u001b[0m\n", - "\u001b[31m│\u001b[0m \u001b[2m567 \u001b[0m\u001b[2m│ │ │ │ │ \u001b[0m\u001b[94mreturn\u001b[0m custom_forward \u001b[31m│\u001b[0m\n", - "\u001b[31m│\u001b[0m \u001b[2m568 \u001b[0m\u001b[2m│ │ │ │ \u001b[0m \u001b[31m│\u001b[0m\n", - "\u001b[31m│\u001b[0m \u001b[31m❱ \u001b[0m569 \u001b[2m│ │ │ │ \u001b[0mlayer_outputs = torch.utils.checkpoint.checkpoint( \u001b[31m│\u001b[0m\n", - "\u001b[31m│\u001b[0m \u001b[2m570 \u001b[0m\u001b[2m│ │ │ │ │ \u001b[0mcreate_custom_forward(decoder_layer), \u001b[31m│\u001b[0m\n", - "\u001b[31m│\u001b[0m \u001b[2m571 \u001b[0m\u001b[2m│ │ │ │ │ \u001b[0mhidden_states, \u001b[31m│\u001b[0m\n", - "\u001b[31m│\u001b[0m \u001b[2m572 \u001b[0m\u001b[2m│ │ │ │ │ \u001b[0mattention_mask, \u001b[31m│\u001b[0m\n", - "\u001b[31m│\u001b[0m \u001b[31m│\u001b[0m\n", - "\u001b[31m│\u001b[0m \u001b[2;33m/usr/local/lib/python3.10/dist-packages/torch/utils/\u001b[0m\u001b[1;33mcheckpoint.py\u001b[0m:\u001b[94m249\u001b[0m in \u001b[92mcheckpoint\u001b[0m \u001b[31m│\u001b[0m\n", - "\u001b[31m│\u001b[0m \u001b[31m│\u001b[0m\n", - "\u001b[31m│\u001b[0m \u001b[2m246 \u001b[0m\u001b[2m│ │ \u001b[0m\u001b[94mraise\u001b[0m \u001b[96mValueError\u001b[0m(\u001b[33m\"\u001b[0m\u001b[33mUnexpected keyword arguments: \u001b[0m\u001b[33m\"\u001b[0m + \u001b[33m\"\u001b[0m\u001b[33m,\u001b[0m\u001b[33m\"\u001b[0m.join(arg \u001b[94mfor\u001b[0m arg \u001b[95min\u001b[0m kwar \u001b[31m│\u001b[0m\n", - "\u001b[31m│\u001b[0m \u001b[2m247 \u001b[0m\u001b[2m│ \u001b[0m \u001b[31m│\u001b[0m\n", - "\u001b[31m│\u001b[0m \u001b[2m248 \u001b[0m\u001b[2m│ \u001b[0m\u001b[94mif\u001b[0m use_reentrant: \u001b[31m│\u001b[0m\n", - "\u001b[31m│\u001b[0m \u001b[31m❱ \u001b[0m249 \u001b[2m│ │ \u001b[0m\u001b[94mreturn\u001b[0m CheckpointFunction.apply(function, preserve, *args) \u001b[31m│\u001b[0m\n", - "\u001b[31m│\u001b[0m \u001b[2m250 \u001b[0m\u001b[2m│ \u001b[0m\u001b[94melse\u001b[0m: \u001b[31m│\u001b[0m\n", - "\u001b[31m│\u001b[0m \u001b[2m251 \u001b[0m\u001b[2m│ │ \u001b[0m\u001b[94mreturn\u001b[0m _checkpoint_without_reentrant( \u001b[31m│\u001b[0m\n", - "\u001b[31m│\u001b[0m \u001b[2m252 \u001b[0m\u001b[2m│ │ │ \u001b[0mfunction, \u001b[31m│\u001b[0m\n", - "\u001b[31m│\u001b[0m \u001b[31m│\u001b[0m\n", - "\u001b[31m│\u001b[0m \u001b[2;33m/usr/local/lib/python3.10/dist-packages/torch/autograd/\u001b[0m\u001b[1;33mfunction.py\u001b[0m:\u001b[94m506\u001b[0m in \u001b[92mapply\u001b[0m \u001b[31m│\u001b[0m\n", - "\u001b[31m│\u001b[0m \u001b[31m│\u001b[0m\n", - "\u001b[31m│\u001b[0m \u001b[2m503 \u001b[0m\u001b[2m│ │ \u001b[0m\u001b[94mif\u001b[0m \u001b[95mnot\u001b[0m torch._C._are_functorch_transforms_active(): \u001b[31m│\u001b[0m\n", - "\u001b[31m│\u001b[0m \u001b[2m504 \u001b[0m\u001b[2m│ │ │ \u001b[0m\u001b[2m# See NOTE: [functorch vjp and autograd interaction]\u001b[0m \u001b[31m│\u001b[0m\n", - "\u001b[31m│\u001b[0m \u001b[2m505 \u001b[0m\u001b[2m│ │ │ \u001b[0margs = _functorch.utils.unwrap_dead_wrappers(args) \u001b[31m│\u001b[0m\n", - "\u001b[31m│\u001b[0m \u001b[31m❱ \u001b[0m506 \u001b[2m│ │ │ \u001b[0m\u001b[94mreturn\u001b[0m \u001b[96msuper\u001b[0m().apply(*args, **kwargs) \u001b[2m# type: ignore[misc]\u001b[0m \u001b[31m│\u001b[0m\n", - "\u001b[31m│\u001b[0m \u001b[2m507 \u001b[0m\u001b[2m│ │ \u001b[0m \u001b[31m│\u001b[0m\n", - "\u001b[31m│\u001b[0m \u001b[2m508 \u001b[0m\u001b[2m│ │ \u001b[0m\u001b[94mif\u001b[0m \u001b[96mcls\u001b[0m.setup_context == _SingleLevelFunction.setup_context: \u001b[31m│\u001b[0m\n", - "\u001b[31m│\u001b[0m \u001b[2m509 \u001b[0m\u001b[2m│ │ │ \u001b[0m\u001b[94mraise\u001b[0m \u001b[96mRuntimeError\u001b[0m( \u001b[31m│\u001b[0m\n", - "\u001b[31m│\u001b[0m \u001b[31m│\u001b[0m\n", - "\u001b[31m│\u001b[0m \u001b[2;33m/usr/local/lib/python3.10/dist-packages/torch/utils/\u001b[0m\u001b[1;33mcheckpoint.py\u001b[0m:\u001b[94m107\u001b[0m in \u001b[92mforward\u001b[0m \u001b[31m│\u001b[0m\n", - "\u001b[31m│\u001b[0m \u001b[31m│\u001b[0m\n", - "\u001b[31m│\u001b[0m \u001b[2m104 \u001b[0m\u001b[2m│ │ \u001b[0mctx.save_for_backward(*tensor_inputs) \u001b[31m│\u001b[0m\n", - "\u001b[31m│\u001b[0m \u001b[2m105 \u001b[0m\u001b[2m│ │ \u001b[0m \u001b[31m│\u001b[0m\n", - "\u001b[31m│\u001b[0m \u001b[2m106 \u001b[0m\u001b[2m│ │ \u001b[0m\u001b[94mwith\u001b[0m torch.no_grad(): \u001b[31m│\u001b[0m\n", - "\u001b[31m│\u001b[0m \u001b[31m❱ \u001b[0m107 \u001b[2m│ │ │ \u001b[0moutputs = run_function(*args) \u001b[31m│\u001b[0m\n", - "\u001b[31m│\u001b[0m \u001b[2m108 \u001b[0m\u001b[2m│ │ \u001b[0m\u001b[94mreturn\u001b[0m outputs \u001b[31m│\u001b[0m\n", - "\u001b[31m│\u001b[0m \u001b[2m109 \u001b[0m\u001b[2m│ \u001b[0m \u001b[31m│\u001b[0m\n", - "\u001b[31m│\u001b[0m \u001b[2m110 \u001b[0m\u001b[2m│ \u001b[0m\u001b[1;95m@staticmethod\u001b[0m \u001b[31m│\u001b[0m\n", - "\u001b[31m│\u001b[0m \u001b[31m│\u001b[0m\n", - "\u001b[31m│\u001b[0m \u001b[2;33m/usr/local/lib/python3.10/dist-packages/transformers/models/llama/\u001b[0m\u001b[1;33mmodeling_llama.py\u001b[0m:\u001b[94m565\u001b[0m in \u001b[31m│\u001b[0m\n", - "\u001b[31m│\u001b[0m \u001b[92mcustom_forward\u001b[0m \u001b[31m│\u001b[0m\n", - "\u001b[31m│\u001b[0m \u001b[31m│\u001b[0m\n", - "\u001b[31m│\u001b[0m \u001b[2m562 \u001b[0m\u001b[2m│ │ │ │ \u001b[0m\u001b[94mdef\u001b[0m \u001b[92mcreate_custom_forward\u001b[0m(module): \u001b[31m│\u001b[0m\n", - "\u001b[31m│\u001b[0m \u001b[2m563 \u001b[0m\u001b[2m│ │ │ │ │ \u001b[0m\u001b[94mdef\u001b[0m \u001b[92mcustom_forward\u001b[0m(*inputs): \u001b[31m│\u001b[0m\n", - "\u001b[31m│\u001b[0m \u001b[2m564 \u001b[0m\u001b[2m│ │ │ │ │ │ \u001b[0m\u001b[2m# None for past_key_value\u001b[0m \u001b[31m│\u001b[0m\n", - "\u001b[31m│\u001b[0m \u001b[31m❱ \u001b[0m565 \u001b[2m│ │ │ │ │ │ \u001b[0m\u001b[94mreturn\u001b[0m module(*inputs, output_attentions, \u001b[94mNone\u001b[0m) \u001b[31m│\u001b[0m\n", - "\u001b[31m│\u001b[0m \u001b[2m566 \u001b[0m\u001b[2m│ │ │ │ │ \u001b[0m \u001b[31m│\u001b[0m\n", - "\u001b[31m│\u001b[0m \u001b[2m567 \u001b[0m\u001b[2m│ │ │ │ │ \u001b[0m\u001b[94mreturn\u001b[0m custom_forward \u001b[31m│\u001b[0m\n", - "\u001b[31m│\u001b[0m \u001b[2m568 \u001b[0m \u001b[31m│\u001b[0m\n", - "\u001b[31m│\u001b[0m \u001b[31m│\u001b[0m\n", - "\u001b[31m│\u001b[0m \u001b[2;33m/usr/local/lib/python3.10/dist-packages/torch/nn/modules/\u001b[0m\u001b[1;33mmodule.py\u001b[0m:\u001b[94m1501\u001b[0m in \u001b[92m_call_impl\u001b[0m \u001b[31m│\u001b[0m\n", - "\u001b[31m│\u001b[0m \u001b[31m│\u001b[0m\n", - "\u001b[31m│\u001b[0m \u001b[2m1498 \u001b[0m\u001b[2m│ │ \u001b[0m\u001b[94mif\u001b[0m \u001b[95mnot\u001b[0m (\u001b[96mself\u001b[0m._backward_hooks \u001b[95mor\u001b[0m \u001b[96mself\u001b[0m._backward_pre_hooks \u001b[95mor\u001b[0m \u001b[96mself\u001b[0m._forward_hooks \u001b[31m│\u001b[0m\n", - "\u001b[31m│\u001b[0m \u001b[2m1499 \u001b[0m\u001b[2m│ │ │ │ \u001b[0m\u001b[95mor\u001b[0m _global_backward_pre_hooks \u001b[95mor\u001b[0m _global_backward_hooks \u001b[31m│\u001b[0m\n", - "\u001b[31m│\u001b[0m \u001b[2m1500 \u001b[0m\u001b[2m│ │ │ │ \u001b[0m\u001b[95mor\u001b[0m _global_forward_hooks \u001b[95mor\u001b[0m _global_forward_pre_hooks): \u001b[31m│\u001b[0m\n", - "\u001b[31m│\u001b[0m \u001b[31m❱ \u001b[0m1501 \u001b[2m│ │ │ \u001b[0m\u001b[94mreturn\u001b[0m forward_call(*args, **kwargs) \u001b[31m│\u001b[0m\n", - "\u001b[31m│\u001b[0m \u001b[2m1502 \u001b[0m\u001b[2m│ │ \u001b[0m\u001b[2m# Do not call functions when jit is used\u001b[0m \u001b[31m│\u001b[0m\n", - "\u001b[31m│\u001b[0m \u001b[2m1503 \u001b[0m\u001b[2m│ │ \u001b[0mfull_backward_hooks, non_full_backward_hooks = [], [] \u001b[31m│\u001b[0m\n", - "\u001b[31m│\u001b[0m \u001b[2m1504 \u001b[0m\u001b[2m│ │ \u001b[0mbackward_pre_hooks = [] \u001b[31m│\u001b[0m\n", - "\u001b[31m│\u001b[0m \u001b[31m│\u001b[0m\n", - "\u001b[31m│\u001b[0m \u001b[2;33m/usr/local/lib/python3.10/dist-packages/accelerate/\u001b[0m\u001b[1;33mhooks.py\u001b[0m:\u001b[94m165\u001b[0m in \u001b[92mnew_forward\u001b[0m \u001b[31m│\u001b[0m\n", - "\u001b[31m│\u001b[0m \u001b[31m│\u001b[0m\n", - "\u001b[31m│\u001b[0m \u001b[2m162 \u001b[0m\u001b[2m│ │ │ \u001b[0m\u001b[94mwith\u001b[0m torch.no_grad(): \u001b[31m│\u001b[0m\n", - "\u001b[31m│\u001b[0m \u001b[2m163 \u001b[0m\u001b[2m│ │ │ │ \u001b[0moutput = old_forward(*args, **kwargs) \u001b[31m│\u001b[0m\n", - "\u001b[31m│\u001b[0m \u001b[2m164 \u001b[0m\u001b[2m│ │ \u001b[0m\u001b[94melse\u001b[0m: \u001b[31m│\u001b[0m\n", - "\u001b[31m│\u001b[0m \u001b[31m❱ \u001b[0m165 \u001b[2m│ │ │ \u001b[0moutput = old_forward(*args, **kwargs) \u001b[31m│\u001b[0m\n", - "\u001b[31m│\u001b[0m \u001b[2m166 \u001b[0m\u001b[2m│ │ \u001b[0m\u001b[94mreturn\u001b[0m module._hf_hook.post_forward(module, output) \u001b[31m│\u001b[0m\n", - "\u001b[31m│\u001b[0m \u001b[2m167 \u001b[0m\u001b[2m│ \u001b[0m \u001b[31m│\u001b[0m\n", - "\u001b[31m│\u001b[0m \u001b[2m168 \u001b[0m\u001b[2m│ \u001b[0mmodule.forward = new_forward \u001b[31m│\u001b[0m\n", - "\u001b[31m│\u001b[0m \u001b[31m│\u001b[0m\n", - "\u001b[31m│\u001b[0m \u001b[2;33m/usr/local/lib/python3.10/dist-packages/transformers/models/llama/\u001b[0m\u001b[1;33mmodeling_llama.py\u001b[0m:\u001b[94m292\u001b[0m in \u001b[31m│\u001b[0m\n", - "\u001b[31m│\u001b[0m \u001b[92mforward\u001b[0m \u001b[31m│\u001b[0m\n", - "\u001b[31m│\u001b[0m \u001b[31m│\u001b[0m\n", - "\u001b[31m│\u001b[0m \u001b[2m289 \u001b[0m\u001b[2m│ │ \u001b[0mhidden_states = \u001b[96mself\u001b[0m.input_layernorm(hidden_states) \u001b[31m│\u001b[0m\n", - "\u001b[31m│\u001b[0m \u001b[2m290 \u001b[0m\u001b[2m│ │ \u001b[0m \u001b[31m│\u001b[0m\n", - "\u001b[31m│\u001b[0m \u001b[2m291 \u001b[0m\u001b[2m│ │ \u001b[0m\u001b[2m# Self Attention\u001b[0m \u001b[31m│\u001b[0m\n", - "\u001b[31m│\u001b[0m \u001b[31m❱ \u001b[0m292 \u001b[2m│ │ \u001b[0mhidden_states, self_attn_weights, present_key_value = \u001b[96mself\u001b[0m.self_attn( \u001b[31m│\u001b[0m\n", - "\u001b[31m│\u001b[0m \u001b[2m293 \u001b[0m\u001b[2m│ │ │ \u001b[0mhidden_states=hidden_states, \u001b[31m│\u001b[0m\n", - "\u001b[31m│\u001b[0m \u001b[2m294 \u001b[0m\u001b[2m│ │ │ \u001b[0mattention_mask=attention_mask, \u001b[31m│\u001b[0m\n", - "\u001b[31m│\u001b[0m \u001b[2m295 \u001b[0m\u001b[2m│ │ │ \u001b[0mposition_ids=position_ids, \u001b[31m│\u001b[0m\n", - "\u001b[31m│\u001b[0m \u001b[31m│\u001b[0m\n", - "\u001b[31m│\u001b[0m \u001b[2;33m/usr/local/lib/python3.10/dist-packages/torch/nn/modules/\u001b[0m\u001b[1;33mmodule.py\u001b[0m:\u001b[94m1501\u001b[0m in \u001b[92m_call_impl\u001b[0m \u001b[31m│\u001b[0m\n", - "\u001b[31m│\u001b[0m \u001b[31m│\u001b[0m\n", - "\u001b[31m│\u001b[0m \u001b[2m1498 \u001b[0m\u001b[2m│ │ \u001b[0m\u001b[94mif\u001b[0m \u001b[95mnot\u001b[0m (\u001b[96mself\u001b[0m._backward_hooks \u001b[95mor\u001b[0m \u001b[96mself\u001b[0m._backward_pre_hooks \u001b[95mor\u001b[0m \u001b[96mself\u001b[0m._forward_hooks \u001b[31m│\u001b[0m\n", - "\u001b[31m│\u001b[0m \u001b[2m1499 \u001b[0m\u001b[2m│ │ │ │ \u001b[0m\u001b[95mor\u001b[0m _global_backward_pre_hooks \u001b[95mor\u001b[0m _global_backward_hooks \u001b[31m│\u001b[0m\n", - "\u001b[31m│\u001b[0m \u001b[2m1500 \u001b[0m\u001b[2m│ │ │ │ \u001b[0m\u001b[95mor\u001b[0m _global_forward_hooks \u001b[95mor\u001b[0m _global_forward_pre_hooks): \u001b[31m│\u001b[0m\n", - "\u001b[31m│\u001b[0m \u001b[31m❱ \u001b[0m1501 \u001b[2m│ │ │ \u001b[0m\u001b[94mreturn\u001b[0m forward_call(*args, **kwargs) \u001b[31m│\u001b[0m\n", - "\u001b[31m│\u001b[0m \u001b[2m1502 \u001b[0m\u001b[2m│ │ \u001b[0m\u001b[2m# Do not call functions when jit is used\u001b[0m \u001b[31m│\u001b[0m\n", - "\u001b[31m│\u001b[0m \u001b[2m1503 \u001b[0m\u001b[2m│ │ \u001b[0mfull_backward_hooks, non_full_backward_hooks = [], [] \u001b[31m│\u001b[0m\n", - "\u001b[31m│\u001b[0m \u001b[2m1504 \u001b[0m\u001b[2m│ │ \u001b[0mbackward_pre_hooks = [] \u001b[31m│\u001b[0m\n", - "\u001b[31m│\u001b[0m \u001b[31m│\u001b[0m\n", - "\u001b[31m│\u001b[0m \u001b[2;33m/usr/local/lib/python3.10/dist-packages/accelerate/\u001b[0m\u001b[1;33mhooks.py\u001b[0m:\u001b[94m165\u001b[0m in \u001b[92mnew_forward\u001b[0m \u001b[31m│\u001b[0m\n", - "\u001b[31m│\u001b[0m \u001b[31m│\u001b[0m\n", - "\u001b[31m│\u001b[0m \u001b[2m162 \u001b[0m\u001b[2m│ │ │ \u001b[0m\u001b[94mwith\u001b[0m torch.no_grad(): \u001b[31m│\u001b[0m\n", - "\u001b[31m│\u001b[0m \u001b[2m163 \u001b[0m\u001b[2m│ │ │ │ \u001b[0moutput = old_forward(*args, **kwargs) \u001b[31m│\u001b[0m\n", - "\u001b[31m│\u001b[0m \u001b[2m164 \u001b[0m\u001b[2m│ │ \u001b[0m\u001b[94melse\u001b[0m: \u001b[31m│\u001b[0m\n", - "\u001b[31m│\u001b[0m \u001b[31m❱ \u001b[0m165 \u001b[2m│ │ │ \u001b[0moutput = old_forward(*args, **kwargs) \u001b[31m│\u001b[0m\n", - "\u001b[31m│\u001b[0m \u001b[2m166 \u001b[0m\u001b[2m│ │ \u001b[0m\u001b[94mreturn\u001b[0m module._hf_hook.post_forward(module, output) \u001b[31m│\u001b[0m\n", - "\u001b[31m│\u001b[0m \u001b[2m167 \u001b[0m\u001b[2m│ \u001b[0m \u001b[31m│\u001b[0m\n", - "\u001b[31m│\u001b[0m \u001b[2m168 \u001b[0m\u001b[2m│ \u001b[0mmodule.forward = new_forward \u001b[31m│\u001b[0m\n", - "\u001b[31m│\u001b[0m \u001b[31m│\u001b[0m\n", - "\u001b[31m│\u001b[0m \u001b[2;33m/usr/local/lib/python3.10/dist-packages/transformers/models/llama/\u001b[0m\u001b[1;33mmodeling_llama.py\u001b[0m:\u001b[94m194\u001b[0m in \u001b[31m│\u001b[0m\n", - "\u001b[31m│\u001b[0m \u001b[92mforward\u001b[0m \u001b[31m│\u001b[0m\n", - "\u001b[31m│\u001b[0m \u001b[31m│\u001b[0m\n", - "\u001b[31m│\u001b[0m \u001b[2m191 \u001b[0m\u001b[2m│ \u001b[0m) -> Tuple[torch.Tensor, Optional[torch.Tensor], Optional[Tuple[torch.Tensor]]]: \u001b[31m│\u001b[0m\n", - "\u001b[31m│\u001b[0m \u001b[2m192 \u001b[0m\u001b[2m│ │ \u001b[0mbsz, q_len, _ = hidden_states.size() \u001b[31m│\u001b[0m\n", - "\u001b[31m│\u001b[0m \u001b[2m193 \u001b[0m\u001b[2m│ │ \u001b[0m \u001b[31m│\u001b[0m\n", - "\u001b[31m│\u001b[0m \u001b[31m❱ \u001b[0m194 \u001b[2m│ │ \u001b[0mquery_states = \u001b[96mself\u001b[0m.q_proj(hidden_states).view(bsz, q_len, \u001b[96mself\u001b[0m.num_heads, \u001b[96mself\u001b[0m. \u001b[31m│\u001b[0m\n", - "\u001b[31m│\u001b[0m \u001b[2m195 \u001b[0m\u001b[2m│ │ \u001b[0mkey_states = \u001b[96mself\u001b[0m.k_proj(hidden_states).view(bsz, q_len, \u001b[96mself\u001b[0m.num_heads, \u001b[96mself\u001b[0m.he \u001b[31m│\u001b[0m\n", - "\u001b[31m│\u001b[0m \u001b[2m196 \u001b[0m\u001b[2m│ │ \u001b[0mvalue_states = \u001b[96mself\u001b[0m.v_proj(hidden_states).view(bsz, q_len, \u001b[96mself\u001b[0m.num_heads, \u001b[96mself\u001b[0m. \u001b[31m│\u001b[0m\n", - "\u001b[31m│\u001b[0m \u001b[2m197 \u001b[0m \u001b[31m│\u001b[0m\n", - "\u001b[31m│\u001b[0m \u001b[31m│\u001b[0m\n", - "\u001b[31m│\u001b[0m \u001b[2;33m/usr/local/lib/python3.10/dist-packages/torch/nn/modules/\u001b[0m\u001b[1;33mmodule.py\u001b[0m:\u001b[94m1501\u001b[0m in \u001b[92m_call_impl\u001b[0m \u001b[31m│\u001b[0m\n", - "\u001b[31m│\u001b[0m \u001b[31m│\u001b[0m\n", - "\u001b[31m│\u001b[0m \u001b[2m1498 \u001b[0m\u001b[2m│ │ \u001b[0m\u001b[94mif\u001b[0m \u001b[95mnot\u001b[0m (\u001b[96mself\u001b[0m._backward_hooks \u001b[95mor\u001b[0m \u001b[96mself\u001b[0m._backward_pre_hooks \u001b[95mor\u001b[0m \u001b[96mself\u001b[0m._forward_hooks \u001b[31m│\u001b[0m\n", - "\u001b[31m│\u001b[0m \u001b[2m1499 \u001b[0m\u001b[2m│ │ │ │ \u001b[0m\u001b[95mor\u001b[0m _global_backward_pre_hooks \u001b[95mor\u001b[0m _global_backward_hooks \u001b[31m│\u001b[0m\n", - "\u001b[31m│\u001b[0m \u001b[2m1500 \u001b[0m\u001b[2m│ │ │ │ \u001b[0m\u001b[95mor\u001b[0m _global_forward_hooks \u001b[95mor\u001b[0m _global_forward_pre_hooks): \u001b[31m│\u001b[0m\n", - "\u001b[31m│\u001b[0m \u001b[31m❱ \u001b[0m1501 \u001b[2m│ │ │ \u001b[0m\u001b[94mreturn\u001b[0m forward_call(*args, **kwargs) \u001b[31m│\u001b[0m\n", - "\u001b[31m│\u001b[0m \u001b[2m1502 \u001b[0m\u001b[2m│ │ \u001b[0m\u001b[2m# Do not call functions when jit is used\u001b[0m \u001b[31m│\u001b[0m\n", - "\u001b[31m│\u001b[0m \u001b[2m1503 \u001b[0m\u001b[2m│ │ \u001b[0mfull_backward_hooks, non_full_backward_hooks = [], [] \u001b[31m│\u001b[0m\n", - "\u001b[31m│\u001b[0m \u001b[2m1504 \u001b[0m\u001b[2m│ │ \u001b[0mbackward_pre_hooks = [] \u001b[31m│\u001b[0m\n", - "\u001b[31m│\u001b[0m \u001b[31m│\u001b[0m\n", - "\u001b[31m│\u001b[0m \u001b[2;33m/usr/local/lib/python3.10/dist-packages/peft/tuners/\u001b[0m\u001b[1;33mlora.py\u001b[0m:\u001b[94m709\u001b[0m in \u001b[92mforward\u001b[0m \u001b[31m│\u001b[0m\n", - "\u001b[31m│\u001b[0m \u001b[31m│\u001b[0m\n", - "\u001b[31m│\u001b[0m \u001b[2m706 \u001b[0m\u001b[2m│ │ │ \u001b[0m\u001b[96mself\u001b[0m.active_adapter = adapter_name \u001b[31m│\u001b[0m\n", - "\u001b[31m│\u001b[0m \u001b[2m707 \u001b[0m\u001b[2m│ │ \u001b[0m \u001b[31m│\u001b[0m\n", - "\u001b[31m│\u001b[0m \u001b[2m708 \u001b[0m\u001b[2m│ │ \u001b[0m\u001b[94mdef\u001b[0m \u001b[92mforward\u001b[0m(\u001b[96mself\u001b[0m, x: torch.Tensor): \u001b[31m│\u001b[0m\n", - "\u001b[31m│\u001b[0m \u001b[31m❱ \u001b[0m709 \u001b[2m│ │ │ \u001b[0mresult = \u001b[96msuper\u001b[0m().forward(x) \u001b[31m│\u001b[0m\n", - "\u001b[31m│\u001b[0m \u001b[2m710 \u001b[0m\u001b[2m│ │ │ \u001b[0m \u001b[31m│\u001b[0m\n", - "\u001b[31m│\u001b[0m \u001b[2m711 \u001b[0m\u001b[2m│ │ │ \u001b[0m\u001b[94mif\u001b[0m \u001b[96mself\u001b[0m.disable_adapters \u001b[95mor\u001b[0m \u001b[96mself\u001b[0m.active_adapter \u001b[95mnot\u001b[0m \u001b[95min\u001b[0m \u001b[96mself\u001b[0m.lora_A.keys(): \u001b[31m│\u001b[0m\n", - "\u001b[31m│\u001b[0m \u001b[2m712 \u001b[0m\u001b[2m│ │ │ │ \u001b[0m\u001b[94mreturn\u001b[0m result \u001b[31m│\u001b[0m\n", - "\u001b[31m│\u001b[0m \u001b[31m│\u001b[0m\n", - "\u001b[31m│\u001b[0m \u001b[2;33m/usr/local/lib/python3.10/dist-packages/bitsandbytes/nn/\u001b[0m\u001b[1;33mmodules.py\u001b[0m:\u001b[94m388\u001b[0m in \u001b[92mforward\u001b[0m \u001b[31m│\u001b[0m\n", - "\u001b[31m│\u001b[0m \u001b[31m│\u001b[0m\n", - "\u001b[31m│\u001b[0m \u001b[2m385 \u001b[0m\u001b[2m│ │ \u001b[0m\u001b[94mif\u001b[0m \u001b[96mself\u001b[0m.bias \u001b[95mis\u001b[0m \u001b[95mnot\u001b[0m \u001b[94mNone\u001b[0m \u001b[95mand\u001b[0m \u001b[96mself\u001b[0m.bias.dtype != x.dtype: \u001b[31m│\u001b[0m\n", - "\u001b[31m│\u001b[0m \u001b[2m386 \u001b[0m\u001b[2m│ │ │ \u001b[0m\u001b[96mself\u001b[0m.bias.data = \u001b[96mself\u001b[0m.bias.data.to(x.dtype) \u001b[31m│\u001b[0m\n", - "\u001b[31m│\u001b[0m \u001b[2m387 \u001b[0m\u001b[2m│ │ \u001b[0m \u001b[31m│\u001b[0m\n", - "\u001b[31m│\u001b[0m \u001b[31m❱ \u001b[0m388 \u001b[2m│ │ \u001b[0mout = bnb.matmul(x, \u001b[96mself\u001b[0m.weight, bias=\u001b[96mself\u001b[0m.bias, state=\u001b[96mself\u001b[0m.state) \u001b[31m│\u001b[0m\n", - "\u001b[31m│\u001b[0m \u001b[2m389 \u001b[0m\u001b[2m│ │ \u001b[0m \u001b[31m│\u001b[0m\n", - "\u001b[31m│\u001b[0m \u001b[2m390 \u001b[0m\u001b[2m│ │ \u001b[0m\u001b[94mif\u001b[0m \u001b[95mnot\u001b[0m \u001b[96mself\u001b[0m.state.has_fp16_weights: \u001b[31m│\u001b[0m\n", - "\u001b[31m│\u001b[0m \u001b[2m391 \u001b[0m\u001b[2m│ │ │ \u001b[0m\u001b[94mif\u001b[0m \u001b[96mself\u001b[0m.state.CB \u001b[95mis\u001b[0m \u001b[95mnot\u001b[0m \u001b[94mNone\u001b[0m \u001b[95mand\u001b[0m \u001b[96mself\u001b[0m.state.CxB \u001b[95mis\u001b[0m \u001b[95mnot\u001b[0m \u001b[94mNone\u001b[0m: \u001b[31m│\u001b[0m\n", - "\u001b[31m│\u001b[0m \u001b[31m│\u001b[0m\n", - "\u001b[31m│\u001b[0m \u001b[2;33m/usr/local/lib/python3.10/dist-packages/bitsandbytes/autograd/\u001b[0m\u001b[1;33m_functions.py\u001b[0m:\u001b[94m559\u001b[0m in \u001b[92mmatmul\u001b[0m \u001b[31m│\u001b[0m\n", - "\u001b[31m│\u001b[0m \u001b[31m│\u001b[0m\n", - "\u001b[31m│\u001b[0m \u001b[2m556 \u001b[0m\u001b[2m│ \u001b[0mstate = state \u001b[95mor\u001b[0m MatmulLtState() \u001b[31m│\u001b[0m\n", - "\u001b[31m│\u001b[0m \u001b[2m557 \u001b[0m\u001b[2m│ \u001b[0m\u001b[94mif\u001b[0m threshold > \u001b[94m0.0\u001b[0m: \u001b[31m│\u001b[0m\n", - "\u001b[31m│\u001b[0m \u001b[2m558 \u001b[0m\u001b[2m│ │ \u001b[0mstate.threshold = threshold \u001b[31m│\u001b[0m\n", - "\u001b[31m│\u001b[0m \u001b[31m❱ \u001b[0m559 \u001b[2m│ \u001b[0m\u001b[94mreturn\u001b[0m MatMul8bitLt.apply(A, B, out, bias, state) \u001b[31m│\u001b[0m\n", - "\u001b[31m│\u001b[0m \u001b[2m560 \u001b[0m \u001b[31m│\u001b[0m\n", - "\u001b[31m│\u001b[0m \u001b[2m561 \u001b[0m \u001b[31m│\u001b[0m\n", - "\u001b[31m│\u001b[0m \u001b[2m562 \u001b[0m\u001b[94mdef\u001b[0m \u001b[92mmatmul_4bit\u001b[0m(A: tensor, B: tensor, quant_state: List, out: tensor = \u001b[94mNone\u001b[0m, bias=\u001b[94mNone\u001b[0m): \u001b[31m│\u001b[0m\n", - "\u001b[31m│\u001b[0m \u001b[31m│\u001b[0m\n", - "\u001b[31m│\u001b[0m \u001b[2;33m/usr/local/lib/python3.10/dist-packages/torch/autograd/\u001b[0m\u001b[1;33mfunction.py\u001b[0m:\u001b[94m506\u001b[0m in \u001b[92mapply\u001b[0m \u001b[31m│\u001b[0m\n", - "\u001b[31m│\u001b[0m \u001b[31m│\u001b[0m\n", - "\u001b[31m│\u001b[0m \u001b[2m503 \u001b[0m\u001b[2m│ │ \u001b[0m\u001b[94mif\u001b[0m \u001b[95mnot\u001b[0m torch._C._are_functorch_transforms_active(): \u001b[31m│\u001b[0m\n", - "\u001b[31m│\u001b[0m \u001b[2m504 \u001b[0m\u001b[2m│ │ │ \u001b[0m\u001b[2m# See NOTE: [functorch vjp and autograd interaction]\u001b[0m \u001b[31m│\u001b[0m\n", - "\u001b[31m│\u001b[0m \u001b[2m505 \u001b[0m\u001b[2m│ │ │ \u001b[0margs = _functorch.utils.unwrap_dead_wrappers(args) \u001b[31m│\u001b[0m\n", - "\u001b[31m│\u001b[0m \u001b[31m❱ \u001b[0m506 \u001b[2m│ │ │ \u001b[0m\u001b[94mreturn\u001b[0m \u001b[96msuper\u001b[0m().apply(*args, **kwargs) \u001b[2m# type: ignore[misc]\u001b[0m \u001b[31m│\u001b[0m\n", - "\u001b[31m│\u001b[0m \u001b[2m507 \u001b[0m\u001b[2m│ │ \u001b[0m \u001b[31m│\u001b[0m\n", - "\u001b[31m│\u001b[0m \u001b[2m508 \u001b[0m\u001b[2m│ │ \u001b[0m\u001b[94mif\u001b[0m \u001b[96mcls\u001b[0m.setup_context == _SingleLevelFunction.setup_context: \u001b[31m│\u001b[0m\n", - "\u001b[31m│\u001b[0m \u001b[2m509 \u001b[0m\u001b[2m│ │ │ \u001b[0m\u001b[94mraise\u001b[0m \u001b[96mRuntimeError\u001b[0m( \u001b[31m│\u001b[0m\n", - "\u001b[31m│\u001b[0m \u001b[31m│\u001b[0m\n", - "\u001b[31m│\u001b[0m \u001b[2;33m/usr/local/lib/python3.10/dist-packages/bitsandbytes/autograd/\u001b[0m\u001b[1;33m_functions.py\u001b[0m:\u001b[94m323\u001b[0m in \u001b[92mforward\u001b[0m \u001b[31m│\u001b[0m\n", - "\u001b[31m│\u001b[0m \u001b[31m│\u001b[0m\n", - "\u001b[31m│\u001b[0m \u001b[2m320 \u001b[0m\u001b[2m│ │ \u001b[0m\u001b[2m# 1. Quantize A\u001b[0m \u001b[31m│\u001b[0m\n", - "\u001b[31m│\u001b[0m \u001b[2m321 \u001b[0m\u001b[2m│ │ \u001b[0m\u001b[94mif\u001b[0m \u001b[96mlen\u001b[0m(A.shape) == \u001b[94m3\u001b[0m: \u001b[31m│\u001b[0m\n", - "\u001b[31m│\u001b[0m \u001b[2m322 \u001b[0m\u001b[2m│ │ │ \u001b[0mA = A.view(-\u001b[94m1\u001b[0m, A.shape[-\u001b[94m1\u001b[0m]).contiguous() \u001b[31m│\u001b[0m\n", - "\u001b[31m│\u001b[0m \u001b[31m❱ \u001b[0m323 \u001b[2m│ │ \u001b[0mCA, CAt, SCA, SCAt, coo_tensorA = F.double_quant(A.to(torch.float16), threshold= \u001b[31m│\u001b[0m\n", - "\u001b[31m│\u001b[0m \u001b[2m324 \u001b[0m\u001b[2m│ │ \u001b[0m \u001b[31m│\u001b[0m\n", - "\u001b[31m│\u001b[0m \u001b[2m325 \u001b[0m\u001b[2m│ │ \u001b[0m\u001b[94mif\u001b[0m state.threshold > \u001b[94m0.0\u001b[0m \u001b[95mand\u001b[0m coo_tensorA \u001b[95mis\u001b[0m \u001b[95mnot\u001b[0m \u001b[94mNone\u001b[0m: \u001b[31m│\u001b[0m\n", - "\u001b[31m│\u001b[0m \u001b[2m326 \u001b[0m\u001b[2m│ │ │ \u001b[0m\u001b[94mif\u001b[0m state.has_fp16_weights: \u001b[31m│\u001b[0m\n", - "\u001b[31m│\u001b[0m \u001b[31m│\u001b[0m\n", - "\u001b[31m│\u001b[0m \u001b[2;33m/usr/local/lib/python3.10/dist-packages/bitsandbytes/\u001b[0m\u001b[1;33mfunctional.py\u001b[0m:\u001b[94m2029\u001b[0m in \u001b[92mdouble_quant\u001b[0m \u001b[31m│\u001b[0m\n", - "\u001b[31m│\u001b[0m \u001b[31m│\u001b[0m\n", - "\u001b[31m│\u001b[0m \u001b[2m2026 \u001b[0m\u001b[2m│ │ │ │ \u001b[0mct.c_int32(rows), \u001b[31m│\u001b[0m\n", - "\u001b[31m│\u001b[0m \u001b[2m2027 \u001b[0m\u001b[2m│ │ │ │ \u001b[0mct.c_int32(cols), \u001b[31m│\u001b[0m\n", - "\u001b[31m│\u001b[0m \u001b[2m2028 \u001b[0m\u001b[2m│ │ │ \u001b[0m) \u001b[31m│\u001b[0m\n", - "\u001b[31m│\u001b[0m \u001b[31m❱ \u001b[0m2029 \u001b[2m│ │ │ \u001b[0mval, idx = torch.sort(coo_tensor.rowidx) \u001b[31m│\u001b[0m\n", - "\u001b[31m│\u001b[0m \u001b[2m2030 \u001b[0m\u001b[2m│ │ │ \u001b[0mcoo_tensor.rowidx = val \u001b[31m│\u001b[0m\n", - "\u001b[31m│\u001b[0m \u001b[2m2031 \u001b[0m\u001b[2m│ │ │ \u001b[0mcoo_tensor.colidx = coo_tensor.colidx[idx] \u001b[31m│\u001b[0m\n", - "\u001b[31m│\u001b[0m \u001b[2m2032 \u001b[0m\u001b[2m│ │ │ \u001b[0mcoo_tensor.values = coo_tensor.values[idx] \u001b[31m│\u001b[0m\n", - "\u001b[31m╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\u001b[0m\n", - "\u001b[1;91mKeyboardInterrupt\u001b[0m\n" - ], - "text/html": [ - "

╭─────────────────────────────── Traceback (most recent call last) ────────────────────────────────╮\n",
-              " in <cell line: 27>:27                                                                            \n",
-              "                                                                                                  \n",
-              " /usr/local/lib/python3.10/dist-packages/transformers/trainer.py:1696 in train                    \n",
-              "                                                                                                  \n",
-              "   1693 │   │   inner_training_loop = find_executable_batch_size(                                 \n",
-              "   1694 │   │   │   self._inner_training_loop, self._train_batch_size, args.auto_find_batch_size  \n",
-              "   1695 │   │   )                                                                                 \n",
-              " 1696 │   │   return inner_training_loop(                                                       \n",
-              "   1697 │   │   │   args=args,                                                                    \n",
-              "   1698 │   │   │   resume_from_checkpoint=resume_from_checkpoint,                                \n",
-              "   1699 │   │   │   trial=trial,                                                                  \n",
-              "                                                                                                  \n",
-              " /usr/local/lib/python3.10/dist-packages/transformers/trainer.py:1973 in _inner_training_loop     \n",
-              "                                                                                                  \n",
-              "   1970 │   │   │   │   │   with model.no_sync():                                                 \n",
-              "   1971 │   │   │   │   │   │   tr_loss_step = self.training_step(model, inputs)                  \n",
-              "   1972 │   │   │   │   else:                                                                     \n",
-              " 1973 │   │   │   │   │   tr_loss_step = self.training_step(model, inputs)                      \n",
-              "   1974 │   │   │   │                                                                             \n",
-              "   1975 │   │   │   │   if (                                                                      \n",
-              "   1976 │   │   │   │   │   args.logging_nan_inf_filter                                           \n",
-              "                                                                                                  \n",
-              " /usr/local/lib/python3.10/dist-packages/transformers/trainer.py:2787 in training_step            \n",
-              "                                                                                                  \n",
-              "   2784 │   │   │   return loss_mb.reduce_mean().detach().to(self.args.device)                    \n",
-              "   2785 │   │                                                                                     \n",
-              "   2786 │   │   with self.compute_loss_context_manager():                                         \n",
-              " 2787 │   │   │   loss = self.compute_loss(model, inputs)                                       \n",
-              "   2788 │   │                                                                                     \n",
-              "   2789 │   │   if self.args.n_gpu > 1:                                                           \n",
-              "   2790 │   │   │   loss = loss.mean()  # mean() to average on multi-gpu parallel training        \n",
-              "                                                                                                  \n",
-              " /usr/local/lib/python3.10/dist-packages/transformers/trainer.py:2819 in compute_loss             \n",
-              "                                                                                                  \n",
-              "   2816 │   │   │   labels = inputs.pop(\"labels\")                                                 \n",
-              "   2817 │   │   else:                                                                             \n",
-              "   2818 │   │   │   labels = None                                                                 \n",
-              " 2819 │   │   outputs = model(**inputs)                                                         \n",
-              "   2820 │   │   # Save past state if it exists                                                    \n",
-              "   2821 │   │   # TODO: this needs to be fixed and made cleaner later.                            \n",
-              "   2822 │   │   if self.args.past_index >= 0:                                                     \n",
-              "                                                                                                  \n",
-              " /usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py:1501 in _call_impl            \n",
-              "                                                                                                  \n",
-              "   1498 │   │   if not (self._backward_hooks or self._backward_pre_hooks or self._forward_hooks   \n",
-              "   1499 │   │   │   │   or _global_backward_pre_hooks or _global_backward_hooks                   \n",
-              "   1500 │   │   │   │   or _global_forward_hooks or _global_forward_pre_hooks):                   \n",
-              " 1501 │   │   │   return forward_call(*args, **kwargs)                                          \n",
-              "   1502 │   │   # Do not call functions when jit is used                                          \n",
-              "   1503 │   │   full_backward_hooks, non_full_backward_hooks = [], []                             \n",
-              "   1504 │   │   backward_pre_hooks = []                                                           \n",
-              "                                                                                                  \n",
-              " /usr/local/lib/python3.10/dist-packages/peft/peft_model.py:686 in forward                        \n",
-              "                                                                                                  \n",
-              "    683 │   ):                                                                                    \n",
-              "    684 │   │   peft_config = self.active_peft_config                                             \n",
-              "    685 │   │   if not isinstance(peft_config, PromptLearningConfig):                             \n",
-              "  686 │   │   │   return self.base_model(                                                       \n",
-              "    687 │   │   │   │   input_ids=input_ids,                                                      \n",
-              "    688 │   │   │   │   attention_mask=attention_mask,                                            \n",
-              "    689 │   │   │   │   inputs_embeds=inputs_embeds,                                              \n",
-              "                                                                                                  \n",
-              " /usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py:1501 in _call_impl            \n",
-              "                                                                                                  \n",
-              "   1498 │   │   if not (self._backward_hooks or self._backward_pre_hooks or self._forward_hooks   \n",
-              "   1499 │   │   │   │   or _global_backward_pre_hooks or _global_backward_hooks                   \n",
-              "   1500 │   │   │   │   or _global_forward_hooks or _global_forward_pre_hooks):                   \n",
-              " 1501 │   │   │   return forward_call(*args, **kwargs)                                          \n",
-              "   1502 │   │   # Do not call functions when jit is used                                          \n",
-              "   1503 │   │   full_backward_hooks, non_full_backward_hooks = [], []                             \n",
-              "   1504 │   │   backward_pre_hooks = []                                                           \n",
-              "                                                                                                  \n",
-              " /usr/local/lib/python3.10/dist-packages/accelerate/hooks.py:165 in new_forward                   \n",
-              "                                                                                                  \n",
-              "   162 │   │   │   with torch.no_grad():                                                          \n",
-              "   163 │   │   │   │   output = old_forward(*args, **kwargs)                                      \n",
-              "   164 │   │   else:                                                                              \n",
-              " 165 │   │   │   output = old_forward(*args, **kwargs)                                          \n",
-              "   166 │   │   return module._hf_hook.post_forward(module, output)                                \n",
-              "   167 │                                                                                          \n",
-              "   168 │   module.forward = new_forward                                                           \n",
-              "                                                                                                  \n",
-              " /usr/local/lib/python3.10/dist-packages/transformers/models/llama/modeling_llama.py:687 in       \n",
-              " forward                                                                                          \n",
-              "                                                                                                  \n",
-              "   684 │   │   return_dict = return_dict if return_dict is not None else self.config.use_return   \n",
-              "   685 │   │                                                                                      \n",
-              "   686 │   │   # decoder outputs consists of (dec_features, layer_state, dec_hidden, dec_attn)    \n",
-              " 687 │   │   outputs = self.model(                                                              \n",
-              "   688 │   │   │   input_ids=input_ids,                                                           \n",
-              "   689 │   │   │   attention_mask=attention_mask,                                                 \n",
-              "   690 │   │   │   position_ids=position_ids,                                                     \n",
-              "                                                                                                  \n",
-              " /usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py:1501 in _call_impl            \n",
-              "                                                                                                  \n",
-              "   1498 │   │   if not (self._backward_hooks or self._backward_pre_hooks or self._forward_hooks   \n",
-              "   1499 │   │   │   │   or _global_backward_pre_hooks or _global_backward_hooks                   \n",
-              "   1500 │   │   │   │   or _global_forward_hooks or _global_forward_pre_hooks):                   \n",
-              " 1501 │   │   │   return forward_call(*args, **kwargs)                                          \n",
-              "   1502 │   │   # Do not call functions when jit is used                                          \n",
-              "   1503 │   │   full_backward_hooks, non_full_backward_hooks = [], []                             \n",
-              "   1504 │   │   backward_pre_hooks = []                                                           \n",
-              "                                                                                                  \n",
-              " /usr/local/lib/python3.10/dist-packages/accelerate/hooks.py:165 in new_forward                   \n",
-              "                                                                                                  \n",
-              "   162 │   │   │   with torch.no_grad():                                                          \n",
-              "   163 │   │   │   │   output = old_forward(*args, **kwargs)                                      \n",
-              "   164 │   │   else:                                                                              \n",
-              " 165 │   │   │   output = old_forward(*args, **kwargs)                                          \n",
-              "   166 │   │   return module._hf_hook.post_forward(module, output)                                \n",
-              "   167 │                                                                                          \n",
-              "   168 │   module.forward = new_forward                                                           \n",
-              "                                                                                                  \n",
-              " /usr/local/lib/python3.10/dist-packages/transformers/models/llama/modeling_llama.py:569 in       \n",
-              " forward                                                                                          \n",
-              "                                                                                                  \n",
-              "   566 │   │   │   │   │                                                                          \n",
-              "   567 │   │   │   │   │   return custom_forward                                                  \n",
-              "   568 │   │   │   │                                                                              \n",
-              " 569 │   │   │   │   layer_outputs = torch.utils.checkpoint.checkpoint(                         \n",
-              "   570 │   │   │   │   │   create_custom_forward(decoder_layer),                                  \n",
-              "   571 │   │   │   │   │   hidden_states,                                                         \n",
-              "   572 │   │   │   │   │   attention_mask,                                                        \n",
-              "                                                                                                  \n",
-              " /usr/local/lib/python3.10/dist-packages/torch/utils/checkpoint.py:249 in checkpoint              \n",
-              "                                                                                                  \n",
-              "   246 │   │   raise ValueError(\"Unexpected keyword arguments: \" + \",\".join(arg for arg in kwar   \n",
-              "   247 │                                                                                          \n",
-              "   248 │   if use_reentrant:                                                                      \n",
-              " 249 │   │   return CheckpointFunction.apply(function, preserve, *args)                         \n",
-              "   250 │   else:                                                                                  \n",
-              "   251 │   │   return _checkpoint_without_reentrant(                                              \n",
-              "   252 │   │   │   function,                                                                      \n",
-              "                                                                                                  \n",
-              " /usr/local/lib/python3.10/dist-packages/torch/autograd/function.py:506 in apply                  \n",
-              "                                                                                                  \n",
-              "   503 │   │   if not torch._C._are_functorch_transforms_active():                                \n",
-              "   504 │   │   │   # See NOTE: [functorch vjp and autograd interaction]                           \n",
-              "   505 │   │   │   args = _functorch.utils.unwrap_dead_wrappers(args)                             \n",
-              " 506 │   │   │   return super().apply(*args, **kwargs)  # type: ignore[misc]                    \n",
-              "   507 │   │                                                                                      \n",
-              "   508 │   │   if cls.setup_context == _SingleLevelFunction.setup_context:                        \n",
-              "   509 │   │   │   raise RuntimeError(                                                            \n",
-              "                                                                                                  \n",
-              " /usr/local/lib/python3.10/dist-packages/torch/utils/checkpoint.py:107 in forward                 \n",
-              "                                                                                                  \n",
-              "   104 │   │   ctx.save_for_backward(*tensor_inputs)                                              \n",
-              "   105 │   │                                                                                      \n",
-              "   106 │   │   with torch.no_grad():                                                              \n",
-              " 107 │   │   │   outputs = run_function(*args)                                                  \n",
-              "   108 │   │   return outputs                                                                     \n",
-              "   109 │                                                                                          \n",
-              "   110 │   @staticmethod                                                                          \n",
-              "                                                                                                  \n",
-              " /usr/local/lib/python3.10/dist-packages/transformers/models/llama/modeling_llama.py:565 in       \n",
-              " custom_forward                                                                                   \n",
-              "                                                                                                  \n",
-              "   562 │   │   │   │   def create_custom_forward(module):                                         \n",
-              "   563 │   │   │   │   │   def custom_forward(*inputs):                                           \n",
-              "   564 │   │   │   │   │   │   # None for past_key_value                                          \n",
-              " 565 │   │   │   │   │   │   return module(*inputs, output_attentions, None)                    \n",
-              "   566 │   │   │   │   │                                                                          \n",
-              "   567 │   │   │   │   │   return custom_forward                                                  \n",
-              "   568                                                                                            \n",
-              "                                                                                                  \n",
-              " /usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py:1501 in _call_impl            \n",
-              "                                                                                                  \n",
-              "   1498 │   │   if not (self._backward_hooks or self._backward_pre_hooks or self._forward_hooks   \n",
-              "   1499 │   │   │   │   or _global_backward_pre_hooks or _global_backward_hooks                   \n",
-              "   1500 │   │   │   │   or _global_forward_hooks or _global_forward_pre_hooks):                   \n",
-              " 1501 │   │   │   return forward_call(*args, **kwargs)                                          \n",
-              "   1502 │   │   # Do not call functions when jit is used                                          \n",
-              "   1503 │   │   full_backward_hooks, non_full_backward_hooks = [], []                             \n",
-              "   1504 │   │   backward_pre_hooks = []                                                           \n",
-              "                                                                                                  \n",
-              " /usr/local/lib/python3.10/dist-packages/accelerate/hooks.py:165 in new_forward                   \n",
-              "                                                                                                  \n",
-              "   162 │   │   │   with torch.no_grad():                                                          \n",
-              "   163 │   │   │   │   output = old_forward(*args, **kwargs)                                      \n",
-              "   164 │   │   else:                                                                              \n",
-              " 165 │   │   │   output = old_forward(*args, **kwargs)                                          \n",
-              "   166 │   │   return module._hf_hook.post_forward(module, output)                                \n",
-              "   167 │                                                                                          \n",
-              "   168 │   module.forward = new_forward                                                           \n",
-              "                                                                                                  \n",
-              " /usr/local/lib/python3.10/dist-packages/transformers/models/llama/modeling_llama.py:292 in       \n",
-              " forward                                                                                          \n",
-              "                                                                                                  \n",
-              "   289 │   │   hidden_states = self.input_layernorm(hidden_states)                                \n",
-              "   290 │   │                                                                                      \n",
-              "   291 │   │   # Self Attention                                                                   \n",
-              " 292 │   │   hidden_states, self_attn_weights, present_key_value = self.self_attn(              \n",
-              "   293 │   │   │   hidden_states=hidden_states,                                                   \n",
-              "   294 │   │   │   attention_mask=attention_mask,                                                 \n",
-              "   295 │   │   │   position_ids=position_ids,                                                     \n",
-              "                                                                                                  \n",
-              " /usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py:1501 in _call_impl            \n",
-              "                                                                                                  \n",
-              "   1498 │   │   if not (self._backward_hooks or self._backward_pre_hooks or self._forward_hooks   \n",
-              "   1499 │   │   │   │   or _global_backward_pre_hooks or _global_backward_hooks                   \n",
-              "   1500 │   │   │   │   or _global_forward_hooks or _global_forward_pre_hooks):                   \n",
-              " 1501 │   │   │   return forward_call(*args, **kwargs)                                          \n",
-              "   1502 │   │   # Do not call functions when jit is used                                          \n",
-              "   1503 │   │   full_backward_hooks, non_full_backward_hooks = [], []                             \n",
-              "   1504 │   │   backward_pre_hooks = []                                                           \n",
-              "                                                                                                  \n",
-              " /usr/local/lib/python3.10/dist-packages/accelerate/hooks.py:165 in new_forward                   \n",
-              "                                                                                                  \n",
-              "   162 │   │   │   with torch.no_grad():                                                          \n",
-              "   163 │   │   │   │   output = old_forward(*args, **kwargs)                                      \n",
-              "   164 │   │   else:                                                                              \n",
-              " 165 │   │   │   output = old_forward(*args, **kwargs)                                          \n",
-              "   166 │   │   return module._hf_hook.post_forward(module, output)                                \n",
-              "   167 │                                                                                          \n",
-              "   168 │   module.forward = new_forward                                                           \n",
-              "                                                                                                  \n",
-              " /usr/local/lib/python3.10/dist-packages/transformers/models/llama/modeling_llama.py:194 in       \n",
-              " forward                                                                                          \n",
-              "                                                                                                  \n",
-              "   191 │   ) -> Tuple[torch.Tensor, Optional[torch.Tensor], Optional[Tuple[torch.Tensor]]]:       \n",
-              "   192 │   │   bsz, q_len, _ = hidden_states.size()                                               \n",
-              "   193 │   │                                                                                      \n",
-              " 194 │   │   query_states = self.q_proj(hidden_states).view(bsz, q_len, self.num_heads, self.   \n",
-              "   195 │   │   key_states = self.k_proj(hidden_states).view(bsz, q_len, self.num_heads, self.he   \n",
-              "   196 │   │   value_states = self.v_proj(hidden_states).view(bsz, q_len, self.num_heads, self.   \n",
-              "   197                                                                                            \n",
-              "                                                                                                  \n",
-              " /usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py:1501 in _call_impl            \n",
-              "                                                                                                  \n",
-              "   1498 │   │   if not (self._backward_hooks or self._backward_pre_hooks or self._forward_hooks   \n",
-              "   1499 │   │   │   │   or _global_backward_pre_hooks or _global_backward_hooks                   \n",
-              "   1500 │   │   │   │   or _global_forward_hooks or _global_forward_pre_hooks):                   \n",
-              " 1501 │   │   │   return forward_call(*args, **kwargs)                                          \n",
-              "   1502 │   │   # Do not call functions when jit is used                                          \n",
-              "   1503 │   │   full_backward_hooks, non_full_backward_hooks = [], []                             \n",
-              "   1504 │   │   backward_pre_hooks = []                                                           \n",
-              "                                                                                                  \n",
-              " /usr/local/lib/python3.10/dist-packages/peft/tuners/lora.py:709 in forward                       \n",
-              "                                                                                                  \n",
-              "   706 │   │   │   self.active_adapter = adapter_name                                             \n",
-              "   707 │   │                                                                                      \n",
-              "   708 │   │   def forward(self, x: torch.Tensor):                                                \n",
-              " 709 │   │   │   result = super().forward(x)                                                    \n",
-              "   710 │   │   │                                                                                  \n",
-              "   711 │   │   │   if self.disable_adapters or self.active_adapter not in self.lora_A.keys():     \n",
-              "   712 │   │   │   │   return result                                                              \n",
-              "                                                                                                  \n",
-              " /usr/local/lib/python3.10/dist-packages/bitsandbytes/nn/modules.py:388 in forward                \n",
-              "                                                                                                  \n",
-              "   385 │   │   if self.bias is not None and self.bias.dtype != x.dtype:                           \n",
-              "   386 │   │   │   self.bias.data = self.bias.data.to(x.dtype)                                    \n",
-              "   387 │   │                                                                                      \n",
-              " 388 │   │   out = bnb.matmul(x, self.weight, bias=self.bias, state=self.state)                 \n",
-              "   389 │   │                                                                                      \n",
-              "   390 │   │   if not self.state.has_fp16_weights:                                                \n",
-              "   391 │   │   │   if self.state.CB is not None and self.state.CxB is not None:                   \n",
-              "                                                                                                  \n",
-              " /usr/local/lib/python3.10/dist-packages/bitsandbytes/autograd/_functions.py:559 in matmul        \n",
-              "                                                                                                  \n",
-              "   556 │   state = state or MatmulLtState()                                                       \n",
-              "   557 │   if threshold > 0.0:                                                                    \n",
-              "   558 │   │   state.threshold = threshold                                                        \n",
-              " 559 return MatMul8bitLt.apply(A, B, out, bias, state)                                      \n",
-              "   560                                                                                            \n",
-              "   561                                                                                            \n",
-              "   562 def matmul_4bit(A: tensor, B: tensor, quant_state: List, out: tensor = None, bias=None):   \n",
-              "                                                                                                  \n",
-              " /usr/local/lib/python3.10/dist-packages/torch/autograd/function.py:506 in apply                  \n",
-              "                                                                                                  \n",
-              "   503 │   │   if not torch._C._are_functorch_transforms_active():                                \n",
-              "   504 │   │   │   # See NOTE: [functorch vjp and autograd interaction]                           \n",
-              "   505 │   │   │   args = _functorch.utils.unwrap_dead_wrappers(args)                             \n",
-              " 506 │   │   │   return super().apply(*args, **kwargs)  # type: ignore[misc]                    \n",
-              "   507 │   │                                                                                      \n",
-              "   508 │   │   if cls.setup_context == _SingleLevelFunction.setup_context:                        \n",
-              "   509 │   │   │   raise RuntimeError(                                                            \n",
-              "                                                                                                  \n",
-              " /usr/local/lib/python3.10/dist-packages/bitsandbytes/autograd/_functions.py:323 in forward       \n",
-              "                                                                                                  \n",
-              "   320 │   │   # 1. Quantize A                                                                    \n",
-              "   321 │   │   if len(A.shape) == 3:                                                              \n",
-              "   322 │   │   │   A = A.view(-1, A.shape[-1]).contiguous()                                       \n",
-              " 323 │   │   CA, CAt, SCA, SCAt, coo_tensorA = F.double_quant(A.to(torch.float16), threshold=   \n",
-              "   324 │   │                                                                                      \n",
-              "   325 │   │   if state.threshold > 0.0 and coo_tensorA is not None:                              \n",
-              "   326 │   │   │   if state.has_fp16_weights:                                                     \n",
-              "                                                                                                  \n",
-              " /usr/local/lib/python3.10/dist-packages/bitsandbytes/functional.py:2029 in double_quant          \n",
-              "                                                                                                  \n",
-              "   2026 │   │   │   │   ct.c_int32(rows),                                                         \n",
-              "   2027 │   │   │   │   ct.c_int32(cols),                                                         \n",
-              "   2028 │   │   │   )                                                                             \n",
-              " 2029 │   │   │   val, idx = torch.sort(coo_tensor.rowidx)                                      \n",
-              "   2030 │   │   │   coo_tensor.rowidx = val                                                       \n",
-              "   2031 │   │   │   coo_tensor.colidx = coo_tensor.colidx[idx]                                    \n",
-              "   2032 │   │   │   coo_tensor.values = coo_tensor.values[idx]                                    \n",
-              "╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n",
-              "KeyboardInterrupt\n",
-              "
\n" - ] - }, - "metadata": {} - } - ] - } - ] + "id": "kWP89TPIwRkK", + "outputId": "fb4d3012-795b-46c2-90a0-f72fc8185a11" + }, + "outputs": [], + "source": [ + "data = data.shuffle().map(\n", + " lambda data_point: tokenizer(\n", + " generate_prompt(data_point),\n", + " truncation=True,\n", + " max_length=CUTOFF_LEN,\n", + " padding=\"max_length\",\n", + " )\n", + ")\n", + "\n", + "trainer = transformers.Trainer(\n", + " model=model,\n", + " train_dataset=data[\"train\"],\n", + " args=transformers.TrainingArguments(\n", + " per_device_train_batch_size=MICRO_BATCH_SIZE,\n", + " gradient_accumulation_steps=GRADIENT_ACCUMULATION_STEPS,\n", + " warmup_steps=100,\n", + " num_train_epochs=EPOCHS,\n", + " learning_rate=LEARNING_RATE,\n", + " fp16=True,\n", + " logging_steps=1,\n", + " output_dir=\"lora-alpaca\",\n", + " save_total_limit=3,\n", + " ),\n", + " data_collator=transformers.DataCollatorForLanguageModeling(tokenizer, mlm=False),\n", + ")\n", + "model.config.use_cache = False\n", + "trainer.train(resume_from_checkpoint=False)\n", + "\n", + "model.save_pretrained(\"lora-alpaca\")\n", + "\n", + "!cp -rv lora-alpaca /content/drive/MyDrive/" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.3" + } + }, + "nbformat": 4, + "nbformat_minor": 1 }