{ "cells": [ { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "import os\n", "import time\n", "import pandas as pd\n", "from datasets import load_dataset, DatasetDict\n", "from together import Together\n", "from dotenv import load_dotenv\n", "\n", "load_dotenv()\n", "\n", "\n", "client = Together(api_key=os.getenv(\"TOGETHER_API_KEY\"))\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "BASE_MODEL_ID = \"deepseek-ai/DeepSeek-R1-Distill-Qwen-14B\"\n", "DATASET_ID = \"boatbomber/Roblox-Luau-Reasoning-v1.0\"\n", "SYSTEM_PROMPT = \"You are an expert Roblox developer and Luau software engineer.\"\n", "COMPLETION_TEMPLATE = \"\"\"\\\n", "\n", "{chain_of_thought}\n", "\n", "\n", "```Lua\n", "{code}\n", "```\n", "\n", "{explanation}\\\n", "\"\"\"\n" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Build the fine tuning dataset" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "c2561cd4a02f48b6ac71f6e8053e124a", "version_major": 2, "version_minor": 0 }, "text/plain": [ "train.parquet: 0%| | 0.00/47.3M [00:00=1.3.0, the default batch size is set to the maximum allowed value for each model.'\n", "message='Starting from together>=1.3.0, the default batch size is set to the maximum allowed value for each model.'\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "FT_JOB_ID ft-49dc2cfb-959b\n" ] } ], "source": [ "FT_JOB_ID = \"ft-49dc2cfb-959b\"\n", "if not FT_JOB_ID:\n", " job_resp = client.fine_tuning.create(\n", " training_file=DATASET_FILE_ID,\n", " model=BASE_MODEL_ID,\n", " lora=True,\n", " lora_r=16,\n", " )\n", "\n", " print(\"FT_JOB_ID\", job_resp.id)\n", " FT_JOB_ID = job_resp.id\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "status: FinetuneJobStatus.STATUS_PENDING steps_completed: 0 output_name: zackovits/DeepSeek-R1-Distill-Qwen-14B-77124c1f events: ['Fine tune request created']\n", "status: FinetuneJobStatus.STATUS_QUEUED steps_completed: 0 output_name: zackovits/DeepSeek-R1-Distill-Qwen-14B-77124c1f events: ['Fine tune request created']\n", "status: FinetuneJobStatus.STATUS_QUEUED steps_completed: 0 output_name: zackovits/DeepSeek-R1-Distill-Qwen-14B-77124c1f events: ['Fine tune request created']\n", "status: FinetuneJobStatus.STATUS_QUEUED steps_completed: 0 output_name: zackovits/DeepSeek-R1-Distill-Qwen-14B-77124c1f events: ['Fine tune request created']\n", "status: FinetuneJobStatus.STATUS_QUEUED steps_completed: 0 output_name: zackovits/DeepSeek-R1-Distill-Qwen-14B-77124c1f events: ['Fine tune request created']\n", "status: FinetuneJobStatus.STATUS_QUEUED steps_completed: 0 output_name: zackovits/DeepSeek-R1-Distill-Qwen-14B-77124c1f events: ['Fine tune request created']\n", "status: FinetuneJobStatus.STATUS_QUEUED steps_completed: 0 output_name: zackovits/DeepSeek-R1-Distill-Qwen-14B-77124c1f events: ['Fine tune request created']\n", "status: FinetuneJobStatus.STATUS_RUNNING steps_completed: 0 output_name: zackovits/DeepSeek-R1-Distill-Qwen-14B-77124c1f events: ['Fine tune request created', 'Job started at Wed Mar 26 10:43:55 UTC 2025', 'Model data downloaded for togethercomputer/DeepSeek-R1-Distill-Qwen-14B at Wed Mar 26 10:43:57 UTC 2025', 'Data downloaded for togethercomputer/DeepSeek-R1-Distill-Qwen-14B at $2025-03-26T10:44:05.779202']\n", "status: FinetuneJobStatus.STATUS_RUNNING steps_completed: 1 output_name: zackovits/DeepSeek-R1-Distill-Qwen-14B-77124c1f events: ['Fine tune request created', 'Job started at Wed Mar 26 10:43:55 UTC 2025', 'Model data downloaded for togethercomputer/DeepSeek-R1-Distill-Qwen-14B at Wed Mar 26 10:43:57 UTC 2025', 'Data downloaded for togethercomputer/DeepSeek-R1-Distill-Qwen-14B at $2025-03-26T10:44:05.779202', 'Training started for model togethercomputer/DeepSeek-R1-Distill-Qwen-14B']\n", "status: FinetuneJobStatus.STATUS_RUNNING steps_completed: 10 output_name: zackovits/DeepSeek-R1-Distill-Qwen-14B-77124c1f events: ['Fine tune request created', 'Job started at Wed Mar 26 10:43:55 UTC 2025', 'Model data downloaded for togethercomputer/DeepSeek-R1-Distill-Qwen-14B at Wed Mar 26 10:43:57 UTC 2025', 'Data downloaded for togethercomputer/DeepSeek-R1-Distill-Qwen-14B at $2025-03-26T10:44:05.779202', 'Training started for model togethercomputer/DeepSeek-R1-Distill-Qwen-14B']\n", "status: FinetuneJobStatus.STATUS_RUNNING steps_completed: 20 output_name: zackovits/DeepSeek-R1-Distill-Qwen-14B-77124c1f events: ['Fine tune request created', 'Job started at Wed Mar 26 10:43:55 UTC 2025', 'Model data downloaded for togethercomputer/DeepSeek-R1-Distill-Qwen-14B at Wed Mar 26 10:43:57 UTC 2025', 'Data downloaded for togethercomputer/DeepSeek-R1-Distill-Qwen-14B at $2025-03-26T10:44:05.779202', 'Training started for model togethercomputer/DeepSeek-R1-Distill-Qwen-14B']\n", "status: FinetuneJobStatus.STATUS_RUNNING steps_completed: 29 output_name: zackovits/DeepSeek-R1-Distill-Qwen-14B-77124c1f events: ['Fine tune request created', 'Job started at Wed Mar 26 10:43:55 UTC 2025', 'Model data downloaded for togethercomputer/DeepSeek-R1-Distill-Qwen-14B at Wed Mar 26 10:43:57 UTC 2025', 'Data downloaded for togethercomputer/DeepSeek-R1-Distill-Qwen-14B at $2025-03-26T10:44:05.779202', 'Training started for model togethercomputer/DeepSeek-R1-Distill-Qwen-14B']\n", "status: FinetuneJobStatus.STATUS_RUNNING steps_completed: 39 output_name: zackovits/DeepSeek-R1-Distill-Qwen-14B-77124c1f events: ['Fine tune request created', 'Job started at Wed Mar 26 10:43:55 UTC 2025', 'Model data downloaded for togethercomputer/DeepSeek-R1-Distill-Qwen-14B at Wed Mar 26 10:43:57 UTC 2025', 'Data downloaded for togethercomputer/DeepSeek-R1-Distill-Qwen-14B at $2025-03-26T10:44:05.779202', 'Training started for model togethercomputer/DeepSeek-R1-Distill-Qwen-14B']\n", "status: FinetuneJobStatus.STATUS_RUNNING steps_completed: 49 output_name: zackovits/DeepSeek-R1-Distill-Qwen-14B-77124c1f events: ['Fine tune request created', 'Job started at Wed Mar 26 10:43:55 UTC 2025', 'Model data downloaded for togethercomputer/DeepSeek-R1-Distill-Qwen-14B at Wed Mar 26 10:43:57 UTC 2025', 'Data downloaded for togethercomputer/DeepSeek-R1-Distill-Qwen-14B at $2025-03-26T10:44:05.779202', 'Training started for model togethercomputer/DeepSeek-R1-Distill-Qwen-14B']\n", "status: FinetuneJobStatus.STATUS_RUNNING steps_completed: 58 output_name: zackovits/DeepSeek-R1-Distill-Qwen-14B-77124c1f events: ['Fine tune request created', 'Job started at Wed Mar 26 10:43:55 UTC 2025', 'Model data downloaded for togethercomputer/DeepSeek-R1-Distill-Qwen-14B at Wed Mar 26 10:43:57 UTC 2025', 'Data downloaded for togethercomputer/DeepSeek-R1-Distill-Qwen-14B at $2025-03-26T10:44:05.779202', 'Training started for model togethercomputer/DeepSeek-R1-Distill-Qwen-14B']\n", "status: FinetuneJobStatus.STATUS_RUNNING steps_completed: 68 output_name: zackovits/DeepSeek-R1-Distill-Qwen-14B-77124c1f events: ['Fine tune request created', 'Job started at Wed Mar 26 10:43:55 UTC 2025', 'Model data downloaded for togethercomputer/DeepSeek-R1-Distill-Qwen-14B at Wed Mar 26 10:43:57 UTC 2025', 'Data downloaded for togethercomputer/DeepSeek-R1-Distill-Qwen-14B at $2025-03-26T10:44:05.779202', 'Training started for model togethercomputer/DeepSeek-R1-Distill-Qwen-14B']\n", "status: FinetuneJobStatus.STATUS_RUNNING steps_completed: 73 output_name: zackovits/DeepSeek-R1-Distill-Qwen-14B-77124c1f events: ['Fine tune request created', 'Job started at Wed Mar 26 10:43:55 UTC 2025', 'Model data downloaded for togethercomputer/DeepSeek-R1-Distill-Qwen-14B at Wed Mar 26 10:43:57 UTC 2025', 'Data downloaded for togethercomputer/DeepSeek-R1-Distill-Qwen-14B at $2025-03-26T10:44:05.779202', 'Training started for model togethercomputer/DeepSeek-R1-Distill-Qwen-14B', 'Epoch completed, at step 73']\n", "status: FinetuneJobStatus.STATUS_RUNNING steps_completed: 73 output_name: zackovits/DeepSeek-R1-Distill-Qwen-14B-77124c1f events: ['Fine tune request created', 'Job started at Wed Mar 26 10:43:55 UTC 2025', 'Model data downloaded for togethercomputer/DeepSeek-R1-Distill-Qwen-14B at Wed Mar 26 10:43:57 UTC 2025', 'Data downloaded for togethercomputer/DeepSeek-R1-Distill-Qwen-14B at $2025-03-26T10:44:05.779202', 'Training started for model togethercomputer/DeepSeek-R1-Distill-Qwen-14B', 'Epoch completed, at step 73', 'Training completed for togethercomputer/DeepSeek-R1-Distill-Qwen-14B at Wed Mar 26 10:58:05 UTC 2025']\n", "status: FinetuneJobStatus.STATUS_COMPRESSING steps_completed: 73 output_name: zackovits/DeepSeek-R1-Distill-Qwen-14B-77124c1f events: ['Fine tune request created', 'Job started at Wed Mar 26 10:43:55 UTC 2025', 'Model data downloaded for togethercomputer/DeepSeek-R1-Distill-Qwen-14B at Wed Mar 26 10:43:57 UTC 2025', 'Data downloaded for togethercomputer/DeepSeek-R1-Distill-Qwen-14B at $2025-03-26T10:44:05.779202', 'Training started for model togethercomputer/DeepSeek-R1-Distill-Qwen-14B', 'Epoch completed, at step 73', 'Training completed for togethercomputer/DeepSeek-R1-Distill-Qwen-14B at Wed Mar 26 10:58:05 UTC 2025', 'Uploading output model', 'Compressing output model', 'Model compression complete']\n", "status: FinetuneJobStatus.STATUS_COMPRESSING steps_completed: 73 output_name: zackovits/DeepSeek-R1-Distill-Qwen-14B-77124c1f events: ['Fine tune request created', 'Job started at Wed Mar 26 10:43:55 UTC 2025', 'Model data downloaded for togethercomputer/DeepSeek-R1-Distill-Qwen-14B at Wed Mar 26 10:43:57 UTC 2025', 'Data downloaded for togethercomputer/DeepSeek-R1-Distill-Qwen-14B at $2025-03-26T10:44:05.779202', 'Training started for model togethercomputer/DeepSeek-R1-Distill-Qwen-14B', 'Epoch completed, at step 73', 'Training completed for togethercomputer/DeepSeek-R1-Distill-Qwen-14B at Wed Mar 26 10:58:05 UTC 2025', 'Uploading output model', 'Compressing output model', 'Model compression complete']\n", "status: FinetuneJobStatus.STATUS_COMPRESSING steps_completed: 73 output_name: zackovits/DeepSeek-R1-Distill-Qwen-14B-77124c1f events: ['Fine tune request created', 'Job started at Wed Mar 26 10:43:55 UTC 2025', 'Model data downloaded for togethercomputer/DeepSeek-R1-Distill-Qwen-14B at Wed Mar 26 10:43:57 UTC 2025', 'Data downloaded for togethercomputer/DeepSeek-R1-Distill-Qwen-14B at $2025-03-26T10:44:05.779202', 'Training started for model togethercomputer/DeepSeek-R1-Distill-Qwen-14B', 'Epoch completed, at step 73', 'Training completed for togethercomputer/DeepSeek-R1-Distill-Qwen-14B at Wed Mar 26 10:58:05 UTC 2025', 'Uploading output model', 'Compressing output model', 'Model compression complete']\n", "status: FinetuneJobStatus.STATUS_COMPLETED steps_completed: 73 output_name: zackovits/DeepSeek-R1-Distill-Qwen-14B-77124c1f events: ['Fine tune request created', 'Job started at Wed Mar 26 10:43:55 UTC 2025', 'Model data downloaded for togethercomputer/DeepSeek-R1-Distill-Qwen-14B at Wed Mar 26 10:43:57 UTC 2025', 'Data downloaded for togethercomputer/DeepSeek-R1-Distill-Qwen-14B at $2025-03-26T10:44:05.779202', 'Training started for model togethercomputer/DeepSeek-R1-Distill-Qwen-14B', 'Epoch completed, at step 73', 'Training completed for togethercomputer/DeepSeek-R1-Distill-Qwen-14B at Wed Mar 26 10:58:05 UTC 2025', 'Uploading output model', 'Compressing output model', 'Model compression complete', 'Model upload complete', 'Job finished at Wed Mar 26 11:03:54 UTC 2025']\n" ] } ], "source": [ "from together.types.finetune import FinetuneJobStatus\n", "\n", "status = None\n", "while status != FinetuneJobStatus.STATUS_COMPLETED:\n", " ft_resp = client.fine_tuning.retrieve(FT_JOB_ID)\n", " status = ft_resp.status\n", " print(\n", " \"status:\",\n", " ft_resp.status,\n", " \"steps_completed:\",\n", " ft_resp.steps_completed,\n", " \"output_name:\",\n", " ft_resp.output_name,\n", " \"events:\",\n", " [event.message for event in ft_resp.events or []],\n", " )\n", " time.sleep(90)\n" ] } ], "metadata": { "kernelspec": { "display_name": "r1-distill-qwen-14b-roblox-luau-M2w3-kVb-py3.11", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.11.9" } }, "nbformat": 4, "nbformat_minor": 2 }