{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "kH18jD5cR_Ks", "outputId": "f12be379-ce3d-4e3c-e5b5-b5e8ca7127ee" }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m122.4/122.4 MB\u001b[0m \u001b[31m7.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m310.9/310.9 kB\u001b[0m \u001b[31m9.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m480.6/480.6 kB\u001b[0m \u001b[31m23.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m116.3/116.3 kB\u001b[0m \u001b[31m7.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m179.3/179.3 kB\u001b[0m \u001b[31m10.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m134.8/134.8 kB\u001b[0m \u001b[31m8.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m194.1/194.1 kB\u001b[0m \u001b[31m10.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25h\u001b[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.\n", "gcsfs 2024.10.0 requires fsspec==2024.10.0, but you have fsspec 2024.9.0 which is incompatible.\u001b[0m\u001b[31m\n", "\u001b[0m" ] } ], "source": [ "# !pip install -q accelerate peft bitsandbytes transformers trl faiss-gpu langchain_community wandb flash-attn\n", "!pip install -q accelerate peft bitsandbytes transformers trl datasets\n", "\n", "# flash-attn" ] }, { "cell_type": "code", "execution_count": 2, "metadata": { "id": "cgVNTbBa-D3j" }, "outputs": [], "source": [ "# load the required packages.\n", "import torch\n", "from datasets import load_dataset, Dataset\n", "from peft import LoraConfig, AutoPeftModelForCausalLM, PeftModel, get_peft_model\n", "from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig, TrainingArguments, AutoConfig, set_seed\n", "from trl import SFTTrainer\n", "import bitsandbytes as bnb\n", "import transformers\n", "\n", "import os\n", "import numpy as np\n", "import pandas as pd\n", "import sqlparse\n", "import re\n", "import json\n", "\n", "from huggingface_hub import hf_hub_download\n", "from huggingface_hub import HfFileSystem" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "s7RtbLxPwTx2", "outputId": "43d5ff02-3e33-439c-e3a9-c18ce99d237d" }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Mounted at /content/drive\n" ] } ], "source": [ "# from google.colab import drive\n", "# from google.colab import userdata\n", "# drive.mount('/content/drive')" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "basaX_55Yf_D" }, "outputs": [], "source": [ "#transformers.logging.set_verbosity_info()" ] }, { "cell_type": "code", "execution_count": 3, "metadata": { "id": "bkkjgGdlrNcq" }, "outputs": [], "source": [ "WRITE_TOKEN = userdata.get('hf_write')\n", "READ_TOKEN = userdata.get('hf_read')" ] }, { "cell_type": "code", "execution_count": 4, "metadata": { "id": "7CKnwlRfZj4V" }, "outputs": [], "source": [ "model_name = \"TinyLlama/TinyLlama-1.1B-Chat-v1.0\"\n", "out_name = \"lleticiasilvaa/TinyLlama-text2SQL-schemaReduzido\"\n", "prev_checkpoint = None # \"checkpoint-3000\"" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "9zs7nCmt-pMC" }, "outputs": [], "source": [ "#!huggingface-cli login" ] }, { "cell_type": "code", "execution_count": 14, "metadata": { "id": "PZdnxs8k-Cgl" }, "outputs": [], "source": [ "bird_id=\"NESPED-GEN/bird\"\n", "spider_id= \"NESPED-GEN/spider_selector_schemaReduzido\" #\"NESPED-GEN/spider_variacoes_formato_schema\"\n", "\n", "selector_id = \"NESPED-GEN/selector_testes\"" ] }, { "cell_type": "markdown", "metadata": { "id": "xT2iRdCN_MFH" }, "source": [ "### Load Data\n" ] }, { "cell_type": "code", "execution_count": 15, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 177, "referenced_widgets": [ "f8be448474cc4b81a0b5a330f9ed3705", "8a107cbb374c4404a5fd1baf65de72ad", "1fcb73c41c704aca99e65ba0907d0841", "b907d9ff090d495d83c275e43d049f91", "9231abec1a0845b1b4e8f5a3b0ca60de", "115786b90c5844c18606bf4abdc63522", "dee32e87b07c45bea861648b6bfec5ab", "6ce8223be8d349f5aeb19ac429207beb", "8bdf5e43e5b541449904cb61fe15c504", "249774bfb1174f218f4fdf86fc92c680", "359ac754e92e42c38420c20dd9e6ab25", "b655e8c850874946a7a8b44d458ab194", "9071e4acf58246b4823ab33bde9ad870", "b0437baf58c84daab3a7acc7ac2b03d7", "76c2ed54728a4b47be123027a86d64ce", "126a786d922c49a5b49afa5a6f580c02", "f6802cdc047243929e30805c6c56a5af", "4dd6f8be173145118c4fa2236d13fa7d", "0ad5ce21ee154eb6bc378a32d3685c2d", "446e9df47db148169da5d3df546bbea7", "29ad0f5095fc436bb5372bb2a97522f8", "3cba96b6b2694c179136c50e72791f13", "8db267d4b29e42fd82de95850616e654", "6e503bf9c4d84135b1d2d5bd95d09710", "a5b97970df7c448aa760685dbf3cd797", "6360419398e441609645936873d8da61", "4775a854818241689db29f600fdb8e57", "e01e40c177914d14a571521de498aa3e", "07ee750aa9284978925b62c485eb1822", "3bf33159fe7144d7ab825a4dce0afa76", "2c0b81de6bd647fbb3c608b11f552c14", "2224091acc0a446aba595432479acd5e", "06bfce2b06d1441b9c2a0a70be950320", "1db39cc8d8cf468f86b84aceb5ad44d2", "d6cb118520d14424b7cb8451d95e1506", "db372858e40f4bba99a7169eb4633669", "317a8928f5874be28a85aa8bff87c571", "c7dd736ed51041c9bf60aa7b7712e7b6", "82acc4c6266f4d3690dce7789efcda3f", "d1cd4270583a4e8ebec3e4800906ba49", "a08b393996984cf49cdf7c2f1f24106d", "aa91f3aacd5d4fc2857c9bd07e0b8a66", "758e7b7e5c6347899fddc5bb34edc0b7", "3f8fcb6daa6e46b483ff42dbc35de4db", "a33e8d824b37457a9e66e5ad051f33ea", "e1a2bd619b5f46918ecd3894b55b214f", "26724e23cf6841b1bf6f0437b1fd4218", "46664ed4b13c404e8dc8e8e5c5d4e165", "cd10aba6015c46738784a43345941f90", "ce871679c2044c0c88a0534bdc0ef0ca", "df1a624e4cbc4bfe8641ffbe39d27b80", "737a7d01517148c294a55688f411d4f6", "f299cf491e954f84a21a24b4878061f3", "8eef34ff59a943a8b2e14594a7b77aef", "9594dd26ddb34f72a3d428b1296b3116" ] }, "id": "lLKgY40efdJo", "outputId": "b135d0ab-b371-4186-d715-5e7932e46897" }, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "f8be448474cc4b81a0b5a330f9ed3705", "version_major": 2, "version_minor": 0 }, "text/plain": [ "README.md: 0%| | 0.00/885 [00:00, ?B/s]" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "b655e8c850874946a7a8b44d458ab194", "version_major": 2, "version_minor": 0 }, "text/plain": [ "dev-00000-of-00001.parquet: 0%| | 0.00/369k [00:00, ?B/s]" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "8db267d4b29e42fd82de95850616e654", "version_major": 2, "version_minor": 0 }, "text/plain": [ "train-00000-of-00001.parquet: 0%| | 0.00/2.70M [00:00, ?B/s]" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "1db39cc8d8cf468f86b84aceb5ad44d2", "version_major": 2, "version_minor": 0 }, "text/plain": [ "Generating dev split: 0%| | 0/1034 [00:00, ? examples/s]" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "a33e8d824b37457a9e66e5ad051f33ea", "version_major": 2, "version_minor": 0 }, "text/plain": [ "Generating train split: 0%| | 0/8656 [00:00, ? examples/s]" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "# bird = load_dataset(bird_id, split=\"train\").to_pandas()\n", "spider = load_dataset(spider_id, split=\"train\").to_pandas()" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "D3d_091Ud0BG" }, "outputs": [], "source": [ "# df = pd.concat([spider, bird], axis=0)\n", "# df = df.fillna('')" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "niRTjA1tQIWr" }, "outputs": [], "source": [ "# df.reset_index(drop=True, inplace=True)" ] }, { "cell_type": "code", "execution_count": 16, "metadata": { "id": "po1gNdLjFKdZ" }, "outputs": [], "source": [ "df = spider" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 241, "referenced_widgets": [ "5a1e71143dc741efac418bc21eb99526", "e380a31a7c9a41f1bd9de22a4eec9afd", "73f947fba7da4fef829ee2034171a836", "a9abf81190a54b29a4fbf7c2514d8b3e", "68dd1789b9a34b91a74c7d00a6c5c491", "dce14eebcd734217be5fd8bfd1099439", "0ecff7b9301b4713870b0747682533b1", "fb996e2abfcc4a0caaec2a2f3ff9fc9a", "11a036e18eac4d7d809a05f68cfb1096", "d10ad1893e1f48849896994284318067", "418852eab70342b89b6c1589874d71d9", "dc51fa9dd4bc4047b0bb22500b95b1f7", "d01ce95ef53d49bd913fcec32505729d", "748a1613620249e6ae4d71aee744024f", "f1c7ee8b443c4638a0d1423248a2207b", "01301a7d24a3430387ccd7997ce62361", "3bfbe04029a544c0924738e19e7a9436", "f3b80afa9a464264b4a001b9eae61983", "b5c66c0f758645f58aa66d526224e467", "771a2d454ea24693a24e124490a97f54", "0da2fbbaf26a4407bcfc9d2526870a13", "40e6cc0f3f7d4ba586f733c8000cef3e", "a5f7375c80e941418802baba0868b509", "0157feead1184dc89e1b23bb4c3c6265", "f6343ed2e1894a6a8a537a341628a1b3", "38dc818a4cad4ff686f309679fea6fbd", "ace1a9b16b3e4114a9810a9bb1800434", "5a15e7b737b54026b942fecd32bae90a", "c888470c2a3f4d5b93227002e2ae463a", "e4778b1b2402447589011dd20dea59d5", "5df57b5daa364f8fbf4d839886ef0283", "a304bcda80bf4148b38ba8d30454d92b", "06976ba3ed7b4c0f9741604ca01dec8a", "25e2b5a94982477ea03255387e1a4c95", "8329778c95f4488988e3d013bbf81493", "af3a06f6682343dda51ed1ef58229a0a", "63c9861278ae4a0cb2632cc6e3e9fa31", "5d689c3854654e1a9552d637e855b2e9", "07c7e9477dc848e586d193f850e4b330", "648f89df3d71429abe155bf048661b3c", "35bac25d9a394d53a643941969f11f33", "a0d882959f0b4b51969691cf6dcecfa1", "97b5552f17f44a34b90413fc663f83f0", "057bfb8b3fb54a12982d1b7422c9e6ef", "2b9ace70a9bc401085fdbc8cced20d38", "358831d17c974c2c95648cf9ee7c9294", "8bb4561e208b4d2f823c782124cfa335", "1a01eb10b70e42a58ebfedb6ccd61a73", "6880557c5da649c19c855735754906d1", "9dd0e981b51648d888fb724c936b6cf4", "9c1bce42a2494449b8f3a833ef09e862", "5be7663a62f44df9ac01171e421c732c", "9611ef2940f74dbab86496d99bf4d7eb", "1e6881df003345718b84ad9ed160a8af", "e03975280a21420d8880f8bfd93ac44a", "0b37dc50f0cd4f149feb3f7ce08f2630", "0abf618ec97e4ca8873c8af349516150", "c9bd191293104538bdbc0a5539be4d0a", "f3fe35e2b0164044aed92f8f776a17b5", "3a9a0de33597425ab7e550ec32aad186", "f6357593f5f64966a4f115adbcbec940", "f283bc804ab348a29c33a82368552fd5", "3248d30b29f84bdcbe4c43594c803673", "307a99fb69894c4cac341e23119150f2", "46d219bf79384b1d98052febd0271589", "71990f86fca248bd8d1e75be403d70db", "9793ebb3046b4e0cadc483222c6d88f9", "4a47677ae2204165be1c7a27214afab4", "9c1c3f497c874132b43c6c8e86bbf3e1", "814ec31670f543e798527a887db56ea0", "e022ce00981049349403502e2a07758b", "aad2ce00485e4468a787dbca24d0ba04", "03994d990e7442b5ab2249403fa62467", "c505d4d70cfe4c23844c7fef29b79c16", "d88bd0e5dc454761bd9db02488746996", "9a3733ca2451421e9ad5613f94ea7d89", "48a9e7a72ccb4352a7393a2461716e99" ] }, "id": "LeL7EfmQVyBt", "outputId": "4f3f46d3-c400-4dcb-b2ab-acc7880e3e90" }, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "5a1e71143dc741efac418bc21eb99526", "version_major": 2, "version_minor": 0 }, "text/plain": [ "README.md: 0%| | 0.00/1.23k [00:00, ?B/s]" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "dc51fa9dd4bc4047b0bb22500b95b1f7", "version_major": 2, "version_minor": 0 }, "text/plain": [ "trainSpider-00000-of-00001.parquet: 0%| | 0.00/2.00M [00:00, ?B/s]" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "a5f7375c80e941418802baba0868b509", "version_major": 2, "version_minor": 0 }, "text/plain": [ "devSpider-00000-of-00001.parquet: 0%| | 0.00/297k [00:00, ?B/s]" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "25e2b5a94982477ea03255387e1a4c95", "version_major": 2, "version_minor": 0 }, "text/plain": [ "testSpider-00000-of-00001.parquet: 0%| | 0.00/649k [00:00, ?B/s]" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "2b9ace70a9bc401085fdbc8cced20d38", "version_major": 2, "version_minor": 0 }, "text/plain": [ "Generating trainSpider split: 0%| | 0/8656 [00:00, ? examples/s]" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "0b37dc50f0cd4f149feb3f7ce08f2630", "version_major": 2, "version_minor": 0 }, "text/plain": [ "Generating devSpider split: 0%| | 0/1034 [00:00, ? examples/s]" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "9793ebb3046b4e0cadc483222c6d88f9", "version_major": 2, "version_minor": 0 }, "text/plain": [ "Generating testSpider split: 0%| | 0/2147 [00:00, ? examples/s]" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "#df = load_dataset(selector_id, split=\"trainSpider\").to_pandas()" ] }, { "cell_type": "markdown", "metadata": { "id": "ON-hPsEYM1Bu" }, "source": [ "# Load Base Model" ] }, { "cell_type": "code", "execution_count": 8, "metadata": { "id": "yEAZpfzlNOHW" }, "outputs": [], "source": [ "def download_checkpoint(adapter_model_id, checkpoint):\n", " fs = HfFileSystem()\n", " for file in fs.ls(f'{adapter_model_id}/{checkpoint}', detail=False):\n", " file_name = file.split(checkpoint)[-1]\n", "\n", " hf_hub_download(repo_id=adapter_model_id, filename=(f'{checkpoint}{file_name}'), local_dir='out')\n", "\n", " for file in fs.ls(f'{adapter_model_id}/logs', detail=False):\n", " file_name = file.split(checkpoint)[-1]\n", "\n", " hf_hub_download(repo_id=adapter_model_id, filename=(f'logs/{file_name.split(\"/\")[-1]}'), local_dir='out')" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "enUxjGXEqHxg" }, "outputs": [], "source": [ "# download_checkpoint(out_name, prev_checkpoint)" ] }, { "cell_type": "code", "execution_count": 9, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 241, "referenced_widgets": [ "4caa693261bc49f2adfa307db2cc32cd", "fbe2aaea54a64038b5c5eb163b547178", "945b80f640804d138863b83107cb3a34", "413af74285644c2584f89a784adcefaa", "c73ec24352b24c9c80418ab97e1a0b3d", "7b9b30c632114951a6e9a9d60ffc12de", "fab4e6c7d2324b1d9fd910be93649ab7", "30e1e93f86f14516aaf7029547343e80", "861322bcc6dd45e39c843e1d3b24a56c", "547166c07b0f422a9a09feeb17308630", "8218ae5ebfe14d108c0f81bbc613d068", "849d2a1ef1ce4c18a77fb942ae93e8b1", "5477d1cc9b624504a32680edd604c5e7", "4b7df6016a8b406d8d7a3445c8f93401", "8ee2398ab31642cfb7cbfac92a1921a3", "6344de48905a4e4c8fad3d45bef1b775", "c650bc4d4b7c47bfae862a82381e029c", "07e605f64efc40069ce4d2230994b434", "02f535602d254970bb989d1a266a3d0f", "9ce13ec9b1cb4e099ccdd71fcecbe4b5", "9c64059a8d104c8b9cc90ddd0c3346a3", "9a37bcd97f3a40dc8c817d53a1ede4a7", "35e14dfebc8843e7a662e039ea6083dc", "23edeb80adbe4303bc027e2445d97e35", "e152ff9832ca48a486ee3eb8fb9c92f4", "41ee9b1fd3484402ba29b2a40396ffd5", "fbe8dd180a2b4bc297c2459437efa233", "f3e9f0ccec9d484ca382ba069fba3938", "85d24aefccb54e8e8ed91adc047265f2", "772199510e24459bb4a0b2393b26374e", "906569b8519c4760879f62d6f4deee0d", "38f9c9447e4a406684d361b2f4277756", "58bcd2d5d35a4eea93109c27c04709e8", "2b796db373454d36a63f7b849b18bb2f", "d55b294715504a8ea79bc756b318d774", "aedf4b3b0e0b4de19cf5694f96abfbea", "6020e9aeeb304b7b94868554c1557762", "e75f724e5c6749e1a4d7e5cd3e770fd3", "7edaddcf0b48455a871984cc6283c4ca", "e158c6b62bad41dea0041ddc3d20426b", "82c2e96d96904b41ad85ef0636aab4fb", "c0d01107f2364217ad8a610e0f61a2d1", "ab46e75b27084605a775873d5657bb7d", "d5d59657e4594ee88978e9ab9f8e6ee6", "18e26427c5c5458892a8ed813dca6ea4", "f8f1f5d82e034f38affc2a0bad1168d7", "24050c97559c4c7094f4f94056d1e312", "cc1fae3c0c724ba6bdf8cb9d7e37dbc3", "a5010a7e2854485d8ebd21ff3a3c7743", "9668681068164adbbeefec13649f5fa3", "fc9f225a62554911a98c4f6e9bcdd157", "44c15b8c1d384ee29cc4abf0ed4c35ec", "15980266cec845929eaaadf6070edb46", "7ac41fa5cc0841caacc7341631665576", "e54f1f0500de407ea7cf11be8cc38455", "b89a280b9d224e63b6fd6e66ac0e2a27", "5acec29d88df40a4b38653a089400642", "9b612b11222f46a2af4c6644cc01c816", "209586e482a44608bea98c1040659fbf", "66694aeeed8443eaa9f355585e86f3e8", "4e71dc1ccb2549a9aec5a8e757ac8652", "82d98705bcc946f7a1bde908adf2e05a", "e153584bd9dc4d7a9721653eca2d241d", "ced24d5f536245f482f3b9148713d800", "1975c603e767497284d45a3a32efe6b2", "055a7636dc3a4e37ba727b12c5c6d88d", "b22fa2a33b3b4a4187b3f5603dbe29b8", "675565e845f6405689ae4a036912194b", "6e440765d43f41619a9f3868899786da", "ffcc321860484e499fe846375cff1986", "d68f76749b9e4053ab38a9e13e524f48", "216655518d0f4757899237c80fc3733f", "c5312e241ec4441982d8a4852510c08b", "05c4655adc8f4394bfd02794eb36ca6f", "16ec1f130e96499aafb94ef8807512d4", "ac90f9f3291f46aeb4194aa40c29d276", "4ddb3e4f4be3425a8e884c8c14f99d1a" ] }, "id": "M7DoqQMlM_nW", "outputId": "bd5d0dce-8e2d-485c-e696-9128432ff4d3" }, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "4caa693261bc49f2adfa307db2cc32cd", "version_major": 2, "version_minor": 0 }, "text/plain": [ "tokenizer_config.json: 0%| | 0.00/1.29k [00:00, ?B/s]" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "849d2a1ef1ce4c18a77fb942ae93e8b1", "version_major": 2, "version_minor": 0 }, "text/plain": [ "tokenizer.model: 0%| | 0.00/500k [00:00, ?B/s]" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "35e14dfebc8843e7a662e039ea6083dc", "version_major": 2, "version_minor": 0 }, "text/plain": [ "tokenizer.json: 0%| | 0.00/1.84M [00:00, ?B/s]" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "2b796db373454d36a63f7b849b18bb2f", "version_major": 2, "version_minor": 0 }, "text/plain": [ "special_tokens_map.json: 0%| | 0.00/551 [00:00, ?B/s]" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "18e26427c5c5458892a8ed813dca6ea4", "version_major": 2, "version_minor": 0 }, "text/plain": [ "config.json: 0%| | 0.00/608 [00:00, ?B/s]" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "b89a280b9d224e63b6fd6e66ac0e2a27", "version_major": 2, "version_minor": 0 }, "text/plain": [ "model.safetensors: 0%| | 0.00/2.20G [00:00, ?B/s]" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "b22fa2a33b3b4a4187b3f5603dbe29b8", "version_major": 2, "version_minor": 0 }, "text/plain": [ "generation_config.json: 0%| | 0.00/124 [00:00, ?B/s]" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "seed=14\n", "\n", "if (prev_checkpoint != None):\n", " try:\n", " download_checkpoint(out_name, prev_checkpoint)\n", " except:\n", " pass\n", "tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True, token=READ_TOKEN, map_device=\"auto\", add_eos_token=True, use_fast=True)\n", "\n", "new_tokens = {'additional_special_tokens': ['[SQL]','[/SQL]', '[QUESTION]','[/QUESTION]']}\n", "#adicionar tokens especiais:\n", "# if (prev_checkpoint == None):\n", "# tokenizer.add_special_tokens(new_tokens)\n", "\n", "\n", "if torch.cuda.is_bf16_supported():\n", " compute_dtype = torch.bfloat16\n", " attn_implementation = 'flash_attention_2'\n", "else:\n", " compute_dtype = torch.float16\n", " attn_implementation = 'sdpa'\n", "\n", "tokenizer.pad_token = tokenizer.eos_token\n", "tokenizer.padding_side = \"right\"\n", "\n", "\n", "bnb_config = BitsAndBytesConfig(\n", " load_in_4bit=True,\n", " bnb_4bit_quant_type=\"nf4\",\n", " bnb_4bit_compute_dtype=compute_dtype,\n", " bnb_4bit_use_double_quant=False,\n", ")\n", "\n", "model = AutoModelForCausalLM.from_pretrained(\n", " model_name,\n", " torch_dtype=compute_dtype,\n", " device_map=\"auto\",\n", " quantization_config=bnb_config,\n", "\n", " trust_remote_code=True,\n", " token=READ_TOKEN,\n", " # attn_implementation=attn_implementation\n", ")\n", "\n", "# se adicionar special_tokens tem que fazer resize do tokenizer:\n", "# model.resize_token_embeddings(len(tokenizer))\n", "\n", "## model.resize_token_embeddings(max(len(tokenizer), model.config.vocab_size))" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "fLuqzhSJBvi8", "outputId": "65293049-fcc8-491b-aaca-579bca0686c7" }, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "The new embeddings will be initialized from a multivariate normal distribution that has old embeddings' mean and covariance. As described in this article: https://nlp.stanford.edu/~johnhew/vocab-expansion.html. To disable this, use `mean_resizing=False`\n", "The new lm_head weights will be initialized from a multivariate normal distribution that has old embeddings' mean and covariance. As described in this article: https://nlp.stanford.edu/~johnhew/vocab-expansion.html. To disable this, use `mean_resizing=False`\n" ] }, { "data": { "text/plain": [ "Embedding(32004, 2048)" ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# se adicionar special_tokens tem que fazer resize do tokenizer:\n", "#model.resize_token_embeddings(len(tokenizer))\n", "\n", "#model.resize_token_embeddings(max(len(tokenizer), model.config.vocab_size))" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "EZcGlhE5jW4L", "outputId": "6e4648c3-e5d0-4cf9-e3d1-1534dccea1d8" }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "{% for message in messages %}\n", "{% if message['role'] == 'user' %}\n", "{{ '<|user|>\n", "' + message['content'] + eos_token }}\n", "{% elif message['role'] == 'system' %}\n", "{{ '<|system|>\n", "' + message['content'] + eos_token }}\n", "{% elif message['role'] == 'assistant' %}\n", "{{ '<|assistant|>\n", "' + message['content'] + eos_token }}\n", "{% endif %}\n", "{% if loop.last and add_generation_prompt %}\n", "{{ '<|assistant|>' }}\n", "{% endif %}\n", "{% endfor %}\n" ] } ], "source": [ "print(tokenizer.chat_template)" ] }, { "cell_type": "markdown", "metadata": { "id": "leGEqxURAL4r" }, "source": [ "#### Chat Template - Schema Linking" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "Jiq-S5I1KOBr" }, "outputs": [], "source": [ "# tokenizer.chat_template = \"\"\"\n", "# {% if messages[0]['role'] == 'system' %}\n", "# {% set loop_messages = messages[1:] %}\n", "# {% set system_message = messages[0]['content'] %}\n", "# {% else %}\n", "# {% set loop_messages = messages %}\n", "# {% set system_message = 'You are an SQL expert tasked with answering questions about an SQLite database./n/Your responses must always consist of valid SQL code and only that.' %}\n", "# {% endif %}\n", "# {{ '# <|system|>/n/' + system_message + '/n//n/' }}\n", "# {% if messages|selectattr(\"role\", \"equalto\", \"example\")|list %}\n", "# Below are some examples of question and their corresponding SQL queries:/n//n/\n", "# {% else %}\n", "# /n/\n", "# {% endif %}\n", "# {% for message in loop_messages %}\n", "# {% if message['role'] == 'example' %}\n", "# {{ message['content'] }}/n//n/\n", "# {% elif message['role'] == 'schema' %}\n", "# # <|schema|>/n/The query will run on a database with the following schema:/n/{{ message['content'] }}/n//n/\n", "# {% elif message['role'] == 'user' %}\n", "# # <|user|>/n/[QUESTION]{{ message['content'] }}[/QUESTION]/n//n/\n", "# {% elif message['role'] == 'assistant' %}\n", "# # <|assistant|>/n/[SQL]{{ message['content'] }}[/SQL]\n", "# {% endif %}\n", "# {% endfor %}\n", "# {% if add_generation_prompt %}\n", "# # <|assistant|>/n/[SQL]\n", "# {% endif %}\n", "# \"\"\".replace(\"\\n\",\"\").replace(\" \", \"\").replace(\"/n/\", \"\\n\")\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "aHGTJNq2AmUs" }, "outputs": [], "source": [ "def to_sql(query):\n", " return sqlparse.format(query, reindent=True, keyword_case='upper')\n", "\n", "def replace_last_sql_content(text, new_content):\n", " matches = list(re.finditer(r'```sql (.*?)```', text, re.DOTALL))\n", " if not matches:\n", " return text\n", "\n", " last_match = matches[-1]\n", "\n", " start, end = last_match.span(1)\n", " return text[:start] + new_content + text[end:]\n", "\n", "# def apply_template(row, tokenizer=tokenizer, n_examplea=0):\n", "# question = row['question_en']\n", "# schema = row['schema_llm']\n", "# sql = to_sql(row['query_llm'])\n", "\n", "# system = \"Given a user question and the schema of a database, your task is to generate an SQL query that accurately answers the question based on the provided schema.\"\n", "\n", "\n", "# chat = [{'role': 'user', 'content': f\"# System:\\n{system}\\n\\n# Schema:\\n```sql\\n{schema}\\n```\\n\\n# Question: {question}\"},\n", "# {'role': 'assistant', 'content': f\"```sql\\n{sql}\\n```\"}\n", "# ]\n", "\n", "# row['text'] = tokenizer.apply_chat_template(chat, tokenize=False, add_generation_prompt=False)\n", "\n", "# return row\n", "\n", "def apply_template(row, tokenizer=tokenizer, n_examplea=0):\n", " question = row['question_en']\n", " schema = row['schema_llm_t']\n", " schema_linking = to_sql(row['selector_correct'])\n", "\n", " system = \"Given a user question and the schema of a database, your task is to generate an JSON with the the names of tables and columns of the schema that the question is referring to.\"\n", "\n", " chat = [{'role': 'user', 'content': f\"# System:\\n{system}\\n\\n# Schema:\\n```sql\\n{schema}\\n```\\n\\n# Question: {question}\"},\n", " {'role': 'assistant', 'content': f\"```json\\n{schema_linking}\\n```\"}\n", " ]\n", "\n", " row['text'] = tokenizer.apply_chat_template(chat, tokenize=False, add_generation_prompt=False)\n", "\n", " return row" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "QlA1kMj83Vql" }, "outputs": [], "source": [ "# spider_chain = json.load(open(\"/content/drive/Shareddrives/LLMs/Datasets/spider/spider_chain.json\", \"r\"))\n", "# bird_chain = json.load(open(\"/content/drive/Shareddrives/LLMs/Datasets/bird/bird_chain.json\", \"r\"))" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "pCcD4TiC3rpb" }, "outputs": [], "source": [ "# df['chain'] = spider_chain + bird_chain" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "yvcMZAL3E3TB" }, "outputs": [], "source": [ "df = df.apply(apply_template, axis=1)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 539 }, "id": "rQ0KUl2w58K9", "outputId": "664b4e86-fe9f-45ee-bd2b-ea021a31b08f" }, "outputs": [ { "data": { "application/vnd.google.colaboratory.intrinsic+json": { "summary": "{\n \"name\": \"df\",\n \"rows\": 8656,\n \"fields\": [\n {\n \"column\": \"db_id\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 146,\n \"samples\": [\n \"gas_company\",\n \"customers_campaigns_ecommerce\",\n \"performance_attendance\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"schema\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 146,\n \"samples\": [\n \"\\nCREATE TABLE company (\\n\\t\\\"Company_ID\\\" INTEGER, \\n\\t\\\"Rank\\\" INTEGER, \\n\\t\\\"Company\\\" TEXT, \\n\\t\\\"Headquarters\\\" TEXT, \\n\\t\\\"Main_Industry\\\" TEXT, \\n\\t\\\"Sales_billion\\\" REAL, \\n\\t\\\"Profits_billion\\\" REAL, \\n\\t\\\"Assets_billion\\\" REAL, \\n\\t\\\"Market_Value\\\" REAL, \\n\\tPRIMARY KEY (\\\"Company_ID\\\")\\n)\\n\\n\\nCREATE TABLE gas_station (\\n\\t\\\"Station_ID\\\" INTEGER, \\n\\t\\\"Open_Year\\\" INTEGER, \\n\\t\\\"Location\\\" TEXT, \\n\\t\\\"Manager_Name\\\" TEXT, \\n\\t\\\"Vice_Manager_Name\\\" TEXT, \\n\\t\\\"Representative_Name\\\" TEXT, \\n\\tPRIMARY KEY (\\\"Station_ID\\\")\\n)\\n\\n\\nCREATE TABLE station_company (\\n\\t\\\"Station_ID\\\" INTEGER, \\n\\t\\\"Company_ID\\\" INTEGER, \\n\\t\\\"Rank_of_the_Year\\\" INTEGER, \\n\\tPRIMARY KEY (\\\"Station_ID\\\", \\\"Company_ID\\\"), \\n\\tFOREIGN KEY(\\\"Company_ID\\\") REFERENCES company (\\\"Company_ID\\\"), \\n\\tFOREIGN KEY(\\\"Station_ID\\\") REFERENCES gas_station (\\\"Station_ID\\\")\\n)\",\n \"\\nCREATE TABLE \\\"Customer_Addresses\\\" (\\n\\tcustomer_id INTEGER NOT NULL, \\n\\tpremise_id INTEGER NOT NULL, \\n\\tdate_address_from DATETIME NOT NULL, \\n\\taddress_type_code VARCHAR(15) NOT NULL, \\n\\tdate_address_to DATETIME, \\n\\tFOREIGN KEY(customer_id) REFERENCES \\\"Customers\\\" (customer_id), \\n\\tFOREIGN KEY(premise_id) REFERENCES \\\"Premises\\\" (premise_id)\\n)\\n\\n\\nCREATE TABLE \\\"Customer_Orders\\\" (\\n\\torder_id INTEGER, \\n\\tcustomer_id INTEGER NOT NULL, \\n\\torder_status_code VARCHAR(15) NOT NULL, \\n\\tshipping_method_code VARCHAR(15) NOT NULL, \\n\\torder_placed_datetime DATETIME NOT NULL, \\n\\torder_delivered_datetime DATETIME, \\n\\torder_shipping_charges VARCHAR(255), \\n\\tPRIMARY KEY (order_id), \\n\\tFOREIGN KEY(customer_id) REFERENCES \\\"Customers\\\" (customer_id)\\n)\\n\\n\\nCREATE TABLE \\\"Customers\\\" (\\n\\tcustomer_id INTEGER, \\n\\tpayment_method VARCHAR(15) NOT NULL, \\n\\tcustomer_name VARCHAR(80), \\n\\tcustomer_phone VARCHAR(80), \\n\\tcustomer_email VARCHAR(80), \\n\\tcustomer_address VARCHAR(255), \\n\\tcustomer_login VARCHAR(80), \\n\\tcustomer_password VARCHAR(10), \\n\\tPRIMARY KEY (customer_id)\\n)\\n\\n\\nCREATE TABLE \\\"Mailshot_Campaigns\\\" (\\n\\tmailshot_id INTEGER, \\n\\tproduct_category VARCHAR(15), \\n\\tmailshot_name VARCHAR(80), \\n\\tmailshot_start_date DATETIME, \\n\\tmailshot_end_date DATETIME, \\n\\tPRIMARY KEY (mailshot_id)\\n)\\n\\n\\nCREATE TABLE \\\"Mailshot_Customers\\\" (\\n\\tmailshot_id INTEGER NOT NULL, \\n\\tcustomer_id INTEGER NOT NULL, \\n\\toutcome_code VARCHAR(15) NOT NULL, \\n\\tmailshot_customer_date DATETIME, \\n\\tFOREIGN KEY(mailshot_id) REFERENCES \\\"Mailshot_Campaigns\\\" (mailshot_id), \\n\\tFOREIGN KEY(customer_id) REFERENCES \\\"Customers\\\" (customer_id)\\n)\\n\\n\\nCREATE TABLE \\\"Order_Items\\\" (\\n\\titem_id INTEGER NOT NULL, \\n\\torder_item_status_code VARCHAR(15) NOT NULL, \\n\\torder_id INTEGER NOT NULL, \\n\\tproduct_id INTEGER NOT NULL, \\n\\titem_status_code VARCHAR(15), \\n\\titem_delivered_datetime DATETIME, \\n\\titem_order_quantity VARCHAR(80), \\n\\tFOREIGN KEY(order_id) REFERENCES \\\"Customer_Orders\\\" (order_id), \\n\\tFOREIGN KEY(product_id) REFERENCES \\\"Products\\\" (product_id)\\n)\\n\\n\\nCREATE TABLE \\\"Premises\\\" (\\n\\tpremise_id INTEGER, \\n\\tpremises_type VARCHAR(15) NOT NULL, \\n\\tpremise_details VARCHAR(255), \\n\\tPRIMARY KEY (premise_id)\\n)\\n\\n\\nCREATE TABLE \\\"Products\\\" (\\n\\tproduct_id INTEGER, \\n\\tproduct_category VARCHAR(15) NOT NULL, \\n\\tproduct_name VARCHAR(80), \\n\\tPRIMARY KEY (product_id)\\n)\",\n \"\\nCREATE TABLE member (\\n\\t\\\"Member_ID\\\" TEXT, \\n\\t\\\"Name\\\" TEXT, \\n\\t\\\"Nationality\\\" TEXT, \\n\\t\\\"Role\\\" TEXT, \\n\\tPRIMARY KEY (\\\"Member_ID\\\")\\n)\\n\\n\\nCREATE TABLE member_attendance (\\n\\t\\\"Member_ID\\\" INTEGER, \\n\\t\\\"Performance_ID\\\" INTEGER, \\n\\t\\\"Num_of_Pieces\\\" INTEGER, \\n\\tPRIMARY KEY (\\\"Member_ID\\\", \\\"Performance_ID\\\"), \\n\\tFOREIGN KEY(\\\"Performance_ID\\\") REFERENCES performance (\\\"Performance_ID\\\"), \\n\\tFOREIGN KEY(\\\"Member_ID\\\") REFERENCES member (\\\"Member_ID\\\")\\n)\\n\\n\\nCREATE TABLE performance (\\n\\t\\\"Performance_ID\\\" REAL, \\n\\t\\\"Date\\\" TEXT, \\n\\t\\\"Host\\\" TEXT, \\n\\t\\\"Location\\\" TEXT, \\n\\t\\\"Attendance\\\" INTEGER, \\n\\tPRIMARY KEY (\\\"Performance_ID\\\")\\n)\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"question_en\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 8618,\n \"samples\": [\n \"What are the names of courses without prerequisites?\",\n \"Return the investor who have invested in the greatest number of entrepreneurs.\",\n \"Which customers have ever canceled the purchase of the product \\\"food\\\" (the item status is \\\"Cancel\\\")?\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"hardness\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 4,\n \"samples\": [\n \"medium\",\n \"extra\",\n \"easy\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"schema_llm\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 146,\n \"samples\": [\n \"Table company (\\n company.Company_ID (INT),\\n company.Rank (INT),\\n company.Company (TEXT),\\n company.Headquarters (TEXT),\\n company.Main_Industry (TEXT),\\n company.Sales_billion (REAL),\\n company.Profits_billion (REAL),\\n company.Assets_billion (REAL),\\n company.Market_Value (REAL),\\n)\\n\\nTable gas_station (\\n gas_station.Station_ID (INT),\\n gas_station.Open_Year (INT),\\n gas_station.Location (TEXT),\\n gas_station.Manager_Name (TEXT),\\n gas_station.Vice_Manager_Name (TEXT),\\n gas_station.Representative_Name (TEXT),\\n)\\n\\nTable station_company (\\n station_company.Station_ID (INT),\\n station_company.Company_ID (INT),\\n station_company.Rank_of_the_Year (INT),\\n)\\n\\n\\nPossible JOINs:\\n\\nstation_company.Station_ID = gas_station.Station_ID\\nstation_company.Company_ID = company.Company_ID\\n\",\n \"Table Customer_Addresses (\\n Customer_Addresses.customer_id (INTEGER),\\n Customer_Addresses.premise_id (INTEGER),\\n Customer_Addresses.date_address_from (DATETIME),\\n Customer_Addresses.address_type_code (VARCHAR(15)),\\n Customer_Addresses.date_address_to (DATETIME),\\n)\\n\\nTable Customer_Orders (\\n Customer_Orders.order_id (INTEGER),\\n Customer_Orders.customer_id (INTEGER),\\n Customer_Orders.order_status_code (VARCHAR(15)),\\n Customer_Orders.shipping_method_code (VARCHAR(15)),\\n Customer_Orders.order_placed_datetime (DATETIME),\\n Customer_Orders.order_delivered_datetime (DATETIME),\\n Customer_Orders.order_shipping_charges (VARCHAR(255)),\\n)\\n\\nTable Customers (\\n Customers.customer_id (INTEGER),\\n Customers.payment_method (VARCHAR(15)),\\n Customers.customer_name (VARCHAR(80)),\\n Customers.customer_phone (VARCHAR(80)),\\n Customers.customer_email (VARCHAR(80)),\\n Customers.customer_address (VARCHAR(255)),\\n Customers.customer_login (VARCHAR(80)),\\n Customers.customer_password (VARCHAR(10)),\\n)\\n\\nTable Mailshot_Campaigns (\\n Mailshot_Campaigns.mailshot_id (INTEGER),\\n Mailshot_Campaigns.product_category (VARCHAR(15)),\\n Mailshot_Campaigns.mailshot_name (VARCHAR(80)),\\n Mailshot_Campaigns.mailshot_start_date (DATETIME),\\n Mailshot_Campaigns.mailshot_end_date (DATETIME),\\n)\\n\\nTable Mailshot_Customers (\\n Mailshot_Customers.mailshot_id (INTEGER),\\n Mailshot_Customers.customer_id (INTEGER),\\n Mailshot_Customers.outcome_code (VARCHAR(15)),\\n Mailshot_Customers.mailshot_customer_date (DATETIME),\\n)\\n\\nTable Order_Items (\\n Order_Items.item_id (INTEGER),\\n Order_Items.order_item_status_code (VARCHAR(15)),\\n Order_Items.order_id (INTEGER),\\n Order_Items.product_id (INTEGER),\\n Order_Items.item_status_code (VARCHAR(15)),\\n Order_Items.item_delivered_datetime (DATETIME),\\n Order_Items.item_order_quantity (VARCHAR(80)),\\n)\\n\\nTable Premises (\\n Premises.premise_id (INTEGER),\\n Premises.premises_type (VARCHAR(15)),\\n Premises.premise_details (VARCHAR(255)),\\n)\\n\\nTable Products (\\n Products.product_id (INTEGER),\\n Products.product_category (VARCHAR(15)),\\n Products.product_name (VARCHAR(80)),\\n)\\n\\n\\nPossible JOINs:\\n\\nCustomer_Addresses.customer_id = Customers.customer_id\\nCustomer_Addresses.premise_id = Premises.premise_id\\n\\nCustomer_Orders.customer_id = Customers.customer_id\\n\\nMailshot_Customers.mailshot_id = Mailshot_Campaigns.mailshot_id\\nMailshot_Customers.customer_id = Customers.customer_id\\n\\nOrder_Items.order_id = Customer_Orders.order_id\\nOrder_Items.product_id = Products.product_id\\n\",\n \"Table member (\\n member.Member_ID (TEXT),\\n member.Name (TEXT),\\n member.Nationality (TEXT),\\n member.Role (TEXT),\\n)\\n\\nTable member_attendance (\\n member_attendance.Member_ID (INT),\\n member_attendance.Performance_ID (INT),\\n member_attendance.Num_of_Pieces (INT),\\n)\\n\\nTable performance (\\n performance.Performance_ID (REAL),\\n performance.Date (TEXT),\\n performance.Host (TEXT),\\n performance.Location (TEXT),\\n performance.Attendance (INT),\\n)\\n\\n\\nPossible JOINs:\\n\\nmember_attendance.Member_ID = member.Member_ID\\nmember_attendance.Performance_ID = performance.Performance_ID\\n\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"query_llm\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 4712,\n \"samples\": [\n \"SELECT manager_name FROM gas_station WHERE open_year > 2000 GROUP BY manager_name ORDER BY count(*) DESC LIMIT 1\",\n \"SELECT lname FROM authors WHERE fname = \\\"Amal\\\"\",\n \"SELECT * FROM swimmer\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"selector\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 3358,\n \"samples\": [\n \"{\\n 'rating': ['mid', 'stars'],\\n 'movie': ['mid', 'title', 'director']\\n}\",\n \"{\\n 'player': ['birth_country', 'bats']\\n}\",\n \"{\\n 'bank': ['branch_id', 'city']\\n}\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"schema_llm_ct\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 147,\n \"samples\": [\n \"CREATE TABLE country (\\n country.Country_ID (INT),\\n country.Name (TEXT),\\n country.Population (INT),\\n country.Area (INT),\\n country.Languages (TEXT),\\n)\\n\\nCREATE TABLE roller_coaster (\\n roller_coaster.Roller_Coaster_ID (INT),\\n roller_coaster.Name (TEXT),\\n roller_coaster.Park (TEXT),\\n roller_coaster.Country_ID (INT),\\n roller_coaster.Length (REAL),\\n roller_coaster.Height (REAL),\\n roller_coaster.Speed (TEXT),\\n roller_coaster.Opened (TEXT),\\n roller_coaster.Status (TEXT),\\n)\\n\\n\\nPossible JOINs:\\n\\nroller_coaster.Country_ID = country.Country_ID\\n\",\n \"CREATE TABLE circuits (\\n circuits.circuitId (INTEGER),\\n circuits.circuitRef (TEXT),\\n circuits.name (TEXT),\\n circuits.location (TEXT),\\n circuits.country (TEXT),\\n circuits.lat (REAL),\\n circuits.lng (REAL),\\n circuits.alt (TEXT),\\n circuits.url (TEXT),\\n)\\n\\nCREATE TABLE constructorResults (\\n constructorResults.constructorResultsId (INTEGER),\\n constructorResults.raceId (INTEGER),\\n constructorResults.constructorId (INTEGER),\\n constructorResults.points (REAL),\\n constructorResults.status (TEXT),\\n)\\n\\nCREATE TABLE constructorStandings (\\n constructorStandings.constructorStandingsId (INTEGER),\\n constructorStandings.raceId (INTEGER),\\n constructorStandings.constructorId (INTEGER),\\n constructorStandings.points (REAL),\\n constructorStandings.position (INTEGER),\\n constructorStandings.positionText (TEXT),\\n constructorStandings.wins (INTEGER),\\n)\\n\\nCREATE TABLE constructors (\\n constructors.constructorId (INTEGER),\\n constructors.constructorRef (TEXT),\\n constructors.name (TEXT),\\n constructors.nationality (TEXT),\\n constructors.url (TEXT),\\n)\\n\\nCREATE TABLE driverStandings (\\n driverStandings.driverStandingsId (INTEGER),\\n driverStandings.raceId (INTEGER),\\n driverStandings.driverId (INTEGER),\\n driverStandings.points (REAL),\\n driverStandings.position (INTEGER),\\n driverStandings.positionText (TEXT),\\n driverStandings.wins (INTEGER),\\n)\\n\\nCREATE TABLE drivers (\\n drivers.driverId (INTEGER),\\n drivers.driverRef (TEXT),\\n drivers.number (TEXT),\\n drivers.code (TEXT),\\n drivers.forename (TEXT),\\n drivers.surname (TEXT),\\n drivers.dob (TEXT),\\n drivers.nationality (TEXT),\\n drivers.url (TEXT),\\n)\\n\\nCREATE TABLE lapTimes (\\n lapTimes.raceId (INTEGER),\\n lapTimes.driverId (INTEGER),\\n lapTimes.lap (INTEGER),\\n lapTimes.position (INTEGER),\\n lapTimes.time (TEXT),\\n lapTimes.milliseconds (INTEGER),\\n)\\n\\nCREATE TABLE pitStops (\\n pitStops.raceId (INTEGER),\\n pitStops.driverId (INTEGER),\\n pitStops.stop (INTEGER),\\n pitStops.lap (INTEGER),\\n pitStops.time (TEXT),\\n pitStops.duration (TEXT),\\n pitStops.milliseconds (INTEGER),\\n)\\n\\nCREATE TABLE qualifying (\\n qualifying.qualifyId (INTEGER),\\n qualifying.raceId (INTEGER),\\n qualifying.driverId (INTEGER),\\n qualifying.constructorId (INTEGER),\\n qualifying.number (INTEGER),\\n qualifying.position (INTEGER),\\n qualifying.q1 (TEXT),\\n qualifying.q2 (TEXT),\\n qualifying.q3 (TEXT),\\n)\\n\\nCREATE TABLE races (\\n races.raceId (INTEGER),\\n races.year (INTEGER),\\n races.round (INTEGER),\\n races.circuitId (INTEGER),\\n races.name (TEXT),\\n races.date (TEXT),\\n races.time (TEXT),\\n races.url (TEXT),\\n)\\n\\nCREATE TABLE results (\\n results.resultId (INTEGER),\\n results.raceId (INTEGER),\\n results.driverId (INTEGER),\\n results.constructorId (INTEGER),\\n results.number (INTEGER),\\n results.grid (INTEGER),\\n results.position (TEXT),\\n results.positionText (TEXT),\\n results.positionOrder (INTEGER),\\n results.points (REAL),\\n results.laps (TEXT),\\n results.time (TEXT),\\n results.milliseconds (TEXT),\\n results.fastestLap (TEXT),\\n results.rank (TEXT),\\n results.fastestLapTime (TEXT),\\n results.fastestLapSpeed (TEXT),\\n results.statusId (INTEGER),\\n)\\n\\nCREATE TABLE seasons (\\n seasons.year (INTEGER),\\n seasons.url (TEXT),\\n)\\n\\nCREATE TABLE status (\\n status.statusId (INTEGER),\\n status.status (TEXT),\\n)\\n\\n\\nPossible JOINs:\\n\\nconstructorResults.raceId = races.raceId\\nconstructorResults.constructorId = constructors.constructorId\\n\\nconstructorStandings.raceId = races.raceId\\nconstructorStandings.constructorId = constructors.constructorId\\n\\ndriverStandings.raceId = races.raceId\\ndriverStandings.driverId = drivers.driverId\\n\\nlapTimes.raceId = races.raceId\\nlapTimes.driverId = drivers.driverId\\n\\npitStops.raceId = races.raceId\\npitStops.driverId = drivers.driverId\\n\\nqualifying.raceId = races.raceId\\nqualifying.driverId = drivers.driverId\\nqualifying.constructorId = constructors.constructorId\\n\\nraces.circuitId = circuits.circuitId\\n\\nresults.raceId = races.raceId\\nresults.driverId = drivers.driverId\\nresults.constructorId = constructors.constructorId\\n\",\n \"CREATE TABLE Customers (\\n Customers.customer_id (INTEGER),\\n Customers.customer_name (VARCHAR(80)),\\n Customers.customer_details (VARCHAR(255)),\\n)\\n\\nCREATE TABLE Invoices (\\n Invoices.invoice_number (INTEGER),\\n Invoices.invoice_date (DATETIME),\\n Invoices.invoice_details (VARCHAR(255)),\\n)\\n\\nCREATE TABLE Order_Items (\\n Order_Items.order_item_id (INTEGER),\\n Order_Items.product_id (INTEGER),\\n Order_Items.order_id (INTEGER),\\n Order_Items.order_item_status (VARCHAR(10)),\\n Order_Items.order_item_details (VARCHAR(255)),\\n)\\n\\nCREATE TABLE Orders (\\n Orders.order_id (INTEGER),\\n Orders.customer_id (INTEGER),\\n Orders.order_status (VARCHAR(10)),\\n Orders.date_order_placed (DATETIME),\\n Orders.order_details (VARCHAR(255)),\\n)\\n\\nCREATE TABLE Products (\\n Products.product_id (INTEGER),\\n Products.product_name (VARCHAR(80)),\\n Products.product_details (VARCHAR(255)),\\n)\\n\\nCREATE TABLE Shipment_Items (\\n Shipment_Items.shipment_id (INTEGER),\\n Shipment_Items.order_item_id (INTEGER),\\n)\\n\\nCREATE TABLE Shipments (\\n Shipments.shipment_id (INTEGER),\\n Shipments.order_id (INTEGER),\\n Shipments.invoice_number (INTEGER),\\n Shipments.shipment_tracking_number (VARCHAR(80)),\\n Shipments.shipment_date (DATETIME),\\n Shipments.other_shipment_details (VARCHAR(255)),\\n)\\n\\n\\nPossible JOINs:\\n\\nOrder_Items.product_id = Products.product_id\\nOrder_Items.order_id = Orders.order_id\\n\\nOrders.customer_id = Customers.customer_id\\n\\nShipment_Items.shipment_id = Shipments.shipment_id\\nShipment_Items.order_item_id = Order_Items.order_item_id\\n\\nShipments.order_id = Orders.order_id\\nShipments.invoice_number = Invoices.invoice_number\\n\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"schema_llm_columns_min\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 147,\n \"samples\": [\n \"Table country (\\n country.country_id (INT),\\n country.name (TEXT),\\n country.population (INT),\\n country.area (INT),\\n country.languages (TEXT),\\n)\\n\\nTable roller_coaster (\\n roller_coaster.roller_coaster_id (INT),\\n roller_coaster.name (TEXT),\\n roller_coaster.park (TEXT),\\n roller_coaster.country_id (INT),\\n roller_coaster.length (REAL),\\n roller_coaster.height (REAL),\\n roller_coaster.speed (TEXT),\\n roller_coaster.opened (TEXT),\\n roller_coaster.status (TEXT),\\n)\\n\\n\\nPossible JOINs:\\n\\nroller_coaster.country_id = country.country_id\\n\",\n \"Table circuits (\\n circuits.circuitid (INTEGER),\\n circuits.circuitref (TEXT),\\n circuits.name (TEXT),\\n circuits.location (TEXT),\\n circuits.country (TEXT),\\n circuits.lat (REAL),\\n circuits.lng (REAL),\\n circuits.alt (TEXT),\\n circuits.url (TEXT),\\n)\\n\\nTable constructorResults (\\n constructorResults.constructorresultsid (INTEGER),\\n constructorResults.raceid (INTEGER),\\n constructorResults.constructorid (INTEGER),\\n constructorResults.points (REAL),\\n constructorResults.status (TEXT),\\n)\\n\\nTable constructorStandings (\\n constructorStandings.constructorstandingsid (INTEGER),\\n constructorStandings.raceid (INTEGER),\\n constructorStandings.constructorid (INTEGER),\\n constructorStandings.points (REAL),\\n constructorStandings.position (INTEGER),\\n constructorStandings.positiontext (TEXT),\\n constructorStandings.wins (INTEGER),\\n)\\n\\nTable constructors (\\n constructors.constructorid (INTEGER),\\n constructors.constructorref (TEXT),\\n constructors.name (TEXT),\\n constructors.nationality (TEXT),\\n constructors.url (TEXT),\\n)\\n\\nTable driverStandings (\\n driverStandings.driverstandingsid (INTEGER),\\n driverStandings.raceid (INTEGER),\\n driverStandings.driverid (INTEGER),\\n driverStandings.points (REAL),\\n driverStandings.position (INTEGER),\\n driverStandings.positiontext (TEXT),\\n driverStandings.wins (INTEGER),\\n)\\n\\nTable drivers (\\n drivers.driverid (INTEGER),\\n drivers.driverref (TEXT),\\n drivers.number (TEXT),\\n drivers.code (TEXT),\\n drivers.forename (TEXT),\\n drivers.surname (TEXT),\\n drivers.dob (TEXT),\\n drivers.nationality (TEXT),\\n drivers.url (TEXT),\\n)\\n\\nTable lapTimes (\\n lapTimes.raceid (INTEGER),\\n lapTimes.driverid (INTEGER),\\n lapTimes.lap (INTEGER),\\n lapTimes.position (INTEGER),\\n lapTimes.time (TEXT),\\n lapTimes.milliseconds (INTEGER),\\n)\\n\\nTable pitStops (\\n pitStops.raceid (INTEGER),\\n pitStops.driverid (INTEGER),\\n pitStops.stop (INTEGER),\\n pitStops.lap (INTEGER),\\n pitStops.time (TEXT),\\n pitStops.duration (TEXT),\\n pitStops.milliseconds (INTEGER),\\n)\\n\\nTable qualifying (\\n qualifying.qualifyid (INTEGER),\\n qualifying.raceid (INTEGER),\\n qualifying.driverid (INTEGER),\\n qualifying.constructorid (INTEGER),\\n qualifying.number (INTEGER),\\n qualifying.position (INTEGER),\\n qualifying.q1 (TEXT),\\n qualifying.q2 (TEXT),\\n qualifying.q3 (TEXT),\\n)\\n\\nTable races (\\n races.raceid (INTEGER),\\n races.year (INTEGER),\\n races.round (INTEGER),\\n races.circuitid (INTEGER),\\n races.name (TEXT),\\n races.date (TEXT),\\n races.time (TEXT),\\n races.url (TEXT),\\n)\\n\\nTable results (\\n results.resultid (INTEGER),\\n results.raceid (INTEGER),\\n results.driverid (INTEGER),\\n results.constructorid (INTEGER),\\n results.number (INTEGER),\\n results.grid (INTEGER),\\n results.position (TEXT),\\n results.positiontext (TEXT),\\n results.positionorder (INTEGER),\\n results.points (REAL),\\n results.laps (TEXT),\\n results.time (TEXT),\\n results.milliseconds (TEXT),\\n results.fastestlap (TEXT),\\n results.rank (TEXT),\\n results.fastestlaptime (TEXT),\\n results.fastestlapspeed (TEXT),\\n results.statusid (INTEGER),\\n)\\n\\nTable seasons (\\n seasons.year (INTEGER),\\n seasons.url (TEXT),\\n)\\n\\nTable status (\\n status.statusid (INTEGER),\\n status.status (TEXT),\\n)\\n\\n\\nPossible JOINs:\\n\\nconstructorResults.raceid = races.raceid\\nconstructorResults.constructorid = constructors.constructorid\\n\\nconstructorStandings.raceid = races.raceid\\nconstructorStandings.constructorid = constructors.constructorid\\n\\ndriverStandings.raceid = races.raceid\\ndriverStandings.driverid = drivers.driverid\\n\\nlapTimes.raceid = races.raceid\\nlapTimes.driverid = drivers.driverid\\n\\npitStops.raceid = races.raceid\\npitStops.driverid = drivers.driverid\\n\\nqualifying.raceid = races.raceid\\nqualifying.driverid = drivers.driverid\\nqualifying.constructorid = constructors.constructorid\\n\\nraces.circuitid = circuits.circuitid\\n\\nresults.raceid = races.raceid\\nresults.driverid = drivers.driverid\\nresults.constructorid = constructors.constructorid\\n\",\n \"Table Customers (\\n Customers.customer_id (INTEGER),\\n Customers.customer_name (VARCHAR(80)),\\n Customers.customer_details (VARCHAR(255)),\\n)\\n\\nTable Invoices (\\n Invoices.invoice_number (INTEGER),\\n Invoices.invoice_date (DATETIME),\\n Invoices.invoice_details (VARCHAR(255)),\\n)\\n\\nTable Order_Items (\\n Order_Items.order_item_id (INTEGER),\\n Order_Items.product_id (INTEGER),\\n Order_Items.order_id (INTEGER),\\n Order_Items.order_item_status (VARCHAR(10)),\\n Order_Items.order_item_details (VARCHAR(255)),\\n)\\n\\nTable Orders (\\n Orders.order_id (INTEGER),\\n Orders.customer_id (INTEGER),\\n Orders.order_status (VARCHAR(10)),\\n Orders.date_order_placed (DATETIME),\\n Orders.order_details (VARCHAR(255)),\\n)\\n\\nTable Products (\\n Products.product_id (INTEGER),\\n Products.product_name (VARCHAR(80)),\\n Products.product_details (VARCHAR(255)),\\n)\\n\\nTable Shipment_Items (\\n Shipment_Items.shipment_id (INTEGER),\\n Shipment_Items.order_item_id (INTEGER),\\n)\\n\\nTable Shipments (\\n Shipments.shipment_id (INTEGER),\\n Shipments.order_id (INTEGER),\\n Shipments.invoice_number (INTEGER),\\n Shipments.shipment_tracking_number (VARCHAR(80)),\\n Shipments.shipment_date (DATETIME),\\n Shipments.other_shipment_details (VARCHAR(255)),\\n)\\n\\n\\nPossible JOINs:\\n\\nOrder_Items.product_id = Products.product_id\\nOrder_Items.order_id = Orders.order_id\\n\\nOrders.customer_id = Customers.customer_id\\n\\nShipment_Items.shipment_id = Shipments.shipment_id\\nShipment_Items.order_item_id = Order_Items.order_item_id\\n\\nShipments.order_id = Orders.order_id\\nShipments.invoice_number = Invoices.invoice_number\\n\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"schema_llm_columns_min_ct\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 147,\n \"samples\": [\n \"CREATE TABLE country (\\n country.country_id (INT),\\n country.name (TEXT),\\n country.population (INT),\\n country.area (INT),\\n country.languages (TEXT),\\n)\\n\\nCREATE TABLE roller_coaster (\\n roller_coaster.roller_coaster_id (INT),\\n roller_coaster.name (TEXT),\\n roller_coaster.park (TEXT),\\n roller_coaster.country_id (INT),\\n roller_coaster.length (REAL),\\n roller_coaster.height (REAL),\\n roller_coaster.speed (TEXT),\\n roller_coaster.opened (TEXT),\\n roller_coaster.status (TEXT),\\n)\\n\\n\\nPossible JOINs:\\n\\nroller_coaster.country_id = country.country_id\\n\",\n \"CREATE TABLE circuits (\\n circuits.circuitid (INTEGER),\\n circuits.circuitref (TEXT),\\n circuits.name (TEXT),\\n circuits.location (TEXT),\\n circuits.country (TEXT),\\n circuits.lat (REAL),\\n circuits.lng (REAL),\\n circuits.alt (TEXT),\\n circuits.url (TEXT),\\n)\\n\\nCREATE TABLE constructorResults (\\n constructorResults.constructorresultsid (INTEGER),\\n constructorResults.raceid (INTEGER),\\n constructorResults.constructorid (INTEGER),\\n constructorResults.points (REAL),\\n constructorResults.status (TEXT),\\n)\\n\\nCREATE TABLE constructorStandings (\\n constructorStandings.constructorstandingsid (INTEGER),\\n constructorStandings.raceid (INTEGER),\\n constructorStandings.constructorid (INTEGER),\\n constructorStandings.points (REAL),\\n constructorStandings.position (INTEGER),\\n constructorStandings.positiontext (TEXT),\\n constructorStandings.wins (INTEGER),\\n)\\n\\nCREATE TABLE constructors (\\n constructors.constructorid (INTEGER),\\n constructors.constructorref (TEXT),\\n constructors.name (TEXT),\\n constructors.nationality (TEXT),\\n constructors.url (TEXT),\\n)\\n\\nCREATE TABLE driverStandings (\\n driverStandings.driverstandingsid (INTEGER),\\n driverStandings.raceid (INTEGER),\\n driverStandings.driverid (INTEGER),\\n driverStandings.points (REAL),\\n driverStandings.position (INTEGER),\\n driverStandings.positiontext (TEXT),\\n driverStandings.wins (INTEGER),\\n)\\n\\nCREATE TABLE drivers (\\n drivers.driverid (INTEGER),\\n drivers.driverref (TEXT),\\n drivers.number (TEXT),\\n drivers.code (TEXT),\\n drivers.forename (TEXT),\\n drivers.surname (TEXT),\\n drivers.dob (TEXT),\\n drivers.nationality (TEXT),\\n drivers.url (TEXT),\\n)\\n\\nCREATE TABLE lapTimes (\\n lapTimes.raceid (INTEGER),\\n lapTimes.driverid (INTEGER),\\n lapTimes.lap (INTEGER),\\n lapTimes.position (INTEGER),\\n lapTimes.time (TEXT),\\n lapTimes.milliseconds (INTEGER),\\n)\\n\\nCREATE TABLE pitStops (\\n pitStops.raceid (INTEGER),\\n pitStops.driverid (INTEGER),\\n pitStops.stop (INTEGER),\\n pitStops.lap (INTEGER),\\n pitStops.time (TEXT),\\n pitStops.duration (TEXT),\\n pitStops.milliseconds (INTEGER),\\n)\\n\\nCREATE TABLE qualifying (\\n qualifying.qualifyid (INTEGER),\\n qualifying.raceid (INTEGER),\\n qualifying.driverid (INTEGER),\\n qualifying.constructorid (INTEGER),\\n qualifying.number (INTEGER),\\n qualifying.position (INTEGER),\\n qualifying.q1 (TEXT),\\n qualifying.q2 (TEXT),\\n qualifying.q3 (TEXT),\\n)\\n\\nCREATE TABLE races (\\n races.raceid (INTEGER),\\n races.year (INTEGER),\\n races.round (INTEGER),\\n races.circuitid (INTEGER),\\n races.name (TEXT),\\n races.date (TEXT),\\n races.time (TEXT),\\n races.url (TEXT),\\n)\\n\\nCREATE TABLE results (\\n results.resultid (INTEGER),\\n results.raceid (INTEGER),\\n results.driverid (INTEGER),\\n results.constructorid (INTEGER),\\n results.number (INTEGER),\\n results.grid (INTEGER),\\n results.position (TEXT),\\n results.positiontext (TEXT),\\n results.positionorder (INTEGER),\\n results.points (REAL),\\n results.laps (TEXT),\\n results.time (TEXT),\\n results.milliseconds (TEXT),\\n results.fastestlap (TEXT),\\n results.rank (TEXT),\\n results.fastestlaptime (TEXT),\\n results.fastestlapspeed (TEXT),\\n results.statusid (INTEGER),\\n)\\n\\nCREATE TABLE seasons (\\n seasons.year (INTEGER),\\n seasons.url (TEXT),\\n)\\n\\nCREATE TABLE status (\\n status.statusid (INTEGER),\\n status.status (TEXT),\\n)\\n\\n\\nPossible JOINs:\\n\\nconstructorResults.raceid = races.raceid\\nconstructorResults.constructorid = constructors.constructorid\\n\\nconstructorStandings.raceid = races.raceid\\nconstructorStandings.constructorid = constructors.constructorid\\n\\ndriverStandings.raceid = races.raceid\\ndriverStandings.driverid = drivers.driverid\\n\\nlapTimes.raceid = races.raceid\\nlapTimes.driverid = drivers.driverid\\n\\npitStops.raceid = races.raceid\\npitStops.driverid = drivers.driverid\\n\\nqualifying.raceid = races.raceid\\nqualifying.driverid = drivers.driverid\\nqualifying.constructorid = constructors.constructorid\\n\\nraces.circuitid = circuits.circuitid\\n\\nresults.raceid = races.raceid\\nresults.driverid = drivers.driverid\\nresults.constructorid = constructors.constructorid\\n\",\n \"CREATE TABLE Customers (\\n Customers.customer_id (INTEGER),\\n Customers.customer_name (VARCHAR(80)),\\n Customers.customer_details (VARCHAR(255)),\\n)\\n\\nCREATE TABLE Invoices (\\n Invoices.invoice_number (INTEGER),\\n Invoices.invoice_date (DATETIME),\\n Invoices.invoice_details (VARCHAR(255)),\\n)\\n\\nCREATE TABLE Order_Items (\\n Order_Items.order_item_id (INTEGER),\\n Order_Items.product_id (INTEGER),\\n Order_Items.order_id (INTEGER),\\n Order_Items.order_item_status (VARCHAR(10)),\\n Order_Items.order_item_details (VARCHAR(255)),\\n)\\n\\nCREATE TABLE Orders (\\n Orders.order_id (INTEGER),\\n Orders.customer_id (INTEGER),\\n Orders.order_status (VARCHAR(10)),\\n Orders.date_order_placed (DATETIME),\\n Orders.order_details (VARCHAR(255)),\\n)\\n\\nCREATE TABLE Products (\\n Products.product_id (INTEGER),\\n Products.product_name (VARCHAR(80)),\\n Products.product_details (VARCHAR(255)),\\n)\\n\\nCREATE TABLE Shipment_Items (\\n Shipment_Items.shipment_id (INTEGER),\\n Shipment_Items.order_item_id (INTEGER),\\n)\\n\\nCREATE TABLE Shipments (\\n Shipments.shipment_id (INTEGER),\\n Shipments.order_id (INTEGER),\\n Shipments.invoice_number (INTEGER),\\n Shipments.shipment_tracking_number (VARCHAR(80)),\\n Shipments.shipment_date (DATETIME),\\n Shipments.other_shipment_details (VARCHAR(255)),\\n)\\n\\n\\nPossible JOINs:\\n\\nOrder_Items.product_id = Products.product_id\\nOrder_Items.order_id = Orders.order_id\\n\\nOrders.customer_id = Customers.customer_id\\n\\nShipment_Items.shipment_id = Shipments.shipment_id\\nShipment_Items.order_item_id = Order_Items.order_item_id\\n\\nShipments.order_id = Orders.order_id\\nShipments.invoice_number = Invoices.invoice_number\\n\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"schema_llm_all_min\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 147,\n \"samples\": [\n \"Table country (\\n country.country_id (INT),\\n country.name (TEXT),\\n country.population (INT),\\n country.area (INT),\\n country.languages (TEXT),\\n)\\n\\nTable roller_coaster (\\n roller_coaster.roller_coaster_id (INT),\\n roller_coaster.name (TEXT),\\n roller_coaster.park (TEXT),\\n roller_coaster.country_id (INT),\\n roller_coaster.length (REAL),\\n roller_coaster.height (REAL),\\n roller_coaster.speed (TEXT),\\n roller_coaster.opened (TEXT),\\n roller_coaster.status (TEXT),\\n)\\n\\n\\nPossible JOINs:\\n\\nroller_coaster.country_id = country.country_id\\n\",\n \"Table circuits (\\n circuits.circuitid (INTEGER),\\n circuits.circuitref (TEXT),\\n circuits.name (TEXT),\\n circuits.location (TEXT),\\n circuits.country (TEXT),\\n circuits.lat (REAL),\\n circuits.lng (REAL),\\n circuits.alt (TEXT),\\n circuits.url (TEXT),\\n)\\n\\nTable constructorresults (\\n constructorresults.constructorresultsid (INTEGER),\\n constructorresults.raceid (INTEGER),\\n constructorresults.constructorid (INTEGER),\\n constructorresults.points (REAL),\\n constructorresults.status (TEXT),\\n)\\n\\nTable constructorstandings (\\n constructorstandings.constructorstandingsid (INTEGER),\\n constructorstandings.raceid (INTEGER),\\n constructorstandings.constructorid (INTEGER),\\n constructorstandings.points (REAL),\\n constructorstandings.position (INTEGER),\\n constructorstandings.positiontext (TEXT),\\n constructorstandings.wins (INTEGER),\\n)\\n\\nTable constructors (\\n constructors.constructorid (INTEGER),\\n constructors.constructorref (TEXT),\\n constructors.name (TEXT),\\n constructors.nationality (TEXT),\\n constructors.url (TEXT),\\n)\\n\\nTable driverstandings (\\n driverstandings.driverstandingsid (INTEGER),\\n driverstandings.raceid (INTEGER),\\n driverstandings.driverid (INTEGER),\\n driverstandings.points (REAL),\\n driverstandings.position (INTEGER),\\n driverstandings.positiontext (TEXT),\\n driverstandings.wins (INTEGER),\\n)\\n\\nTable drivers (\\n drivers.driverid (INTEGER),\\n drivers.driverref (TEXT),\\n drivers.number (TEXT),\\n drivers.code (TEXT),\\n drivers.forename (TEXT),\\n drivers.surname (TEXT),\\n drivers.dob (TEXT),\\n drivers.nationality (TEXT),\\n drivers.url (TEXT),\\n)\\n\\nTable laptimes (\\n laptimes.raceid (INTEGER),\\n laptimes.driverid (INTEGER),\\n laptimes.lap (INTEGER),\\n laptimes.position (INTEGER),\\n laptimes.time (TEXT),\\n laptimes.milliseconds (INTEGER),\\n)\\n\\nTable pitstops (\\n pitstops.raceid (INTEGER),\\n pitstops.driverid (INTEGER),\\n pitstops.stop (INTEGER),\\n pitstops.lap (INTEGER),\\n pitstops.time (TEXT),\\n pitstops.duration (TEXT),\\n pitstops.milliseconds (INTEGER),\\n)\\n\\nTable qualifying (\\n qualifying.qualifyid (INTEGER),\\n qualifying.raceid (INTEGER),\\n qualifying.driverid (INTEGER),\\n qualifying.constructorid (INTEGER),\\n qualifying.number (INTEGER),\\n qualifying.position (INTEGER),\\n qualifying.q1 (TEXT),\\n qualifying.q2 (TEXT),\\n qualifying.q3 (TEXT),\\n)\\n\\nTable races (\\n races.raceid (INTEGER),\\n races.year (INTEGER),\\n races.round (INTEGER),\\n races.circuitid (INTEGER),\\n races.name (TEXT),\\n races.date (TEXT),\\n races.time (TEXT),\\n races.url (TEXT),\\n)\\n\\nTable results (\\n results.resultid (INTEGER),\\n results.raceid (INTEGER),\\n results.driverid (INTEGER),\\n results.constructorid (INTEGER),\\n results.number (INTEGER),\\n results.grid (INTEGER),\\n results.position (TEXT),\\n results.positiontext (TEXT),\\n results.positionorder (INTEGER),\\n results.points (REAL),\\n results.laps (TEXT),\\n results.time (TEXT),\\n results.milliseconds (TEXT),\\n results.fastestlap (TEXT),\\n results.rank (TEXT),\\n results.fastestlaptime (TEXT),\\n results.fastestlapspeed (TEXT),\\n results.statusid (INTEGER),\\n)\\n\\nTable seasons (\\n seasons.year (INTEGER),\\n seasons.url (TEXT),\\n)\\n\\nTable status (\\n status.statusid (INTEGER),\\n status.status (TEXT),\\n)\\n\\n\\nPossible JOINs:\\n\\nconstructorresults.raceid = races.raceid\\nconstructorresults.constructorid = constructors.constructorid\\n\\nconstructorstandings.raceid = races.raceid\\nconstructorstandings.constructorid = constructors.constructorid\\n\\ndriverstandings.raceid = races.raceid\\ndriverstandings.driverid = drivers.driverid\\n\\nlaptimes.raceid = races.raceid\\nlaptimes.driverid = drivers.driverid\\n\\npitstops.raceid = races.raceid\\npitstops.driverid = drivers.driverid\\n\\nqualifying.raceid = races.raceid\\nqualifying.driverid = drivers.driverid\\nqualifying.constructorid = constructors.constructorid\\n\\nraces.circuitid = circuits.circuitid\\n\\nresults.raceid = races.raceid\\nresults.driverid = drivers.driverid\\nresults.constructorid = constructors.constructorid\\n\",\n \"Table customers (\\n customers.customer_id (INTEGER),\\n customers.customer_name (VARCHAR(80)),\\n customers.customer_details (VARCHAR(255)),\\n)\\n\\nTable invoices (\\n invoices.invoice_number (INTEGER),\\n invoices.invoice_date (DATETIME),\\n invoices.invoice_details (VARCHAR(255)),\\n)\\n\\nTable order_items (\\n order_items.order_item_id (INTEGER),\\n order_items.product_id (INTEGER),\\n order_items.order_id (INTEGER),\\n order_items.order_item_status (VARCHAR(10)),\\n order_items.order_item_details (VARCHAR(255)),\\n)\\n\\nTable orders (\\n orders.order_id (INTEGER),\\n orders.customer_id (INTEGER),\\n orders.order_status (VARCHAR(10)),\\n orders.date_order_placed (DATETIME),\\n orders.order_details (VARCHAR(255)),\\n)\\n\\nTable products (\\n products.product_id (INTEGER),\\n products.product_name (VARCHAR(80)),\\n products.product_details (VARCHAR(255)),\\n)\\n\\nTable shipment_items (\\n shipment_items.shipment_id (INTEGER),\\n shipment_items.order_item_id (INTEGER),\\n)\\n\\nTable shipments (\\n shipments.shipment_id (INTEGER),\\n shipments.order_id (INTEGER),\\n shipments.invoice_number (INTEGER),\\n shipments.shipment_tracking_number (VARCHAR(80)),\\n shipments.shipment_date (DATETIME),\\n shipments.other_shipment_details (VARCHAR(255)),\\n)\\n\\n\\nPossible JOINs:\\n\\norder_items.product_id = products.product_id\\norder_items.order_id = orders.order_id\\n\\norders.customer_id = customers.customer_id\\n\\nshipment_items.shipment_id = shipments.shipment_id\\nshipment_items.order_item_id = order_items.order_item_id\\n\\nshipments.order_id = orders.order_id\\nshipments.invoice_number = invoices.invoice_number\\n\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"schema_llm_all_min_ct\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 147,\n \"samples\": [\n \"CREATE TABLE country (\\n country.country_id (INT),\\n country.name (TEXT),\\n country.population (INT),\\n country.area (INT),\\n country.languages (TEXT),\\n)\\n\\nCREATE TABLE roller_coaster (\\n roller_coaster.roller_coaster_id (INT),\\n roller_coaster.name (TEXT),\\n roller_coaster.park (TEXT),\\n roller_coaster.country_id (INT),\\n roller_coaster.length (REAL),\\n roller_coaster.height (REAL),\\n roller_coaster.speed (TEXT),\\n roller_coaster.opened (TEXT),\\n roller_coaster.status (TEXT),\\n)\\n\\n\\nPossible JOINs:\\n\\nroller_coaster.country_id = country.country_id\\n\",\n \"CREATE TABLE circuits (\\n circuits.circuitid (INTEGER),\\n circuits.circuitref (TEXT),\\n circuits.name (TEXT),\\n circuits.location (TEXT),\\n circuits.country (TEXT),\\n circuits.lat (REAL),\\n circuits.lng (REAL),\\n circuits.alt (TEXT),\\n circuits.url (TEXT),\\n)\\n\\nCREATE TABLE constructorresults (\\n constructorresults.constructorresultsid (INTEGER),\\n constructorresults.raceid (INTEGER),\\n constructorresults.constructorid (INTEGER),\\n constructorresults.points (REAL),\\n constructorresults.status (TEXT),\\n)\\n\\nCREATE TABLE constructorstandings (\\n constructorstandings.constructorstandingsid (INTEGER),\\n constructorstandings.raceid (INTEGER),\\n constructorstandings.constructorid (INTEGER),\\n constructorstandings.points (REAL),\\n constructorstandings.position (INTEGER),\\n constructorstandings.positiontext (TEXT),\\n constructorstandings.wins (INTEGER),\\n)\\n\\nCREATE TABLE constructors (\\n constructors.constructorid (INTEGER),\\n constructors.constructorref (TEXT),\\n constructors.name (TEXT),\\n constructors.nationality (TEXT),\\n constructors.url (TEXT),\\n)\\n\\nCREATE TABLE driverstandings (\\n driverstandings.driverstandingsid (INTEGER),\\n driverstandings.raceid (INTEGER),\\n driverstandings.driverid (INTEGER),\\n driverstandings.points (REAL),\\n driverstandings.position (INTEGER),\\n driverstandings.positiontext (TEXT),\\n driverstandings.wins (INTEGER),\\n)\\n\\nCREATE TABLE drivers (\\n drivers.driverid (INTEGER),\\n drivers.driverref (TEXT),\\n drivers.number (TEXT),\\n drivers.code (TEXT),\\n drivers.forename (TEXT),\\n drivers.surname (TEXT),\\n drivers.dob (TEXT),\\n drivers.nationality (TEXT),\\n drivers.url (TEXT),\\n)\\n\\nCREATE TABLE laptimes (\\n laptimes.raceid (INTEGER),\\n laptimes.driverid (INTEGER),\\n laptimes.lap (INTEGER),\\n laptimes.position (INTEGER),\\n laptimes.time (TEXT),\\n laptimes.milliseconds (INTEGER),\\n)\\n\\nCREATE TABLE pitstops (\\n pitstops.raceid (INTEGER),\\n pitstops.driverid (INTEGER),\\n pitstops.stop (INTEGER),\\n pitstops.lap (INTEGER),\\n pitstops.time (TEXT),\\n pitstops.duration (TEXT),\\n pitstops.milliseconds (INTEGER),\\n)\\n\\nCREATE TABLE qualifying (\\n qualifying.qualifyid (INTEGER),\\n qualifying.raceid (INTEGER),\\n qualifying.driverid (INTEGER),\\n qualifying.constructorid (INTEGER),\\n qualifying.number (INTEGER),\\n qualifying.position (INTEGER),\\n qualifying.q1 (TEXT),\\n qualifying.q2 (TEXT),\\n qualifying.q3 (TEXT),\\n)\\n\\nCREATE TABLE races (\\n races.raceid (INTEGER),\\n races.year (INTEGER),\\n races.round (INTEGER),\\n races.circuitid (INTEGER),\\n races.name (TEXT),\\n races.date (TEXT),\\n races.time (TEXT),\\n races.url (TEXT),\\n)\\n\\nCREATE TABLE results (\\n results.resultid (INTEGER),\\n results.raceid (INTEGER),\\n results.driverid (INTEGER),\\n results.constructorid (INTEGER),\\n results.number (INTEGER),\\n results.grid (INTEGER),\\n results.position (TEXT),\\n results.positiontext (TEXT),\\n results.positionorder (INTEGER),\\n results.points (REAL),\\n results.laps (TEXT),\\n results.time (TEXT),\\n results.milliseconds (TEXT),\\n results.fastestlap (TEXT),\\n results.rank (TEXT),\\n results.fastestlaptime (TEXT),\\n results.fastestlapspeed (TEXT),\\n results.statusid (INTEGER),\\n)\\n\\nCREATE TABLE seasons (\\n seasons.year (INTEGER),\\n seasons.url (TEXT),\\n)\\n\\nCREATE TABLE status (\\n status.statusid (INTEGER),\\n status.status (TEXT),\\n)\\n\\n\\nPossible JOINs:\\n\\nconstructorresults.raceid = races.raceid\\nconstructorresults.constructorid = constructors.constructorid\\n\\nconstructorstandings.raceid = races.raceid\\nconstructorstandings.constructorid = constructors.constructorid\\n\\ndriverstandings.raceid = races.raceid\\ndriverstandings.driverid = drivers.driverid\\n\\nlaptimes.raceid = races.raceid\\nlaptimes.driverid = drivers.driverid\\n\\npitstops.raceid = races.raceid\\npitstops.driverid = drivers.driverid\\n\\nqualifying.raceid = races.raceid\\nqualifying.driverid = drivers.driverid\\nqualifying.constructorid = constructors.constructorid\\n\\nraces.circuitid = circuits.circuitid\\n\\nresults.raceid = races.raceid\\nresults.driverid = drivers.driverid\\nresults.constructorid = constructors.constructorid\\n\",\n \"CREATE TABLE customers (\\n customers.customer_id (INTEGER),\\n customers.customer_name (VARCHAR(80)),\\n customers.customer_details (VARCHAR(255)),\\n)\\n\\nCREATE TABLE invoices (\\n invoices.invoice_number (INTEGER),\\n invoices.invoice_date (DATETIME),\\n invoices.invoice_details (VARCHAR(255)),\\n)\\n\\nCREATE TABLE order_items (\\n order_items.order_item_id (INTEGER),\\n order_items.product_id (INTEGER),\\n order_items.order_id (INTEGER),\\n order_items.order_item_status (VARCHAR(10)),\\n order_items.order_item_details (VARCHAR(255)),\\n)\\n\\nCREATE TABLE orders (\\n orders.order_id (INTEGER),\\n orders.customer_id (INTEGER),\\n orders.order_status (VARCHAR(10)),\\n orders.date_order_placed (DATETIME),\\n orders.order_details (VARCHAR(255)),\\n)\\n\\nCREATE TABLE products (\\n products.product_id (INTEGER),\\n products.product_name (VARCHAR(80)),\\n products.product_details (VARCHAR(255)),\\n)\\n\\nCREATE TABLE shipment_items (\\n shipment_items.shipment_id (INTEGER),\\n shipment_items.order_item_id (INTEGER),\\n)\\n\\nCREATE TABLE shipments (\\n shipments.shipment_id (INTEGER),\\n shipments.order_id (INTEGER),\\n shipments.invoice_number (INTEGER),\\n shipments.shipment_tracking_number (VARCHAR(80)),\\n shipments.shipment_date (DATETIME),\\n shipments.other_shipment_details (VARCHAR(255)),\\n)\\n\\n\\nPossible JOINs:\\n\\norder_items.product_id = products.product_id\\norder_items.order_id = orders.order_id\\n\\norders.customer_id = customers.customer_id\\n\\nshipment_items.shipment_id = shipments.shipment_id\\nshipment_items.order_item_id = order_items.order_item_id\\n\\nshipments.order_id = orders.order_id\\nshipments.invoice_number = invoices.invoice_number\\n\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"schema_dict\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 146,\n \"samples\": [\n \"{\\n 'company': ['Company_ID', 'Rank', 'Company', 'Headquarters', 'Main_Industry', 'Sales_billion', 'Profits_billion', 'Assets_billion', 'Market_Value'],\\n 'gas_station': ['Station_ID', 'Open_Year', 'Location', 'Manager_Name', 'Vice_Manager_Name', 'Representative_Name'],\\n 'station_company': ['Station_ID', 'Company_ID', 'Rank_of_the_Year']\\n}\",\n \"{\\n 'Premises': ['premise_id', 'premises_type', 'premise_details'],\\n 'Products': ['product_id', 'product_category', 'product_name'],\\n 'Customers': ['customer_id', 'payment_method', 'customer_name', 'customer_phone', 'customer_email', 'customer_address', 'customer_login', 'customer_password'],\\n 'Mailshot_Campaigns': ['mailshot_id', 'product_category', 'mailshot_name', 'mailshot_start_date', 'mailshot_end_date'],\\n 'Customer_Addresses': ['customer_id', 'premise_id', 'date_address_from', 'address_type_code', 'date_address_to'],\\n 'Customer_Orders': ['order_id', 'customer_id', 'order_status_code', 'shipping_method_code', 'order_placed_datetime', 'order_delivered_datetime', 'order_shipping_charges'],\\n 'Mailshot_Customers': ['mailshot_id', 'customer_id', 'outcome_code', 'mailshot_customer_date'],\\n 'Order_Items': ['item_id', 'order_item_status_code', 'order_id', 'product_id', 'item_status_code', 'item_delivered_datetime', 'item_order_quantity']\\n}\",\n \"{\\n 'member': ['Member_ID', 'Name', 'Nationality', 'Role'],\\n 'performance': ['Performance_ID', 'Date', 'Host', 'Location', 'Attendance'],\\n 'member_attendance': ['Member_ID', 'Performance_ID', 'Num_of_Pieces']\\n}\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"selector_correct\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 3373,\n \"samples\": [\n \"{\\n 'user': ['uid', 'user_id', 'name'],\\n 'review': ['rid', 'user_id', 'rating']\\n}\",\n \"{\\n 'Documents': ['document_id', 'document_status_code', 'document_type_code']\\n}\",\n \"{\\n 'state': ['state_name', 'population'],\\n 'highlow': ['state_name', 'highest_point']\\n}\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"schema_llm_t\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 147,\n \"samples\": [\n \"TABLE country (\\n country.Country_ID (INT),\\n country.Name (TEXT),\\n country.Population (INT),\\n country.Area (INT),\\n country.Languages (TEXT),\\n)\\n\\nTABLE roller_coaster (\\n roller_coaster.Roller_Coaster_ID (INT),\\n roller_coaster.Name (TEXT),\\n roller_coaster.Park (TEXT),\\n roller_coaster.Country_ID (INT),\\n roller_coaster.Length (REAL),\\n roller_coaster.Height (REAL),\\n roller_coaster.Speed (TEXT),\\n roller_coaster.Opened (TEXT),\\n roller_coaster.Status (TEXT),\\n)\\n\\n\\nPossible JOINs:\\n\\nroller_coaster.Country_ID = country.Country_ID\\n\",\n \"TABLE circuits (\\n circuits.circuitId (INTEGER),\\n circuits.circuitRef (TEXT),\\n circuits.name (TEXT),\\n circuits.location (TEXT),\\n circuits.country (TEXT),\\n circuits.lat (REAL),\\n circuits.lng (REAL),\\n circuits.alt (TEXT),\\n circuits.url (TEXT),\\n)\\n\\nTABLE constructorResults (\\n constructorResults.constructorResultsId (INTEGER),\\n constructorResults.raceId (INTEGER),\\n constructorResults.constructorId (INTEGER),\\n constructorResults.points (REAL),\\n constructorResults.status (TEXT),\\n)\\n\\nTABLE constructorStandings (\\n constructorStandings.constructorStandingsId (INTEGER),\\n constructorStandings.raceId (INTEGER),\\n constructorStandings.constructorId (INTEGER),\\n constructorStandings.points (REAL),\\n constructorStandings.position (INTEGER),\\n constructorStandings.positionText (TEXT),\\n constructorStandings.wins (INTEGER),\\n)\\n\\nTABLE constructors (\\n constructors.constructorId (INTEGER),\\n constructors.constructorRef (TEXT),\\n constructors.name (TEXT),\\n constructors.nationality (TEXT),\\n constructors.url (TEXT),\\n)\\n\\nTABLE driverStandings (\\n driverStandings.driverStandingsId (INTEGER),\\n driverStandings.raceId (INTEGER),\\n driverStandings.driverId (INTEGER),\\n driverStandings.points (REAL),\\n driverStandings.position (INTEGER),\\n driverStandings.positionText (TEXT),\\n driverStandings.wins (INTEGER),\\n)\\n\\nTABLE drivers (\\n drivers.driverId (INTEGER),\\n drivers.driverRef (TEXT),\\n drivers.number (TEXT),\\n drivers.code (TEXT),\\n drivers.forename (TEXT),\\n drivers.surname (TEXT),\\n drivers.dob (TEXT),\\n drivers.nationality (TEXT),\\n drivers.url (TEXT),\\n)\\n\\nTABLE lapTimes (\\n lapTimes.raceId (INTEGER),\\n lapTimes.driverId (INTEGER),\\n lapTimes.lap (INTEGER),\\n lapTimes.position (INTEGER),\\n lapTimes.time (TEXT),\\n lapTimes.milliseconds (INTEGER),\\n)\\n\\nTABLE pitStops (\\n pitStops.raceId (INTEGER),\\n pitStops.driverId (INTEGER),\\n pitStops.stop (INTEGER),\\n pitStops.lap (INTEGER),\\n pitStops.time (TEXT),\\n pitStops.duration (TEXT),\\n pitStops.milliseconds (INTEGER),\\n)\\n\\nTABLE qualifying (\\n qualifying.qualifyId (INTEGER),\\n qualifying.raceId (INTEGER),\\n qualifying.driverId (INTEGER),\\n qualifying.constructorId (INTEGER),\\n qualifying.number (INTEGER),\\n qualifying.position (INTEGER),\\n qualifying.q1 (TEXT),\\n qualifying.q2 (TEXT),\\n qualifying.q3 (TEXT),\\n)\\n\\nTABLE races (\\n races.raceId (INTEGER),\\n races.year (INTEGER),\\n races.round (INTEGER),\\n races.circuitId (INTEGER),\\n races.name (TEXT),\\n races.date (TEXT),\\n races.time (TEXT),\\n races.url (TEXT),\\n)\\n\\nTABLE results (\\n results.resultId (INTEGER),\\n results.raceId (INTEGER),\\n results.driverId (INTEGER),\\n results.constructorId (INTEGER),\\n results.number (INTEGER),\\n results.grid (INTEGER),\\n results.position (TEXT),\\n results.positionText (TEXT),\\n results.positionOrder (INTEGER),\\n results.points (REAL),\\n results.laps (TEXT),\\n results.time (TEXT),\\n results.milliseconds (TEXT),\\n results.fastestLap (TEXT),\\n results.rank (TEXT),\\n results.fastestLapTime (TEXT),\\n results.fastestLapSpeed (TEXT),\\n results.statusId (INTEGER),\\n)\\n\\nTABLE seasons (\\n seasons.year (INTEGER),\\n seasons.url (TEXT),\\n)\\n\\nTABLE status (\\n status.statusId (INTEGER),\\n status.status (TEXT),\\n)\\n\\n\\nPossible JOINs:\\n\\nconstructorResults.raceId = races.raceId\\nconstructorResults.constructorId = constructors.constructorId\\n\\nconstructorStandings.raceId = races.raceId\\nconstructorStandings.constructorId = constructors.constructorId\\n\\ndriverStandings.raceId = races.raceId\\ndriverStandings.driverId = drivers.driverId\\n\\nlapTimes.raceId = races.raceId\\nlapTimes.driverId = drivers.driverId\\n\\npitStops.raceId = races.raceId\\npitStops.driverId = drivers.driverId\\n\\nqualifying.raceId = races.raceId\\nqualifying.driverId = drivers.driverId\\nqualifying.constructorId = constructors.constructorId\\n\\nraces.circuitId = circuits.circuitId\\n\\nresults.raceId = races.raceId\\nresults.driverId = drivers.driverId\\nresults.constructorId = constructors.constructorId\\n\",\n \"TABLE Customers (\\n Customers.customer_id (INTEGER),\\n Customers.customer_name (VARCHAR(80)),\\n Customers.customer_details (VARCHAR(255)),\\n)\\n\\nTABLE Invoices (\\n Invoices.invoice_number (INTEGER),\\n Invoices.invoice_date (DATETIME),\\n Invoices.invoice_details (VARCHAR(255)),\\n)\\n\\nTABLE Order_Items (\\n Order_Items.order_item_id (INTEGER),\\n Order_Items.product_id (INTEGER),\\n Order_Items.order_id (INTEGER),\\n Order_Items.order_item_status (VARCHAR(10)),\\n Order_Items.order_item_details (VARCHAR(255)),\\n)\\n\\nTABLE Orders (\\n Orders.order_id (INTEGER),\\n Orders.customer_id (INTEGER),\\n Orders.order_status (VARCHAR(10)),\\n Orders.date_order_placed (DATETIME),\\n Orders.order_details (VARCHAR(255)),\\n)\\n\\nTABLE Products (\\n Products.product_id (INTEGER),\\n Products.product_name (VARCHAR(80)),\\n Products.product_details (VARCHAR(255)),\\n)\\n\\nTABLE Shipment_Items (\\n Shipment_Items.shipment_id (INTEGER),\\n Shipment_Items.order_item_id (INTEGER),\\n)\\n\\nTABLE Shipments (\\n Shipments.shipment_id (INTEGER),\\n Shipments.order_id (INTEGER),\\n Shipments.invoice_number (INTEGER),\\n Shipments.shipment_tracking_number (VARCHAR(80)),\\n Shipments.shipment_date (DATETIME),\\n Shipments.other_shipment_details (VARCHAR(255)),\\n)\\n\\n\\nPossible JOINs:\\n\\nOrder_Items.product_id = Products.product_id\\nOrder_Items.order_id = Orders.order_id\\n\\nOrders.customer_id = Customers.customer_id\\n\\nShipment_Items.shipment_id = Shipments.shipment_id\\nShipment_Items.order_item_id = Order_Items.order_item_id\\n\\nShipments.order_id = Orders.order_id\\nShipments.invoice_number = Invoices.invoice_number\\n\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"schema_llm_columns_min_t\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 147,\n \"samples\": [\n \"TABLE country (\\n country.country_id (INT),\\n country.name (TEXT),\\n country.population (INT),\\n country.area (INT),\\n country.languages (TEXT),\\n)\\n\\nTABLE roller_coaster (\\n roller_coaster.roller_coaster_id (INT),\\n roller_coaster.name (TEXT),\\n roller_coaster.park (TEXT),\\n roller_coaster.country_id (INT),\\n roller_coaster.length (REAL),\\n roller_coaster.height (REAL),\\n roller_coaster.speed (TEXT),\\n roller_coaster.opened (TEXT),\\n roller_coaster.status (TEXT),\\n)\\n\\n\\nPossible JOINs:\\n\\nroller_coaster.country_id = country.country_id\\n\",\n \"TABLE circuits (\\n circuits.circuitid (INTEGER),\\n circuits.circuitref (TEXT),\\n circuits.name (TEXT),\\n circuits.location (TEXT),\\n circuits.country (TEXT),\\n circuits.lat (REAL),\\n circuits.lng (REAL),\\n circuits.alt (TEXT),\\n circuits.url (TEXT),\\n)\\n\\nTABLE constructorResults (\\n constructorResults.constructorresultsid (INTEGER),\\n constructorResults.raceid (INTEGER),\\n constructorResults.constructorid (INTEGER),\\n constructorResults.points (REAL),\\n constructorResults.status (TEXT),\\n)\\n\\nTABLE constructorStandings (\\n constructorStandings.constructorstandingsid (INTEGER),\\n constructorStandings.raceid (INTEGER),\\n constructorStandings.constructorid (INTEGER),\\n constructorStandings.points (REAL),\\n constructorStandings.position (INTEGER),\\n constructorStandings.positiontext (TEXT),\\n constructorStandings.wins (INTEGER),\\n)\\n\\nTABLE constructors (\\n constructors.constructorid (INTEGER),\\n constructors.constructorref (TEXT),\\n constructors.name (TEXT),\\n constructors.nationality (TEXT),\\n constructors.url (TEXT),\\n)\\n\\nTABLE driverStandings (\\n driverStandings.driverstandingsid (INTEGER),\\n driverStandings.raceid (INTEGER),\\n driverStandings.driverid (INTEGER),\\n driverStandings.points (REAL),\\n driverStandings.position (INTEGER),\\n driverStandings.positiontext (TEXT),\\n driverStandings.wins (INTEGER),\\n)\\n\\nTABLE drivers (\\n drivers.driverid (INTEGER),\\n drivers.driverref (TEXT),\\n drivers.number (TEXT),\\n drivers.code (TEXT),\\n drivers.forename (TEXT),\\n drivers.surname (TEXT),\\n drivers.dob (TEXT),\\n drivers.nationality (TEXT),\\n drivers.url (TEXT),\\n)\\n\\nTABLE lapTimes (\\n lapTimes.raceid (INTEGER),\\n lapTimes.driverid (INTEGER),\\n lapTimes.lap (INTEGER),\\n lapTimes.position (INTEGER),\\n lapTimes.time (TEXT),\\n lapTimes.milliseconds (INTEGER),\\n)\\n\\nTABLE pitStops (\\n pitStops.raceid (INTEGER),\\n pitStops.driverid (INTEGER),\\n pitStops.stop (INTEGER),\\n pitStops.lap (INTEGER),\\n pitStops.time (TEXT),\\n pitStops.duration (TEXT),\\n pitStops.milliseconds (INTEGER),\\n)\\n\\nTABLE qualifying (\\n qualifying.qualifyid (INTEGER),\\n qualifying.raceid (INTEGER),\\n qualifying.driverid (INTEGER),\\n qualifying.constructorid (INTEGER),\\n qualifying.number (INTEGER),\\n qualifying.position (INTEGER),\\n qualifying.q1 (TEXT),\\n qualifying.q2 (TEXT),\\n qualifying.q3 (TEXT),\\n)\\n\\nTABLE races (\\n races.raceid (INTEGER),\\n races.year (INTEGER),\\n races.round (INTEGER),\\n races.circuitid (INTEGER),\\n races.name (TEXT),\\n races.date (TEXT),\\n races.time (TEXT),\\n races.url (TEXT),\\n)\\n\\nTABLE results (\\n results.resultid (INTEGER),\\n results.raceid (INTEGER),\\n results.driverid (INTEGER),\\n results.constructorid (INTEGER),\\n results.number (INTEGER),\\n results.grid (INTEGER),\\n results.position (TEXT),\\n results.positiontext (TEXT),\\n results.positionorder (INTEGER),\\n results.points (REAL),\\n results.laps (TEXT),\\n results.time (TEXT),\\n results.milliseconds (TEXT),\\n results.fastestlap (TEXT),\\n results.rank (TEXT),\\n results.fastestlaptime (TEXT),\\n results.fastestlapspeed (TEXT),\\n results.statusid (INTEGER),\\n)\\n\\nTABLE seasons (\\n seasons.year (INTEGER),\\n seasons.url (TEXT),\\n)\\n\\nTABLE status (\\n status.statusid (INTEGER),\\n status.status (TEXT),\\n)\\n\\n\\nPossible JOINs:\\n\\nconstructorResults.raceid = races.raceid\\nconstructorResults.constructorid = constructors.constructorid\\n\\nconstructorStandings.raceid = races.raceid\\nconstructorStandings.constructorid = constructors.constructorid\\n\\ndriverStandings.raceid = races.raceid\\ndriverStandings.driverid = drivers.driverid\\n\\nlapTimes.raceid = races.raceid\\nlapTimes.driverid = drivers.driverid\\n\\npitStops.raceid = races.raceid\\npitStops.driverid = drivers.driverid\\n\\nqualifying.raceid = races.raceid\\nqualifying.driverid = drivers.driverid\\nqualifying.constructorid = constructors.constructorid\\n\\nraces.circuitid = circuits.circuitid\\n\\nresults.raceid = races.raceid\\nresults.driverid = drivers.driverid\\nresults.constructorid = constructors.constructorid\\n\",\n \"TABLE Customers (\\n Customers.customer_id (INTEGER),\\n Customers.customer_name (VARCHAR(80)),\\n Customers.customer_details (VARCHAR(255)),\\n)\\n\\nTABLE Invoices (\\n Invoices.invoice_number (INTEGER),\\n Invoices.invoice_date (DATETIME),\\n Invoices.invoice_details (VARCHAR(255)),\\n)\\n\\nTABLE Order_Items (\\n Order_Items.order_item_id (INTEGER),\\n Order_Items.product_id (INTEGER),\\n Order_Items.order_id (INTEGER),\\n Order_Items.order_item_status (VARCHAR(10)),\\n Order_Items.order_item_details (VARCHAR(255)),\\n)\\n\\nTABLE Orders (\\n Orders.order_id (INTEGER),\\n Orders.customer_id (INTEGER),\\n Orders.order_status (VARCHAR(10)),\\n Orders.date_order_placed (DATETIME),\\n Orders.order_details (VARCHAR(255)),\\n)\\n\\nTABLE Products (\\n Products.product_id (INTEGER),\\n Products.product_name (VARCHAR(80)),\\n Products.product_details (VARCHAR(255)),\\n)\\n\\nTABLE Shipment_Items (\\n Shipment_Items.shipment_id (INTEGER),\\n Shipment_Items.order_item_id (INTEGER),\\n)\\n\\nTABLE Shipments (\\n Shipments.shipment_id (INTEGER),\\n Shipments.order_id (INTEGER),\\n Shipments.invoice_number (INTEGER),\\n Shipments.shipment_tracking_number (VARCHAR(80)),\\n Shipments.shipment_date (DATETIME),\\n Shipments.other_shipment_details (VARCHAR(255)),\\n)\\n\\n\\nPossible JOINs:\\n\\nOrder_Items.product_id = Products.product_id\\nOrder_Items.order_id = Orders.order_id\\n\\nOrders.customer_id = Customers.customer_id\\n\\nShipment_Items.shipment_id = Shipments.shipment_id\\nShipment_Items.order_item_id = Order_Items.order_item_id\\n\\nShipments.order_id = Orders.order_id\\nShipments.invoice_number = Invoices.invoice_number\\n\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"schema_llm_all_min_t\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 147,\n \"samples\": [\n \"TABLE country (\\n country.country_id (INT),\\n country.name (TEXT),\\n country.population (INT),\\n country.area (INT),\\n country.languages (TEXT),\\n)\\n\\nTABLE roller_coaster (\\n roller_coaster.roller_coaster_id (INT),\\n roller_coaster.name (TEXT),\\n roller_coaster.park (TEXT),\\n roller_coaster.country_id (INT),\\n roller_coaster.length (REAL),\\n roller_coaster.height (REAL),\\n roller_coaster.speed (TEXT),\\n roller_coaster.opened (TEXT),\\n roller_coaster.status (TEXT),\\n)\\n\\n\\nPossible JOINs:\\n\\nroller_coaster.country_id = country.country_id\\n\",\n \"TABLE circuits (\\n circuits.circuitid (INTEGER),\\n circuits.circuitref (TEXT),\\n circuits.name (TEXT),\\n circuits.location (TEXT),\\n circuits.country (TEXT),\\n circuits.lat (REAL),\\n circuits.lng (REAL),\\n circuits.alt (TEXT),\\n circuits.url (TEXT),\\n)\\n\\nTABLE constructorresults (\\n constructorresults.constructorresultsid (INTEGER),\\n constructorresults.raceid (INTEGER),\\n constructorresults.constructorid (INTEGER),\\n constructorresults.points (REAL),\\n constructorresults.status (TEXT),\\n)\\n\\nTABLE constructorstandings (\\n constructorstandings.constructorstandingsid (INTEGER),\\n constructorstandings.raceid (INTEGER),\\n constructorstandings.constructorid (INTEGER),\\n constructorstandings.points (REAL),\\n constructorstandings.position (INTEGER),\\n constructorstandings.positiontext (TEXT),\\n constructorstandings.wins (INTEGER),\\n)\\n\\nTABLE constructors (\\n constructors.constructorid (INTEGER),\\n constructors.constructorref (TEXT),\\n constructors.name (TEXT),\\n constructors.nationality (TEXT),\\n constructors.url (TEXT),\\n)\\n\\nTABLE driverstandings (\\n driverstandings.driverstandingsid (INTEGER),\\n driverstandings.raceid (INTEGER),\\n driverstandings.driverid (INTEGER),\\n driverstandings.points (REAL),\\n driverstandings.position (INTEGER),\\n driverstandings.positiontext (TEXT),\\n driverstandings.wins (INTEGER),\\n)\\n\\nTABLE drivers (\\n drivers.driverid (INTEGER),\\n drivers.driverref (TEXT),\\n drivers.number (TEXT),\\n drivers.code (TEXT),\\n drivers.forename (TEXT),\\n drivers.surname (TEXT),\\n drivers.dob (TEXT),\\n drivers.nationality (TEXT),\\n drivers.url (TEXT),\\n)\\n\\nTABLE laptimes (\\n laptimes.raceid (INTEGER),\\n laptimes.driverid (INTEGER),\\n laptimes.lap (INTEGER),\\n laptimes.position (INTEGER),\\n laptimes.time (TEXT),\\n laptimes.milliseconds (INTEGER),\\n)\\n\\nTABLE pitstops (\\n pitstops.raceid (INTEGER),\\n pitstops.driverid (INTEGER),\\n pitstops.stop (INTEGER),\\n pitstops.lap (INTEGER),\\n pitstops.time (TEXT),\\n pitstops.duration (TEXT),\\n pitstops.milliseconds (INTEGER),\\n)\\n\\nTABLE qualifying (\\n qualifying.qualifyid (INTEGER),\\n qualifying.raceid (INTEGER),\\n qualifying.driverid (INTEGER),\\n qualifying.constructorid (INTEGER),\\n qualifying.number (INTEGER),\\n qualifying.position (INTEGER),\\n qualifying.q1 (TEXT),\\n qualifying.q2 (TEXT),\\n qualifying.q3 (TEXT),\\n)\\n\\nTABLE races (\\n races.raceid (INTEGER),\\n races.year (INTEGER),\\n races.round (INTEGER),\\n races.circuitid (INTEGER),\\n races.name (TEXT),\\n races.date (TEXT),\\n races.time (TEXT),\\n races.url (TEXT),\\n)\\n\\nTABLE results (\\n results.resultid (INTEGER),\\n results.raceid (INTEGER),\\n results.driverid (INTEGER),\\n results.constructorid (INTEGER),\\n results.number (INTEGER),\\n results.grid (INTEGER),\\n results.position (TEXT),\\n results.positiontext (TEXT),\\n results.positionorder (INTEGER),\\n results.points (REAL),\\n results.laps (TEXT),\\n results.time (TEXT),\\n results.milliseconds (TEXT),\\n results.fastestlap (TEXT),\\n results.rank (TEXT),\\n results.fastestlaptime (TEXT),\\n results.fastestlapspeed (TEXT),\\n results.statusid (INTEGER),\\n)\\n\\nTABLE seasons (\\n seasons.year (INTEGER),\\n seasons.url (TEXT),\\n)\\n\\nTABLE status (\\n status.statusid (INTEGER),\\n status.status (TEXT),\\n)\\n\\n\\nPossible JOINs:\\n\\nconstructorresults.raceid = races.raceid\\nconstructorresults.constructorid = constructors.constructorid\\n\\nconstructorstandings.raceid = races.raceid\\nconstructorstandings.constructorid = constructors.constructorid\\n\\ndriverstandings.raceid = races.raceid\\ndriverstandings.driverid = drivers.driverid\\n\\nlaptimes.raceid = races.raceid\\nlaptimes.driverid = drivers.driverid\\n\\npitstops.raceid = races.raceid\\npitstops.driverid = drivers.driverid\\n\\nqualifying.raceid = races.raceid\\nqualifying.driverid = drivers.driverid\\nqualifying.constructorid = constructors.constructorid\\n\\nraces.circuitid = circuits.circuitid\\n\\nresults.raceid = races.raceid\\nresults.driverid = drivers.driverid\\nresults.constructorid = constructors.constructorid\\n\",\n \"TABLE customers (\\n customers.customer_id (INTEGER),\\n customers.customer_name (VARCHAR(80)),\\n customers.customer_details (VARCHAR(255)),\\n)\\n\\nTABLE invoices (\\n invoices.invoice_number (INTEGER),\\n invoices.invoice_date (DATETIME),\\n invoices.invoice_details (VARCHAR(255)),\\n)\\n\\nTABLE order_items (\\n order_items.order_item_id (INTEGER),\\n order_items.product_id (INTEGER),\\n order_items.order_id (INTEGER),\\n order_items.order_item_status (VARCHAR(10)),\\n order_items.order_item_details (VARCHAR(255)),\\n)\\n\\nTABLE orders (\\n orders.order_id (INTEGER),\\n orders.customer_id (INTEGER),\\n orders.order_status (VARCHAR(10)),\\n orders.date_order_placed (DATETIME),\\n orders.order_details (VARCHAR(255)),\\n)\\n\\nTABLE products (\\n products.product_id (INTEGER),\\n products.product_name (VARCHAR(80)),\\n products.product_details (VARCHAR(255)),\\n)\\n\\nTABLE shipment_items (\\n shipment_items.shipment_id (INTEGER),\\n shipment_items.order_item_id (INTEGER),\\n)\\n\\nTABLE shipments (\\n shipments.shipment_id (INTEGER),\\n shipments.order_id (INTEGER),\\n shipments.invoice_number (INTEGER),\\n shipments.shipment_tracking_number (VARCHAR(80)),\\n shipments.shipment_date (DATETIME),\\n shipments.other_shipment_details (VARCHAR(255)),\\n)\\n\\n\\nPossible JOINs:\\n\\norder_items.product_id = products.product_id\\norder_items.order_id = orders.order_id\\n\\norders.customer_id = customers.customer_id\\n\\nshipment_items.shipment_id = shipments.shipment_id\\nshipment_items.order_item_id = order_items.order_item_id\\n\\nshipments.order_id = orders.order_id\\nshipments.invoice_number = invoices.invoice_number\\n\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"text\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 8647,\n \"samples\": [\n \"<|begin_of_text|><|start_header_id|>system<|end_header_id|>\\n\\nCutting Knowledge Date: December 2023\\nToday Date: 06 Oct 2024\\n\\n<|eot_id|><|start_header_id|>user<|end_header_id|>\\n\\n# System:\\nGiven a user question and the schema of a database, your task is to generate an JSON with the the names of tables and columns of the schema that the question is referring to.\\n\\n# Schema:\\n```sql\\nTABLE event (\\n event.ID (INT),\\n event.Name (TEXT),\\n event.Stadium_ID (INT),\\n event.Year (TEXT),\\n)\\n\\nTABLE record (\\n record.ID (INT),\\n record.Result (TEXT),\\n record.Swimmer_ID (INT),\\n record.Event_ID (INT),\\n)\\n\\nTABLE stadium (\\n stadium.ID (INT),\\n stadium.name (TEXT),\\n stadium.Capacity (INT),\\n stadium.City (TEXT),\\n stadium.Country (TEXT),\\n stadium.Opening_year (INT),\\n)\\n\\nTABLE swimmer (\\n swimmer.ID (INT),\\n swimmer.name (TEXT),\\n swimmer.Nationality (TEXT),\\n swimmer.meter_100 (REAL),\\n swimmer.meter_200 (TEXT),\\n swimmer.meter_300 (TEXT),\\n swimmer.meter_400 (TEXT),\\n swimmer.meter_500 (TEXT),\\n swimmer.meter_600 (TEXT),\\n swimmer.meter_700 (TEXT),\\n swimmer.Time (TEXT),\\n)\\n\\n\\nPossible JOINs:\\n\\nevent.Stadium_ID = stadium.ID\\n\\nrecord.Swimmer_ID = swimmer.ID\\nrecord.Event_ID = event.ID\\n\\n```\\n\\n# Question: Which countries do not have a stadium that was opened after 2006?<|eot_id|><|start_header_id|>assistant<|end_header_id|>\\n\\n```json\\n{ 'stadium': ['ID', 'Country', 'Opening_year'] }\\n```<|eot_id|>\",\n \"<|begin_of_text|><|start_header_id|>system<|end_header_id|>\\n\\nCutting Knowledge Date: December 2023\\nToday Date: 06 Oct 2024\\n\\n<|eot_id|><|start_header_id|>user<|end_header_id|>\\n\\n# System:\\nGiven a user question and the schema of a database, your task is to generate an JSON with the the names of tables and columns of the schema that the question is referring to.\\n\\n# Schema:\\n```sql\\nTABLE film (\\n film.Film_ID (INT),\\n film.Title (TEXT),\\n film.Studio (TEXT),\\n film.Director (TEXT),\\n film.Gross_in_dollar (INT),\\n)\\n\\nTABLE film_market_estimation (\\n film_market_estimation.Estimation_ID (INT),\\n film_market_estimation.Low_Estimate (REAL),\\n film_market_estimation.High_Estimate (REAL),\\n film_market_estimation.Film_ID (INT),\\n film_market_estimation.Type (TEXT),\\n film_market_estimation.Market_ID (INT),\\n film_market_estimation.Year (INT),\\n)\\n\\nTABLE market (\\n market.Market_ID (INT),\\n market.Country (TEXT),\\n market.Number_cities (INT),\\n)\\n\\n\\nPossible JOINs:\\n\\nfilm_market_estimation.Film_ID = film.Film_ID\\nfilm_market_estimation.Market_ID = market.Market_ID\\n\\n```\\n\\n# Question: How many film are there?<|eot_id|><|start_header_id|>assistant<|end_header_id|>\\n\\n```json\\n{ 'film': ['Film_ID'] }\\n```<|eot_id|>\",\n \"<|begin_of_text|><|start_header_id|>system<|end_header_id|>\\n\\nCutting Knowledge Date: December 2023\\nToday Date: 06 Oct 2024\\n\\n<|eot_id|><|start_header_id|>user<|end_header_id|>\\n\\n# System:\\nGiven a user question and the schema of a database, your task is to generate an JSON with the the names of tables and columns of the schema that the question is referring to.\\n\\n# Schema:\\n```sql\\nTABLE book (\\n book.Book_ID (INT),\\n book.Title (TEXT),\\n book.Issues (REAL),\\n book.Writer (TEXT),\\n)\\n\\nTABLE publication (\\n publication.Publication_ID (INT),\\n publication.Book_ID (INT),\\n publication.Publisher (TEXT),\\n publication.Publication_Date (TEXT),\\n publication.Price (REAL),\\n)\\n\\n\\nPossible JOINs:\\n\\npublication.Book_ID = book.Book_ID\\n\\n```\\n\\n# Question: Show the title and publication dates of books.<|eot_id|><|start_header_id|>assistant<|end_header_id|>\\n\\n```json\\n{ 'book': ['Book_ID', 'Title'],\\n 'publication': ['Publication_ID', 'Book_ID', 'Publication_Date'] }\\n```<|eot_id|>\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}", "type": "dataframe", "variable_name": "df" }, "text/html": [ "\n", "
\n", " | db_id | \n", "schema | \n", "question_en | \n", "hardness | \n", "schema_llm | \n", "query_llm | \n", "selector | \n", "schema_llm_ct | \n", "schema_llm_columns_min | \n", "schema_llm_columns_min_ct | \n", "schema_llm_all_min | \n", "schema_llm_all_min_ct | \n", "schema_dict | \n", "selector_correct | \n", "schema_llm_t | \n", "schema_llm_columns_min_t | \n", "schema_llm_all_min_t | \n", "text | \n", "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | \n", "department_management | \n", "\\nCREATE TABLE department (\\n\\t\"Department_ID\"... | \n", "How many heads of the departments are older th... | \n", "easy | \n", "Table department (\\n department.Department_... | \n", "SELECT count(*) FROM head WHERE age > 56 | \n", "{\\n 'head': ['head_id', 'age']\\n} | \n", "CREATE TABLE department (\\n department.Depa... | \n", "Table department (\\n department.department_... | \n", "CREATE TABLE department (\\n department.depa... | \n", "Table department (\\n department.department_... | \n", "CREATE TABLE department (\\n department.depa... | \n", "{\\n 'department': ['Department_ID', 'Name', '... | \n", "{\\n 'head': ['head_ID', 'age']\\n} | \n", "TABLE department (\\n department.Department_... | \n", "TABLE department (\\n department.department_... | \n", "TABLE department (\\n department.department_... | \n", "<|begin_of_text|><|start_header_id|>system<|en... | \n", "
1 | \n", "department_management | \n", "\\nCREATE TABLE department (\\n\\t\"Department_ID\"... | \n", "List the name, born state and age of the heads... | \n", "medium | \n", "Table department (\\n department.Department_... | \n", "SELECT name , born_state , age FROM head ORD... | \n", "{\\n 'head': ['head_id', 'name', 'born_state',... | \n", "CREATE TABLE department (\\n department.Depa... | \n", "Table department (\\n department.department_... | \n", "CREATE TABLE department (\\n department.depa... | \n", "Table department (\\n department.department_... | \n", "CREATE TABLE department (\\n department.depa... | \n", "{\\n 'department': ['Department_ID', 'Name', '... | \n", "{\\n 'head': ['head_ID', 'name', 'born_state',... | \n", "TABLE department (\\n department.Department_... | \n", "TABLE department (\\n department.department_... | \n", "TABLE department (\\n department.department_... | \n", "<|begin_of_text|><|start_header_id|>system<|en... | \n", "
2 | \n", "department_management | \n", "\\nCREATE TABLE department (\\n\\t\"Department_ID\"... | \n", "List the creation year, name and budget of eac... | \n", "medium | \n", "Table department (\\n department.Department_... | \n", "SELECT creation , name , budget_in_billions ... | \n", "{\\n 'department': ['department_id', 'name', '... | \n", "CREATE TABLE department (\\n department.Depa... | \n", "Table department (\\n department.department_... | \n", "CREATE TABLE department (\\n department.depa... | \n", "Table department (\\n department.department_... | \n", "CREATE TABLE department (\\n department.depa... | \n", "{\\n 'department': ['Department_ID', 'Name', '... | \n", "{\\n 'department': ['Department_ID', 'Name', '... | \n", "TABLE department (\\n department.Department_... | \n", "TABLE department (\\n department.department_... | \n", "TABLE department (\\n department.department_... | \n", "<|begin_of_text|><|start_header_id|>system<|en... | \n", "
3 | \n", "department_management | \n", "\\nCREATE TABLE department (\\n\\t\"Department_ID\"... | \n", "What are the maximum and minimum budget of the... | \n", "medium | \n", "Table department (\\n department.Department_... | \n", "SELECT max(budget_in_billions) , min(budget_i... | \n", "{\\n 'department': ['department_id', 'budget_i... | \n", "CREATE TABLE department (\\n department.Depa... | \n", "Table department (\\n department.department_... | \n", "CREATE TABLE department (\\n department.depa... | \n", "Table department (\\n department.department_... | \n", "CREATE TABLE department (\\n department.depa... | \n", "{\\n 'department': ['Department_ID', 'Name', '... | \n", "{\\n 'department': ['Department_ID', 'Budget_i... | \n", "TABLE department (\\n department.Department_... | \n", "TABLE department (\\n department.department_... | \n", "TABLE department (\\n department.department_... | \n", "<|begin_of_text|><|start_header_id|>system<|en... | \n", "
4 | \n", "department_management | \n", "\\nCREATE TABLE department (\\n\\t\"Department_ID\"... | \n", "What is the average number of employees of the... | \n", "easy | \n", "Table department (\\n department.Department_... | \n", "SELECT avg(num_employees) FROM department WHER... | \n", "{\\n 'department': ['department_id', 'ranking'... | \n", "CREATE TABLE department (\\n department.Depa... | \n", "Table department (\\n department.department_... | \n", "CREATE TABLE department (\\n department.depa... | \n", "Table department (\\n department.department_... | \n", "CREATE TABLE department (\\n department.depa... | \n", "{\\n 'department': ['Department_ID', 'Name', '... | \n", "{\\n 'department': ['Department_ID', 'Ranking'... | \n", "TABLE department (\\n department.Department_... | \n", "TABLE department (\\n department.department_... | \n", "TABLE department (\\n department.department_... | \n", "<|begin_of_text|><|start_header_id|>system<|en... | \n", "
Step | \n", "Training Loss | \n", "Validation Loss | \n", "
---|---|---|
250 | \n", "0.362400 | \n", "0.295918 | \n", "
500 | \n", "0.238700 | \n", "0.229598 | \n", "
750 | \n", "0.206300 | \n", "0.196152 | \n", "
1000 | \n", "0.185700 | \n", "0.187826 | \n", "
"
],
"text/plain": [
"