tuandunghcmut committed
Commit d756736 · verified · 1 parent: ff5d469

Add files using upload-large-folder tool

This view is limited to 50 files because the commit contains too many changes. See the raw diff for the full change set.
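The commit message refers to the Hugging Face upload-large-folder tool, which pushes a big local folder in resumable chunks and creates batched commits like this one. As a minimal sketch (the repo id and folder path below are illustrative placeholders, not values taken from this commit), the equivalent call through the Python API is:

# Sketch only: push a large local folder to an existing Hub repo.
# "your-username/your-repo" and "./local_folder" are placeholders.
from huggingface_hub import HfApi

api = HfApi()
api.upload_large_folder(
    repo_id="your-username/your-repo",
    repo_type="model",
    folder_path="./local_folder",
)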
Files changed (50)
  1. .gitattributes +25 -0
  2. a_mllm_notebooks/openai/image.jpg +3 -0
  3. a_mllm_notebooks/openai/infer.py +167 -0
  4. a_mllm_notebooks/openai/infer.sh +4 -0
  5. a_mllm_notebooks/openai/langchain_openai_api.ipynb +0 -0
  6. a_mllm_notebooks/openai/load_synth_pedes.ipynb +96 -0
  7. a_mllm_notebooks/openai/ping_server.ipynb +416 -0
  8. a_mllm_notebooks/openai/proxy.sh +10 -0
  9. a_mllm_notebooks/openai/serve.sh +60 -0
  10. a_mllm_notebooks/openai/temp.json +0 -0
  11. a_mllm_notebooks/openai/temp.sh +25 -0
  12. a_mllm_notebooks/tensorrt-llm/clone_folder.ipynb +78 -0
  13. a_mllm_notebooks/vllm/Untitled.ipynb +68 -0
  14. a_mllm_notebooks/vllm/cat.jpg +3 -0
  15. a_mllm_notebooks/vllm/cli.md +405 -0
  16. a_mllm_notebooks/vllm/download_md.ipynb +213 -0
  17. a_mllm_notebooks/vllm/florence_2.ipynb +355 -0
  18. a_mllm_notebooks/vllm/serve.sh +452 -0
  19. a_mllm_notebooks/vllm/start.ipynb +432 -0
  20. mlruns/0/meta.yaml +6 -0
  21. recognize-anything/.ipynb_checkpoints/README-checkpoint.md +601 -0
  22. recognize-anything/.ipynb_checkpoints/recognize_anything_demo-checkpoint.ipynb +0 -0
  23. recognize-anything/datasets/hico/hico_600_annots.txt +0 -0
  24. recognize-anything/datasets/hico/hico_600_taglist.txt +600 -0
  25. recognize-anything/datasets/imagenet_multi/imagenet_multi_1000_annots.txt +0 -0
  26. recognize-anything/datasets/imagenet_multi/imagenet_multi_1000_taglist.txt +1000 -0
  27. recognize-anything/datasets/openimages_common_214/imgs/.gitkeep +0 -0
  28. recognize-anything/datasets/openimages_common_214/openimages_common_214_ram_annots.txt +0 -0
  29. recognize-anything/datasets/openimages_common_214/openimages_common_214_ram_taglist.txt +214 -0
  30. recognize-anything/datasets/openimages_common_214/openimages_common_214_tag2text_idannots.txt +0 -0
  31. recognize-anything/datasets/openimages_common_214/openimages_common_214_tag2text_tagidlist.txt +214 -0
  32. recognize-anything/datasets/openimages_rare_200/imgs/.gitkeep +0 -0
  33. recognize-anything/datasets/openimages_rare_200/openimages_rare_200_llm_tag_descriptions.json +0 -0
  34. recognize-anything/datasets/openimages_rare_200/openimages_rare_200_ram_annots.txt +0 -0
  35. recognize-anything/datasets/openimages_rare_200/openimages_rare_200_ram_taglist.txt +200 -0
  36. recognize-anything/images/.ipynb_checkpoints/ram_plus_framework-checkpoint.jpg +3 -0
  37. recognize-anything/images/.ipynb_checkpoints/ram_plus_visualization-checkpoint.jpg +3 -0
  38. recognize-anything/images/.ipynb_checkpoints/tag2text_retrieval_visualization-checkpoint.png +3 -0
  39. recognize-anything/images/1641173_2291260800.jpg +3 -0
  40. recognize-anything/images/demo/.ipynb_checkpoints/demo2-checkpoint.jpg +3 -0
  41. recognize-anything/images/demo/.ipynb_checkpoints/demo4-checkpoint.jpg +3 -0
  42. recognize-anything/images/demo/demo1.jpg +3 -0
  43. recognize-anything/images/demo/demo2.jpg +3 -0
  44. recognize-anything/images/demo/demo3.jpg +3 -0
  45. recognize-anything/images/demo/demo4.jpg +3 -0
  46. recognize-anything/images/experiment_comparison.png +3 -0
  47. recognize-anything/images/localization_and_recognition.jpg +3 -0
  48. recognize-anything/images/openset_example.jpg +3 -0
  49. recognize-anything/images/ram_grounded_sam.jpg +3 -0
  50. recognize-anything/images/ram_plus_compare.jpg +3 -0
.gitattributes CHANGED
@@ -427,3 +427,28 @@ VILA/inference_test/test_data/painting_1.png filter=lfs diff=lfs merge=lfs -text
 VILA/inference_test/test_data/palm_e_3.png filter=lfs diff=lfs merge=lfs -text
 VILA/tests/sample_data/llava_arch_test_images/23/image.png filter=lfs diff=lfs merge=lfs -text
 VILA/tests/sample_data/llava_arch_test_images/5/image.png filter=lfs diff=lfs merge=lfs -text
+ recognize-anything/images/ram_grounded_sam.jpg filter=lfs diff=lfs merge=lfs -text
+ recognize-anything/images/ram_plus_experiment.png filter=lfs diff=lfs merge=lfs -text
+ recognize-anything/images/ram_plus_compare.jpg filter=lfs diff=lfs merge=lfs -text
+ recognize-anything/images/experiment_comparison.png filter=lfs diff=lfs merge=lfs -text
+ recognize-anything/images/1641173_2291260800.jpg filter=lfs diff=lfs merge=lfs -text
+ recognize-anything/images/tag2text_framework.png filter=lfs diff=lfs merge=lfs -text
+ recognize-anything/images/tag2text_retrieval_visualization.png filter=lfs diff=lfs merge=lfs -text
+ recognize-anything/images/tag2text_visualization.png filter=lfs diff=lfs merge=lfs -text
+ recognize-anything/images/tagging_results.jpg filter=lfs diff=lfs merge=lfs -text
+ recognize-anything/images/localization_and_recognition.jpg filter=lfs diff=lfs merge=lfs -text
+ recognize-anything/images/ram_plus_visualization.jpg filter=lfs diff=lfs merge=lfs -text
+ recognize-anything/images/openset_example.jpg filter=lfs diff=lfs merge=lfs -text
+ recognize-anything/images/ram_plus_framework.jpg filter=lfs diff=lfs merge=lfs -text
+ recognize-anything/images/tag2text_grounded_sam.jpg filter=lfs diff=lfs merge=lfs -text
+ recognize-anything/images/demo/demo3.jpg filter=lfs diff=lfs merge=lfs -text
+ recognize-anything/images/demo/demo1.jpg filter=lfs diff=lfs merge=lfs -text
+ recognize-anything/images/demo/demo2.jpg filter=lfs diff=lfs merge=lfs -text
+ recognize-anything/images/demo/demo4.jpg filter=lfs diff=lfs merge=lfs -text
+ recognize-anything/images/.ipynb_checkpoints/tag2text_retrieval_visualization-checkpoint.png filter=lfs diff=lfs merge=lfs -text
+ recognize-anything/images/.ipynb_checkpoints/ram_plus_visualization-checkpoint.jpg filter=lfs diff=lfs merge=lfs -text
+ recognize-anything/images/.ipynb_checkpoints/ram_plus_framework-checkpoint.jpg filter=lfs diff=lfs merge=lfs -text
+ recognize-anything/images/demo/.ipynb_checkpoints/demo4-checkpoint.jpg filter=lfs diff=lfs merge=lfs -text
+ recognize-anything/images/demo/.ipynb_checkpoints/demo2-checkpoint.jpg filter=lfs diff=lfs merge=lfs -text
+ a_mllm_notebooks/vllm/cat.jpg filter=lfs diff=lfs merge=lfs -text
+ a_mllm_notebooks/openai/image.jpg filter=lfs diff=lfs merge=lfs -text
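Every image under recognize-anything/images/ is tracked with its own per-file rule above. Since .gitattributes patterns follow the same globbing rules as .gitignore, a recursive wildcard rule would cover these files (and future ones) in two lines; this is an optional simplification, not something the commit itself does:

recognize-anything/images/**/*.jpg filter=lfs diff=lfs merge=lfs -text
recognize-anything/images/**/*.png filter=lfs diff=lfs merge=lfs -text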
a_mllm_notebooks/openai/image.jpg ADDED

Git LFS Details

  • SHA256: dea9e7ef97386345f7cff32f9055da4982da5471c48d575146c796ab4563b04e
  • Pointer size: 131 Bytes
  • Size of remote file: 173 kB
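For context, what Git itself stores for an LFS-tracked file is only a small text pointer. With the SHA256 listed above it looks roughly like the sketch below; the size field holds the exact byte count, which this page reports only rounded to 173 kB, so the value shown here is approximate:

version https://git-lfs.github.com/spec/v1
oid sha256:dea9e7ef97386345f7cff32f9055da4982da5471c48d575146c796ab4563b04e
size 173000

Those three lines are what the reported 131-byte pointer size refers to; the actual JPEG lives in LFS storage.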
a_mllm_notebooks/openai/infer.py ADDED
@@ -0,0 +1,167 @@
+ # !pip install openai
+ from openai import OpenAI
+ from tqdm import tqdm
+ client = OpenAI(api_key="YOUR_API_KEY", base_url="http://0.0.0.0:7089/v1")
+ model_name = client.models.list().data[0].id
+
+
+
+ NUM_MODEL = len(client.models.list().data)  # number of models served behind the endpoint
+ NUM_THREAD = min(int(NUM_MODEL * 1.5), 32)
+
+ import datasets, huggingface_hub
+ disk_path = '/dscilab_dungvo/workspace/BA-PRE_THESIS/dataset_pretraining/SYNTH-PEDES/annotation_english_vietnamese_processed'
+ dataset = datasets.load_from_disk(disk_path)
+
+ # Dataset({
+ #     features: ['image_name', 'person_id', 'caption_0', 'caption_1', 'attributes', 'prompt_caption', 'image', 'viet_captions', 'viet_prompt_caption'],
+ #     num_rows: 4791127
+ # })
+
+ # {'image_name': 'Part1/1/0.jpg',
+ # 'person_id': 1,
+ # 'caption_0': 'A woman with black hair and she is wearing a black jacket with blue jeans paired with black shoes.',
+ # 'caption_1': '',
+ # 'attributes': 'woman,short hair,black jacket,blue denim jeans,black sneakers,black backpack',
+ # 'prompt_caption': 'The woman has short hair. She is wearing a black jacket, blue denim jeans and black sneakers. She is carrying a black backpack. ',
+ # 'image': <PIL.JpegImagePlugin.JpegImageFile image mode=RGB size=59x129>,
+ # 'viet_captions': ['Một người phụ nữ với mái tóc đen và cô ấy đang mặc một chiếc áo khoác màu đen với quần jean màu xanh kết hợp với giày đen.'],
+ # 'viet_prompt_caption': ['Người phụ nữ có mái tóc ngắn. Cô đang mặc một chiếc áo khoác màu đen, quần jean denim màu xanh và giày thể thao màu đen. Cô đang mang theo một ba lô màu đen.']}
+
+
+
+ def get_output(english_text):
+     response = client.chat.completions.create(
+         model=model_name,
+         messages=[
+             {
+                 "role": "system",
+                 "content": "You are a helpful assistant who is proficient in translating English to Chinese.",
+             },
+             {
+                 "role": "user",
+                 "content": "Please translate and paraphrase the following sentence into natural, fluent Chinese: " + english_text,
+             },
+         ],
+         temperature=0.7,
+         top_p=0.9,
+     )
+     return response.choices[0].message.content
+
+
+ output_root_folder = './output_chinese'
+ import os
+ # Create the output directory if it does not exist yet.
+ os.makedirs(output_root_folder, exist_ok=True)
+
+ # Multithreaded inference with NUM_THREAD worker threads.
+
+ import threading
+ import time
+
+ # def get_list_partition_index(n, num_partition):
+ #     partition_size = n // num_partition
+ #     partition_index = []
+ #     for i in range(num_partition):
+ #         if i == num_partition - 1:
+ #             partition_index.append((i * partition_size, n))
+ #         else:
+ #             partition_index.append((i * partition_size, (i + 1) * partition_size))
+ #     return partition_index
+
+ # /dscilab_dungvo/workspace/vlm_clone/a_mllm_notebooks/openai/output_chinese/thread_32/4509280.json
+ def get_uninferenced_indices(total_indices, output_dir):
+     inferenced_indices = set()  # indices that already have a result file, so reruns can resume
+     for thread_folder in os.listdir(output_dir):
+         if 'thread' not in thread_folder:
+             continue
+         thread_path = os.path.join(output_dir, thread_folder)
+         if os.path.isdir(thread_path):
+             for json_file in os.listdir(thread_path):
+                 try:
+                     index = json_file.split('.')[0]
+                     index = int(index)
+                 except ValueError:
+                     print(f"Error: {json_file}")
+                     continue
+                 inferenced_indices.add(index)
+     uninferenced_indices = [index for index in total_indices if index not in inferenced_indices]
+     return uninferenced_indices
+
+ total_indices = list(range(len(dataset)))
+ REMAIN_INDEXES = get_uninferenced_indices(total_indices, output_root_folder)
+
+ def get_list_partition_from_list_index(list_index, num_partition):
+     n = len(list_index)
+     partition_size = n // num_partition  # the last partition absorbs the remainder
+     partition_index = []
+     for i in range(num_partition):
+         if i == num_partition - 1:
+             partition_index.append(list_index[i * partition_size:])
+         else:
+             partition_index.append(list_index[i * partition_size:(i + 1) * partition_size])
+     return partition_index
+
+
+ # LIST_PARTITION_INDEX is a list of index lists, one per thread.
+ LIST_PARTITION_INDEX = get_list_partition_from_list_index(REMAIN_INDEXES, NUM_THREAD)
+ import json
+
+ # Each thread loops over its partition; for each index it requests the Chinese translation of prompt_caption, caption_0 and caption_1.
+
+ def thread_function(thread_id):
+     # Make the output folder for this thread.
+     os.makedirs(os.path.join(output_root_folder, f"thread_{thread_id}"), exist_ok=True)
+
+     list_index = LIST_PARTITION_INDEX[thread_id]
+
+     for i in tqdm(range(len(list_index))):
+         if i % 1000 == 0:
+             print(f"Thread {thread_id}: {i}/{len(list_index)}")
+
+         index = list_index[i]
+         item = dataset[index]
+         dump_item = {}
+
+         for key in ['prompt_caption', 'caption_0', 'caption_1']:
+             english_text = item[key]
+
+             if english_text == '':
+                 chinese_text = ''
+             else:
+                 chinese_text = get_output(english_text)
+             dump_item[key + '_chinese'] = chinese_text
+
+         # Dump this record to its own JSON file.
+         with open(os.path.join(output_root_folder, f"thread_{thread_id}", f"{index}.json"), 'w') as f:
+             json.dump(dump_item, f)
+
+     print(f"Thread {thread_id}: Done")
+
+ threads = []
+ # for i, (start, end) in enumerate(LIST_PARTITION_INDEX):
+ for i in range(NUM_THREAD):
+     x = threading.Thread(target=thread_function, args=(i,))
+     threads.append(x)
+     x.start()
+     time.sleep(1)
+
+ for thread in threads:
+     thread.join()
+
+ print("Done")
+
+ # # Combine all JSON files from the thread folders into a single JSON Lines file
+ # import os
+ # import json
+ # list_json_files = []
+ # for thread_folder in os.listdir(output_root_folder):
+ #     for json_file in os.listdir(os.path.join(output_root_folder, thread_folder)):
+ #         list_json_files.append(os.path.join(output_root_folder, thread_folder, json_file))
+
+ # output_json_file = './output_chinese.jsonl'
+ # with open(output_json_file, 'w') as f:
+ #     for json_file in list_json_files:
+ #         with open(json_file, 'r') as f_json:
+ #             json.dump(json.load(f_json), f)
+ #             f.write('\n')
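infer.py fans the requests out with raw threading.Thread objects over a manual index partition, and a single failed request will kill its whole thread. A minimal alternative sketch using concurrent.futures with a simple retry wrapper is shown below; it reuses get_output, dataset, NUM_THREAD, REMAIN_INDEXES and output_root_folder as defined above, writes one JSON file per record directly under the output folder, and the retry count and backoff are illustrative assumptions, not part of the original script.

# Sketch only: thread-pool variant of the loop in infer.py, with retries.
import json
import os
import time
from concurrent.futures import ThreadPoolExecutor

def get_output_with_retry(text, retries=3, backoff=2.0):
    # Retry transient API/connection errors with a linear backoff.
    for attempt in range(retries):
        try:
            return get_output(text)
        except Exception:
            if attempt == retries - 1:
                raise
            time.sleep(backoff * (attempt + 1))

def process_index(index):
    item = dataset[index]
    out = {}
    for key in ['prompt_caption', 'caption_0', 'caption_1']:
        text = item[key]
        out[key + '_chinese'] = get_output_with_retry(text) if text else ''
    with open(os.path.join(output_root_folder, f"{index}.json"), 'w') as f:
        json.dump(out, f, ensure_ascii=False)

with ThreadPoolExecutor(max_workers=NUM_THREAD) as pool:
    list(pool.map(process_index, REMAIN_INDEXES))  # list() surfaces worker exceptions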
a_mllm_notebooks/openai/infer.sh ADDED
@@ -0,0 +1,4 @@
+ eval "$(conda shell.bash hook)"
+ conda activate lmdeploy
+
+ python infer.py &
a_mllm_notebooks/openai/langchain_openai_api.ipynb ADDED
The diff for this file is too large to render. See raw diff
 
a_mllm_notebooks/openai/load_synth_pedes.ipynb ADDED
@@ -0,0 +1,96 @@
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": null,
6
+ "metadata": {},
7
+ "outputs": [],
8
+ "source": [
9
+ "import datasets, huggingface_hub\n",
10
+ "# huggingface_hub.login(token=\"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\")\n",
11
+ "disk_path ='/dscilab_dungvo/workspace/BA-PRE_THESIS/dataset_pretraining/SYNTH-PEDES/annotation_english_vietnamese_processed'\n",
12
+ "dataset = datasets.load_from_disk(disk_path)\n",
13
+ "# dataset = dataset.cast_column('image', datasets.Image(decode=True))"
14
+ ]
15
+ },
16
+ {
17
+ "cell_type": "code",
18
+ "execution_count": 5,
19
+ "metadata": {},
20
+ "outputs": [
21
+ {
22
+ "data": {
23
+ "text/plain": [
24
+ "Dataset({\n",
25
+ " features: ['image_name', 'person_id', 'caption_0', 'caption_1', 'attributes', 'prompt_caption', 'image', 'viet_captions', 'viet_prompt_caption'],\n",
26
+ " num_rows: 4791127\n",
27
+ "})"
28
+ ]
29
+ },
30
+ "execution_count": 5,
31
+ "metadata": {},
32
+ "output_type": "execute_result"
33
+ }
34
+ ],
35
+ "source": [
36
+ "dataset"
37
+ ]
38
+ },
39
+ {
40
+ "cell_type": "code",
41
+ "execution_count": 4,
42
+ "metadata": {},
43
+ "outputs": [
44
+ {
45
+ "data": {
46
+ "text/plain": [
47
+ "{'image_name': 'Part1/1/0.jpg',\n",
48
+ " 'person_id': 1,\n",
49
+ " 'caption_0': 'A woman with black hair and she is wearing a black jacket with blue jeans paired with black shoes.',\n",
50
+ " 'caption_1': '',\n",
51
+ " 'attributes': 'woman,short hair,black jacket,blue denim jeans,black sneakers,black backpack',\n",
52
+ " 'prompt_caption': 'The woman has short hair. She is wearing a black jacket, blue denim jeans and black sneakers. She is carrying a black backpack. ',\n",
53
+ " 'image': <PIL.JpegImagePlugin.JpegImageFile image mode=RGB size=59x129>,\n",
54
+ " 'viet_captions': ['Một người phụ nữ với mái tóc đen và cô ấy đang mặc một chiếc áo khoác màu đen với quần jean màu xanh kết hợp với giày đen.'],\n",
55
+ " 'viet_prompt_caption': ['Người phụ nữ có mái tóc ngắn. Cô đang mặc một chiếc áo khoác màu đen, quần jean denim màu xanh và giày thể thao màu đen. Cô đang mang theo một ba lô màu đen.']}"
56
+ ]
57
+ },
58
+ "execution_count": 4,
59
+ "metadata": {},
60
+ "output_type": "execute_result"
61
+ }
62
+ ],
63
+ "source": [
64
+ "dataset[0]"
65
+ ]
66
+ },
67
+ {
68
+ "cell_type": "code",
69
+ "execution_count": null,
70
+ "metadata": {},
71
+ "outputs": [],
72
+ "source": []
73
+ }
74
+ ],
75
+ "metadata": {
76
+ "kernelspec": {
77
+ "display_name": "Python 3 (ipykernel)",
78
+ "language": "python",
79
+ "name": "python3"
80
+ },
81
+ "language_info": {
82
+ "codemirror_mode": {
83
+ "name": "ipython",
84
+ "version": 3
85
+ },
86
+ "file_extension": ".py",
87
+ "mimetype": "text/x-python",
88
+ "name": "python",
89
+ "nbconvert_exporter": "python",
90
+ "pygments_lexer": "ipython3",
91
+ "version": "3.12.2"
92
+ }
93
+ },
94
+ "nbformat": 4,
95
+ "nbformat_minor": 4
96
+ }
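The notebook loads all 4,791,127 rows from disk and then indexes single records. A small sketch for previewing a few rows without decoding any images, assuming the same disk_path as above, is:

# Sketch only: preview a few records of the SYNTH-PEDES annotations.
import datasets

disk_path = '/dscilab_dungvo/workspace/BA-PRE_THESIS/dataset_pretraining/SYNTH-PEDES/annotation_english_vietnamese_processed'
dataset = datasets.load_from_disk(disk_path)

preview = dataset.select(range(3)).remove_columns(['image'])  # drop the image column to avoid decoding
for row in preview:
    print(row['image_name'], '->', row['prompt_caption'])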
a_mllm_notebooks/openai/ping_server.ipynb ADDED
@@ -0,0 +1,416 @@
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": 1,
6
+ "metadata": {},
7
+ "outputs": [],
8
+ "source": [
9
+ "# !pip install openai\n",
10
+ "from openai import OpenAI\n",
11
+ "\n",
12
+ "client = OpenAI(api_key=\"YOUR_API_KEY\", base_url=\"http://0.0.0.0:8092/v1\")\n",
13
+ "model_name = client.models.list().data[0].id\n",
14
+ "# response = client.chat.completions.create(\n",
15
+ "# model=model_name,\n",
16
+ "# messages=[\n",
17
+ "# {\n",
18
+ "# \"role\": \"system\",\n",
19
+ "# \"content\": \"You are a helpful assistant who is proficient in translating English to Chinese.\",\n",
20
+ "# },\n",
21
+ "# {\n",
22
+ "# \"role\": \"user\",\n",
23
+ "# \"content\": \"Please translate and paraphrase the following sentence into natural, fluent Chinese: \",\n",
24
+ "# },\n",
25
+ "# ],\n",
26
+ "# temperature=0.8,\n",
27
+ "# top_p=0.9,\n",
28
+ "# )\n",
29
+ "# print(response)"
30
+ ]
31
+ },
32
+ {
33
+ "cell_type": "code",
34
+ "execution_count": 2,
35
+ "metadata": {},
36
+ "outputs": [
37
+ {
38
+ "data": {
39
+ "text/plain": [
40
+ "1"
41
+ ]
42
+ },
43
+ "execution_count": 2,
44
+ "metadata": {},
45
+ "output_type": "execute_result"
46
+ }
47
+ ],
48
+ "source": [
49
+ "len(client.models.list().data)"
50
+ ]
51
+ },
52
+ {
53
+ "cell_type": "code",
54
+ "execution_count": 3,
55
+ "metadata": {},
56
+ "outputs": [
57
+ {
58
+ "data": {
59
+ "text/plain": [
60
+ "'这个人穿着红色的衬衫和蓝色的牛仔裤。'"
61
+ ]
62
+ },
63
+ "execution_count": 3,
64
+ "metadata": {},
65
+ "output_type": "execute_result"
66
+ }
67
+ ],
68
+ "source": [
69
+ "def get_output(english_text):\n",
70
+ " response = client.chat.completions.create(\n",
71
+ " model=model_name,\n",
72
+ " messages=[\n",
73
+ " {\n",
74
+ " \"role\": \"system\",\n",
75
+ " \"content\": \"You are a helpful assistant who is proficient in translating English to Chinese.\",\n",
76
+ " },\n",
77
+ " {\n",
78
+ " \"role\": \"user\",\n",
79
+ " \"content\": \"Please translate and paraphrase the following sentence into natural, fluent Chinese: \" + english_text,\n",
80
+ " },\n",
81
+ " ],\n",
82
+ " temperature=0.7,\n",
83
+ " top_p=0.9,\n",
84
+ " )\n",
85
+ " return response.choices[0].message.content\n",
86
+ "\n",
87
+ "o = get_output(\"The man is wearing a red shirt and blue jeans.\" * 1)\n",
88
+ "o"
89
+ ]
90
+ },
91
+ {
92
+ "cell_type": "code",
93
+ "execution_count": 13,
94
+ "metadata": {},
95
+ "outputs": [
96
+ {
97
+ "data": {
98
+ "text/plain": [
99
+ "'```\\nparaphrase:\\n- 1: The man is walking. He is wearing a dark grey jacket that fits closely. His black trousers are tight. He is wearing brown shoes.\\n- 2: As he walks, the man is dressed in a dark grey jacket that hugs his body. His black trousers are snug, and he is wearing brown shoes.\\n- 3: The man is out for a walk, wearing a dark grey jacket that fits snugly. His black trousers are tight, and he is wearing brown shoes.\\n```'"
100
+ ]
101
+ },
102
+ "execution_count": 13,
103
+ "metadata": {},
104
+ "output_type": "execute_result"
105
+ }
106
+ ],
107
+ "source": [
108
+ "def get_output(english_text):\n",
109
+ " response = client.chat.completions.create(\n",
110
+ " model=model_name,\n",
111
+ " messages=[\n",
112
+ " {\n",
113
+ " \"role\": \"system\",\n",
114
+ " \"content\": \"You are a helpful assistant who is proficient in paraphrase text\",\n",
115
+ " },\n",
116
+ " {\n",
117
+ " \"role\": \"user\",\n",
118
+ " \"content\": english_text,\n",
119
+ " },\n",
120
+ " ],\n",
121
+ " temperature=0.7,\n",
122
+ " top_p=0.9,\n",
123
+ " )\n",
124
+ " return response.choices[0].message.content\n",
125
+ "\n",
126
+ "o = get_output('''\n",
127
+ "Please correct the grammar of this text, then paraphrase it into 3 different ways:\n",
128
+ "The man is walking. He is wearing a dark grey jacket that is close. His trousers is black. He has a pair of brown shoes. \n",
129
+ "The man is walking. He is wearing a dark grey jacket that is close. His trousers is black. He has a pair of brown shoes.\n",
130
+ "The man is walking. He is wearing a dark grey jacket that is close. His trousers is black. He has a pair of brown shoes.\n",
131
+ "The man is walking. He is wearing a dark grey jacket that is close. His trousers is black. He has a pair of brown shoes.\n",
132
+ "The man is walking. He is wearing a dark grey jacket that is close. His trousers is black. He has a pair of brown shoes.\n",
133
+ "The man is walking. He is wearing a dark grey jacket that is close. His trousers is black. He has a pair of brown shoes.\n",
134
+ "The man is walking. He is wearing a dark grey jacket that is close. His trousers is black. He has a pair of brown shoes.\n",
135
+ "\n",
136
+ "\n",
137
+ "Return your answer in YAML format without comment or explaining anything.\n",
138
+ "Example:\n",
139
+ "```\n",
140
+ "paraphrase:\n",
141
+ "- 1: ...\n",
142
+ "- 2: ...\n",
143
+ "- 3: ...\n",
144
+ "```\n",
145
+ "''')\n",
146
+ "o"
147
+ ]
148
+ },
149
+ {
150
+ "cell_type": "code",
151
+ "execution_count": 15,
152
+ "metadata": {},
153
+ "outputs": [
154
+ {
155
+ "name": "stdout",
156
+ "output_type": "stream",
157
+ "text": [
158
+ "1.06 s ± 39.9 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)\n"
159
+ ]
160
+ }
161
+ ],
162
+ "source": [
163
+ "%%timeit\n",
164
+ "o = get_output('''\n",
165
+ "Please correct the grammar of this text, then paraphrase it into 3 different ways:\n",
166
+ "The man is walking. He is wearing a dark grey jacket that is close. His trousers is black. He has a pair of brown shoes.\n",
167
+ "The man is walking. He is wearing a dark grey jacket that is close. His trousers is black. He has a pair of brown shoes.\n",
168
+ "The man is walking. He is wearing a dark grey jacket that is close. His trousers is black. He has a pair of brown shoes.\n",
169
+ "The man is walking. He is wearing a dark grey jacket that is close. His trousers is black. He has a pair of brown shoes.\n",
170
+ "\n",
171
+ "Return your answer in YAML format without comment or explaining anything.\n",
172
+ "Example:\n",
173
+ "```\n",
174
+ "paraphrase:\n",
175
+ "- 1: ...\n",
176
+ "- 2: ...\n",
177
+ "- 3: ...\n",
178
+ "```\n",
179
+ "''')\n",
180
+ "o"
181
+ ]
182
+ },
183
+ {
184
+ "cell_type": "code",
185
+ "execution_count": 9,
186
+ "metadata": {},
187
+ "outputs": [
188
+ {
189
+ "data": {
190
+ "text/plain": [
191
+ "{'paraphrase': [{1: 'The man is walking in a dark grey jacket and black trousers, wearing a pair of brown shoes.'},\n",
192
+ " {2: 'As he walks, the man is dressed in a dark grey jacket, black trousers, and brown shoes.'},\n",
193
+ " {3: 'The man is on his walk, wearing a dark grey jacket, black trousers, and brown shoes.'}]}"
194
+ ]
195
+ },
196
+ "execution_count": 9,
197
+ "metadata": {},
198
+ "output_type": "execute_result"
199
+ }
200
+ ],
201
+ "source": [
202
+ "import yaml\n",
203
+ "\n",
204
+ "def load_yaml_string(yaml_string):\n",
205
+ " # Remove the triple backticks and any leading/trailing whitespace\n",
206
+ " yaml_string = yaml_string.strip('```').strip()\n",
207
+ " \n",
208
+ " # Load the YAML string into a Python dictionary\n",
209
+ " data = yaml.safe_load(yaml_string)\n",
210
+ " \n",
211
+ " return data\n",
212
+ "load_yaml_string(o)"
213
+ ]
214
+ },
215
+ {
216
+ "cell_type": "code",
217
+ "execution_count": 5,
218
+ "metadata": {},
219
+ "outputs": [
220
+ {
221
+ "data": {
222
+ "text/plain": [
223
+ "21"
224
+ ]
225
+ },
226
+ "execution_count": 5,
227
+ "metadata": {},
228
+ "output_type": "execute_result"
229
+ }
230
+ ],
231
+ "source": []
232
+ },
233
+ {
234
+ "cell_type": "code",
235
+ "execution_count": 1,
236
+ "metadata": {},
237
+ "outputs": [],
238
+ "source": [
239
+ "# !ps aux|grep infer|grep -v grep | awk '{print $2}'|xargs kill -9"
240
+ ]
241
+ },
242
+ {
243
+ "cell_type": "code",
244
+ "execution_count": 5,
245
+ "metadata": {},
246
+ "outputs": [
247
+ {
248
+ "ename": "APIConnectionError",
249
+ "evalue": "Connection error.",
250
+ "output_type": "error",
251
+ "traceback": [
252
+ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
253
+ "\u001b[0;31mConnectError\u001b[0m Traceback (most recent call last)",
254
+ "File \u001b[0;32m/usr/local/lib/python3.11/dist-packages/httpx/_transports/default.py:101\u001b[0m, in \u001b[0;36mmap_httpcore_exceptions\u001b[0;34m()\u001b[0m\n\u001b[1;32m 100\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m--> 101\u001b[0m \u001b[38;5;28;01myield\u001b[39;00m\n\u001b[1;32m 102\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mException\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m exc:\n",
255
+ "File \u001b[0;32m/usr/local/lib/python3.11/dist-packages/httpx/_transports/default.py:250\u001b[0m, in \u001b[0;36mHTTPTransport.handle_request\u001b[0;34m(self, request)\u001b[0m\n\u001b[1;32m 249\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m map_httpcore_exceptions():\n\u001b[0;32m--> 250\u001b[0m resp \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_pool\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mhandle_request\u001b[49m\u001b[43m(\u001b[49m\u001b[43mreq\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 252\u001b[0m \u001b[38;5;28;01massert\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(resp\u001b[38;5;241m.\u001b[39mstream, typing\u001b[38;5;241m.\u001b[39mIterable)\n",
256
+ "File \u001b[0;32m/usr/local/lib/python3.11/dist-packages/httpcore/_sync/connection_pool.py:256\u001b[0m, in \u001b[0;36mConnectionPool.handle_request\u001b[0;34m(self, request)\u001b[0m\n\u001b[1;32m 255\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_close_connections(closing)\n\u001b[0;32m--> 256\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m exc \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[1;32m 258\u001b[0m \u001b[38;5;66;03m# Return the response. Note that in this case we still have to manage\u001b[39;00m\n\u001b[1;32m 259\u001b[0m \u001b[38;5;66;03m# the point at which the response is closed.\u001b[39;00m\n",
257
+ "File \u001b[0;32m/usr/local/lib/python3.11/dist-packages/httpcore/_sync/connection_pool.py:236\u001b[0m, in \u001b[0;36mConnectionPool.handle_request\u001b[0;34m(self, request)\u001b[0m\n\u001b[1;32m 234\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[1;32m 235\u001b[0m \u001b[38;5;66;03m# Send the request on the assigned connection.\u001b[39;00m\n\u001b[0;32m--> 236\u001b[0m response \u001b[38;5;241m=\u001b[39m \u001b[43mconnection\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mhandle_request\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 237\u001b[0m \u001b[43m \u001b[49m\u001b[43mpool_request\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mrequest\u001b[49m\n\u001b[1;32m 238\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 239\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m ConnectionNotAvailable:\n\u001b[1;32m 240\u001b[0m \u001b[38;5;66;03m# In some cases a connection may initially be available to\u001b[39;00m\n\u001b[1;32m 241\u001b[0m \u001b[38;5;66;03m# handle a request, but then become unavailable.\u001b[39;00m\n\u001b[1;32m 242\u001b[0m \u001b[38;5;66;03m#\u001b[39;00m\n\u001b[1;32m 243\u001b[0m \u001b[38;5;66;03m# In this case we clear the connection and try again.\u001b[39;00m\n",
258
+ "File \u001b[0;32m/usr/local/lib/python3.11/dist-packages/httpcore/_sync/connection.py:101\u001b[0m, in \u001b[0;36mHTTPConnection.handle_request\u001b[0;34m(self, request)\u001b[0m\n\u001b[1;32m 100\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_connect_failed \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mTrue\u001b[39;00m\n\u001b[0;32m--> 101\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m exc\n\u001b[1;32m 103\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_connection\u001b[38;5;241m.\u001b[39mhandle_request(request)\n",
259
+ "File \u001b[0;32m/usr/local/lib/python3.11/dist-packages/httpcore/_sync/connection.py:78\u001b[0m, in \u001b[0;36mHTTPConnection.handle_request\u001b[0;34m(self, request)\u001b[0m\n\u001b[1;32m 77\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_connection \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[0;32m---> 78\u001b[0m stream \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_connect\u001b[49m\u001b[43m(\u001b[49m\u001b[43mrequest\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 80\u001b[0m ssl_object \u001b[38;5;241m=\u001b[39m stream\u001b[38;5;241m.\u001b[39mget_extra_info(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mssl_object\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n",
260
+ "File \u001b[0;32m/usr/local/lib/python3.11/dist-packages/httpcore/_sync/connection.py:124\u001b[0m, in \u001b[0;36mHTTPConnection._connect\u001b[0;34m(self, request)\u001b[0m\n\u001b[1;32m 123\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m Trace(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mconnect_tcp\u001b[39m\u001b[38;5;124m\"\u001b[39m, logger, request, kwargs) \u001b[38;5;28;01mas\u001b[39;00m trace:\n\u001b[0;32m--> 124\u001b[0m stream \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_network_backend\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mconnect_tcp\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 125\u001b[0m trace\u001b[38;5;241m.\u001b[39mreturn_value \u001b[38;5;241m=\u001b[39m stream\n",
261
+ "File \u001b[0;32m/usr/local/lib/python3.11/dist-packages/httpcore/_backends/sync.py:207\u001b[0m, in \u001b[0;36mSyncBackend.connect_tcp\u001b[0;34m(self, host, port, timeout, local_address, socket_options)\u001b[0m\n\u001b[1;32m 202\u001b[0m exc_map: ExceptionMapping \u001b[38;5;241m=\u001b[39m {\n\u001b[1;32m 203\u001b[0m socket\u001b[38;5;241m.\u001b[39mtimeout: ConnectTimeout,\n\u001b[1;32m 204\u001b[0m \u001b[38;5;167;01mOSError\u001b[39;00m: ConnectError,\n\u001b[1;32m 205\u001b[0m }\n\u001b[0;32m--> 207\u001b[0m \u001b[43m\u001b[49m\u001b[38;5;28;43;01mwith\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[43mmap_exceptions\u001b[49m\u001b[43m(\u001b[49m\u001b[43mexc_map\u001b[49m\u001b[43m)\u001b[49m\u001b[43m:\u001b[49m\n\u001b[1;32m 208\u001b[0m \u001b[43m \u001b[49m\u001b[43msock\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m \u001b[49m\u001b[43msocket\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mcreate_connection\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 209\u001b[0m \u001b[43m \u001b[49m\u001b[43maddress\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 210\u001b[0m \u001b[43m \u001b[49m\u001b[43mtimeout\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 211\u001b[0m \u001b[43m \u001b[49m\u001b[43msource_address\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43msource_address\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 212\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n",
262
+ "File \u001b[0;32m/usr/lib/python3.11/contextlib.py:155\u001b[0m, in \u001b[0;36m_GeneratorContextManager.__exit__\u001b[0;34m(self, typ, value, traceback)\u001b[0m\n\u001b[1;32m 154\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m--> 155\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mgen\u001b[38;5;241m.\u001b[39mthrow(typ, value, traceback)\n\u001b[1;32m 156\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mStopIteration\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m exc:\n\u001b[1;32m 157\u001b[0m \u001b[38;5;66;03m# Suppress StopIteration *unless* it's the same exception that\u001b[39;00m\n\u001b[1;32m 158\u001b[0m \u001b[38;5;66;03m# was passed to throw(). This prevents a StopIteration\u001b[39;00m\n\u001b[1;32m 159\u001b[0m \u001b[38;5;66;03m# raised inside the \"with\" statement from being suppressed.\u001b[39;00m\n",
263
+ "File \u001b[0;32m/usr/local/lib/python3.11/dist-packages/httpcore/_exceptions.py:14\u001b[0m, in \u001b[0;36mmap_exceptions\u001b[0;34m(map)\u001b[0m\n\u001b[1;32m 13\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(exc, from_exc):\n\u001b[0;32m---> 14\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m to_exc(exc) \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mexc\u001b[39;00m\n\u001b[1;32m 15\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m\n",
264
+ "\u001b[0;31mConnectError\u001b[0m: [Errno 111] Connection refused",
265
+ "\nThe above exception was the direct cause of the following exception:\n",
266
+ "\u001b[0;31mConnectError\u001b[0m Traceback (most recent call last)",
267
+ "File \u001b[0;32m/usr/local/lib/python3.11/dist-packages/openai/_base_client.py:993\u001b[0m, in \u001b[0;36mSyncAPIClient._request\u001b[0;34m(self, cast_to, options, retries_taken, stream, stream_cls)\u001b[0m\n\u001b[1;32m 992\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m--> 993\u001b[0m response \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_client\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43msend\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 994\u001b[0m \u001b[43m \u001b[49m\u001b[43mrequest\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 995\u001b[0m \u001b[43m \u001b[49m\u001b[43mstream\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mstream\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;129;43;01mor\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_should_stream_response_body\u001b[49m\u001b[43m(\u001b[49m\u001b[43mrequest\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mrequest\u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 996\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 997\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 998\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m httpx\u001b[38;5;241m.\u001b[39mTimeoutException \u001b[38;5;28;01mas\u001b[39;00m err:\n",
268
+ "File \u001b[0;32m/usr/local/lib/python3.11/dist-packages/httpx/_client.py:914\u001b[0m, in \u001b[0;36mClient.send\u001b[0;34m(self, request, stream, auth, follow_redirects)\u001b[0m\n\u001b[1;32m 912\u001b[0m auth \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_build_request_auth(request, auth)\n\u001b[0;32m--> 914\u001b[0m response \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_send_handling_auth\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 915\u001b[0m \u001b[43m \u001b[49m\u001b[43mrequest\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 916\u001b[0m \u001b[43m \u001b[49m\u001b[43mauth\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mauth\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 917\u001b[0m \u001b[43m \u001b[49m\u001b[43mfollow_redirects\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mfollow_redirects\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 918\u001b[0m \u001b[43m \u001b[49m\u001b[43mhistory\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m[\u001b[49m\u001b[43m]\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 919\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 920\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n",
269
+ "File \u001b[0;32m/usr/local/lib/python3.11/dist-packages/httpx/_client.py:942\u001b[0m, in \u001b[0;36mClient._send_handling_auth\u001b[0;34m(self, request, auth, follow_redirects, history)\u001b[0m\n\u001b[1;32m 941\u001b[0m \u001b[38;5;28;01mwhile\u001b[39;00m \u001b[38;5;28;01mTrue\u001b[39;00m:\n\u001b[0;32m--> 942\u001b[0m response \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_send_handling_redirects\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 943\u001b[0m \u001b[43m \u001b[49m\u001b[43mrequest\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 944\u001b[0m \u001b[43m \u001b[49m\u001b[43mfollow_redirects\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mfollow_redirects\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 945\u001b[0m \u001b[43m \u001b[49m\u001b[43mhistory\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mhistory\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 946\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 947\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n",
270
+ "File \u001b[0;32m/usr/local/lib/python3.11/dist-packages/httpx/_client.py:979\u001b[0m, in \u001b[0;36mClient._send_handling_redirects\u001b[0;34m(self, request, follow_redirects, history)\u001b[0m\n\u001b[1;32m 977\u001b[0m hook(request)\n\u001b[0;32m--> 979\u001b[0m response \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_send_single_request\u001b[49m\u001b[43m(\u001b[49m\u001b[43mrequest\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 980\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n",
271
+ "File \u001b[0;32m/usr/local/lib/python3.11/dist-packages/httpx/_client.py:1014\u001b[0m, in \u001b[0;36mClient._send_single_request\u001b[0;34m(self, request)\u001b[0m\n\u001b[1;32m 1013\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m request_context(request\u001b[38;5;241m=\u001b[39mrequest):\n\u001b[0;32m-> 1014\u001b[0m response \u001b[38;5;241m=\u001b[39m \u001b[43mtransport\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mhandle_request\u001b[49m\u001b[43m(\u001b[49m\u001b[43mrequest\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1016\u001b[0m \u001b[38;5;28;01massert\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(response\u001b[38;5;241m.\u001b[39mstream, SyncByteStream)\n",
272
+ "File \u001b[0;32m/usr/local/lib/python3.11/dist-packages/httpx/_transports/default.py:249\u001b[0m, in \u001b[0;36mHTTPTransport.handle_request\u001b[0;34m(self, request)\u001b[0m\n\u001b[1;32m 237\u001b[0m req \u001b[38;5;241m=\u001b[39m httpcore\u001b[38;5;241m.\u001b[39mRequest(\n\u001b[1;32m 238\u001b[0m method\u001b[38;5;241m=\u001b[39mrequest\u001b[38;5;241m.\u001b[39mmethod,\n\u001b[1;32m 239\u001b[0m url\u001b[38;5;241m=\u001b[39mhttpcore\u001b[38;5;241m.\u001b[39mURL(\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 247\u001b[0m extensions\u001b[38;5;241m=\u001b[39mrequest\u001b[38;5;241m.\u001b[39mextensions,\n\u001b[1;32m 248\u001b[0m )\n\u001b[0;32m--> 249\u001b[0m \u001b[43m\u001b[49m\u001b[38;5;28;43;01mwith\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[43mmap_httpcore_exceptions\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[43m:\u001b[49m\n\u001b[1;32m 250\u001b[0m \u001b[43m \u001b[49m\u001b[43mresp\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_pool\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mhandle_request\u001b[49m\u001b[43m(\u001b[49m\u001b[43mreq\u001b[49m\u001b[43m)\u001b[49m\n",
273
+ "File \u001b[0;32m/usr/lib/python3.11/contextlib.py:155\u001b[0m, in \u001b[0;36m_GeneratorContextManager.__exit__\u001b[0;34m(self, typ, value, traceback)\u001b[0m\n\u001b[1;32m 154\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m--> 155\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mgen\u001b[38;5;241m.\u001b[39mthrow(typ, value, traceback)\n\u001b[1;32m 156\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mStopIteration\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m exc:\n\u001b[1;32m 157\u001b[0m \u001b[38;5;66;03m# Suppress StopIteration *unless* it's the same exception that\u001b[39;00m\n\u001b[1;32m 158\u001b[0m \u001b[38;5;66;03m# was passed to throw(). This prevents a StopIteration\u001b[39;00m\n\u001b[1;32m 159\u001b[0m \u001b[38;5;66;03m# raised inside the \"with\" statement from being suppressed.\u001b[39;00m\n",
274
+ "File \u001b[0;32m/usr/local/lib/python3.11/dist-packages/httpx/_transports/default.py:118\u001b[0m, in \u001b[0;36mmap_httpcore_exceptions\u001b[0;34m()\u001b[0m\n\u001b[1;32m 117\u001b[0m message \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mstr\u001b[39m(exc)\n\u001b[0;32m--> 118\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m mapped_exc(message) \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mexc\u001b[39;00m\n",
275
+ "\u001b[0;31mConnectError\u001b[0m: [Errno 111] Connection refused",
276
+ "\nThe above exception was the direct cause of the following exception:\n",
277
+ "\u001b[0;31mAPIConnectionError\u001b[0m Traceback (most recent call last)",
278
+ "Cell \u001b[0;32mIn[5], line 6\u001b[0m\n\u001b[1;32m 3\u001b[0m port \u001b[38;5;241m=\u001b[39m \u001b[38;5;241m2000\u001b[39m\n\u001b[1;32m 5\u001b[0m client \u001b[38;5;241m=\u001b[39m OpenAI(api_key\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mYOUR_API_KEY\u001b[39m\u001b[38;5;124m\"\u001b[39m, base_url\u001b[38;5;241m=\u001b[39m\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mhttp://0.0.0.0:\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mport\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m/v1\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[0;32m----> 6\u001b[0m model_name \u001b[38;5;241m=\u001b[39m \u001b[43mclient\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mmodels\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mlist\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[38;5;241m.\u001b[39mdata[\u001b[38;5;241m0\u001b[39m]\u001b[38;5;241m.\u001b[39mid\n\u001b[1;32m 7\u001b[0m response \u001b[38;5;241m=\u001b[39m client\u001b[38;5;241m.\u001b[39mchat\u001b[38;5;241m.\u001b[39mcompletions\u001b[38;5;241m.\u001b[39mcreate(\n\u001b[1;32m 8\u001b[0m model\u001b[38;5;241m=\u001b[39mmodel_name,\n\u001b[1;32m 9\u001b[0m messages\u001b[38;5;241m=\u001b[39m[\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 27\u001b[0m top_p\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m0.8\u001b[39m,\n\u001b[1;32m 28\u001b[0m )\n\u001b[1;32m 29\u001b[0m \u001b[38;5;28mprint\u001b[39m(response)\n",
279
+ "File \u001b[0;32m/usr/local/lib/python3.11/dist-packages/openai/resources/models.py:91\u001b[0m, in \u001b[0;36mModels.list\u001b[0;34m(self, extra_headers, extra_query, extra_body, timeout)\u001b[0m\n\u001b[1;32m 77\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mlist\u001b[39m(\n\u001b[1;32m 78\u001b[0m \u001b[38;5;28mself\u001b[39m,\n\u001b[1;32m 79\u001b[0m \u001b[38;5;241m*\u001b[39m,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 85\u001b[0m timeout: \u001b[38;5;28mfloat\u001b[39m \u001b[38;5;241m|\u001b[39m httpx\u001b[38;5;241m.\u001b[39mTimeout \u001b[38;5;241m|\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;241m|\u001b[39m NotGiven \u001b[38;5;241m=\u001b[39m NOT_GIVEN,\n\u001b[1;32m 86\u001b[0m ) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m SyncPage[Model]:\n\u001b[1;32m 87\u001b[0m \u001b[38;5;250m \u001b[39m\u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 88\u001b[0m \u001b[38;5;124;03m Lists the currently available models, and provides basic information about each\u001b[39;00m\n\u001b[1;32m 89\u001b[0m \u001b[38;5;124;03m one such as the owner and availability.\u001b[39;00m\n\u001b[1;32m 90\u001b[0m \u001b[38;5;124;03m \"\"\"\u001b[39;00m\n\u001b[0;32m---> 91\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_get_api_list\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 92\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43m/models\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 93\u001b[0m \u001b[43m \u001b[49m\u001b[43mpage\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mSyncPage\u001b[49m\u001b[43m[\u001b[49m\u001b[43mModel\u001b[49m\u001b[43m]\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 94\u001b[0m \u001b[43m \u001b[49m\u001b[43moptions\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mmake_request_options\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 95\u001b[0m \u001b[43m \u001b[49m\u001b[43mextra_headers\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mextra_headers\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mextra_query\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mextra_query\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mextra_body\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mextra_body\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mtimeout\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mtimeout\u001b[49m\n\u001b[1;32m 96\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 97\u001b[0m \u001b[43m \u001b[49m\u001b[43mmodel\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mModel\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 98\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n",
280
+ "File \u001b[0;32m/usr/local/lib/python3.11/dist-packages/openai/_base_client.py:1329\u001b[0m, in \u001b[0;36mSyncAPIClient.get_api_list\u001b[0;34m(self, path, model, page, body, options, method)\u001b[0m\n\u001b[1;32m 1318\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mget_api_list\u001b[39m(\n\u001b[1;32m 1319\u001b[0m \u001b[38;5;28mself\u001b[39m,\n\u001b[1;32m 1320\u001b[0m path: \u001b[38;5;28mstr\u001b[39m,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 1326\u001b[0m method: \u001b[38;5;28mstr\u001b[39m \u001b[38;5;241m=\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mget\u001b[39m\u001b[38;5;124m\"\u001b[39m,\n\u001b[1;32m 1327\u001b[0m ) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m SyncPageT:\n\u001b[1;32m 1328\u001b[0m opts \u001b[38;5;241m=\u001b[39m FinalRequestOptions\u001b[38;5;241m.\u001b[39mconstruct(method\u001b[38;5;241m=\u001b[39mmethod, url\u001b[38;5;241m=\u001b[39mpath, json_data\u001b[38;5;241m=\u001b[39mbody, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39moptions)\n\u001b[0;32m-> 1329\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_request_api_list\u001b[49m\u001b[43m(\u001b[49m\u001b[43mmodel\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mpage\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mopts\u001b[49m\u001b[43m)\u001b[49m\n",
281
+ "File \u001b[0;32m/usr/local/lib/python3.11/dist-packages/openai/_base_client.py:1180\u001b[0m, in \u001b[0;36mSyncAPIClient._request_api_list\u001b[0;34m(self, model, page, options)\u001b[0m\n\u001b[1;32m 1176\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m resp\n\u001b[1;32m 1178\u001b[0m options\u001b[38;5;241m.\u001b[39mpost_parser \u001b[38;5;241m=\u001b[39m _parser\n\u001b[0;32m-> 1180\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mrequest\u001b[49m\u001b[43m(\u001b[49m\u001b[43mpage\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43moptions\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mstream\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mFalse\u001b[39;49;00m\u001b[43m)\u001b[49m\n",
282
+ "File \u001b[0;32m/usr/local/lib/python3.11/dist-packages/openai/_base_client.py:957\u001b[0m, in \u001b[0;36mSyncAPIClient.request\u001b[0;34m(self, cast_to, options, remaining_retries, stream, stream_cls)\u001b[0m\n\u001b[1;32m 954\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 955\u001b[0m retries_taken \u001b[38;5;241m=\u001b[39m \u001b[38;5;241m0\u001b[39m\n\u001b[0;32m--> 957\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_request\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 958\u001b[0m \u001b[43m \u001b[49m\u001b[43mcast_to\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcast_to\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 959\u001b[0m \u001b[43m \u001b[49m\u001b[43moptions\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43moptions\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 960\u001b[0m \u001b[43m \u001b[49m\u001b[43mstream\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mstream\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 961\u001b[0m \u001b[43m \u001b[49m\u001b[43mstream_cls\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mstream_cls\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 962\u001b[0m \u001b[43m \u001b[49m\u001b[43mretries_taken\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mretries_taken\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 963\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n",
283
+ "File \u001b[0;32m/usr/local/lib/python3.11/dist-packages/openai/_base_client.py:1017\u001b[0m, in \u001b[0;36mSyncAPIClient._request\u001b[0;34m(self, cast_to, options, retries_taken, stream, stream_cls)\u001b[0m\n\u001b[1;32m 1014\u001b[0m log\u001b[38;5;241m.\u001b[39mdebug(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mEncountered Exception\u001b[39m\u001b[38;5;124m\"\u001b[39m, exc_info\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mTrue\u001b[39;00m)\n\u001b[1;32m 1016\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m remaining_retries \u001b[38;5;241m>\u001b[39m \u001b[38;5;241m0\u001b[39m:\n\u001b[0;32m-> 1017\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_retry_request\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 1018\u001b[0m \u001b[43m \u001b[49m\u001b[43minput_options\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1019\u001b[0m \u001b[43m \u001b[49m\u001b[43mcast_to\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1020\u001b[0m \u001b[43m \u001b[49m\u001b[43mretries_taken\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mretries_taken\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1021\u001b[0m \u001b[43m \u001b[49m\u001b[43mstream\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mstream\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1022\u001b[0m \u001b[43m \u001b[49m\u001b[43mstream_cls\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mstream_cls\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1023\u001b[0m \u001b[43m \u001b[49m\u001b[43mresponse_headers\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mNone\u001b[39;49;00m\u001b[43m,\u001b[49m\n\u001b[1;32m 1024\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1026\u001b[0m log\u001b[38;5;241m.\u001b[39mdebug(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mRaising connection error\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m 1027\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m APIConnectionError(request\u001b[38;5;241m=\u001b[39mrequest) \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01merr\u001b[39;00m\n",
284
+ "File \u001b[0;32m/usr/local/lib/python3.11/dist-packages/openai/_base_client.py:1095\u001b[0m, in \u001b[0;36mSyncAPIClient._retry_request\u001b[0;34m(self, options, cast_to, retries_taken, response_headers, stream, stream_cls)\u001b[0m\n\u001b[1;32m 1091\u001b[0m \u001b[38;5;66;03m# In a synchronous context we are blocking the entire thread. Up to the library user to run the client in a\u001b[39;00m\n\u001b[1;32m 1092\u001b[0m \u001b[38;5;66;03m# different thread if necessary.\u001b[39;00m\n\u001b[1;32m 1093\u001b[0m time\u001b[38;5;241m.\u001b[39msleep(timeout)\n\u001b[0;32m-> 1095\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_request\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 1096\u001b[0m \u001b[43m \u001b[49m\u001b[43moptions\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43moptions\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1097\u001b[0m \u001b[43m \u001b[49m\u001b[43mcast_to\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcast_to\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1098\u001b[0m \u001b[43m \u001b[49m\u001b[43mretries_taken\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mretries_taken\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m+\u001b[39;49m\u001b[43m \u001b[49m\u001b[38;5;241;43m1\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1099\u001b[0m \u001b[43m \u001b[49m\u001b[43mstream\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mstream\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1100\u001b[0m \u001b[43m \u001b[49m\u001b[43mstream_cls\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mstream_cls\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1101\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n",
285
+ "File \u001b[0;32m/usr/local/lib/python3.11/dist-packages/openai/_base_client.py:1017\u001b[0m, in \u001b[0;36mSyncAPIClient._request\u001b[0;34m(self, cast_to, options, retries_taken, stream, stream_cls)\u001b[0m\n\u001b[1;32m 1014\u001b[0m log\u001b[38;5;241m.\u001b[39mdebug(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mEncountered Exception\u001b[39m\u001b[38;5;124m\"\u001b[39m, exc_info\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mTrue\u001b[39;00m)\n\u001b[1;32m 1016\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m remaining_retries \u001b[38;5;241m>\u001b[39m \u001b[38;5;241m0\u001b[39m:\n\u001b[0;32m-> 1017\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_retry_request\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 1018\u001b[0m \u001b[43m \u001b[49m\u001b[43minput_options\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1019\u001b[0m \u001b[43m \u001b[49m\u001b[43mcast_to\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1020\u001b[0m \u001b[43m \u001b[49m\u001b[43mretries_taken\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mretries_taken\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1021\u001b[0m \u001b[43m \u001b[49m\u001b[43mstream\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mstream\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1022\u001b[0m \u001b[43m \u001b[49m\u001b[43mstream_cls\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mstream_cls\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1023\u001b[0m \u001b[43m \u001b[49m\u001b[43mresponse_headers\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mNone\u001b[39;49;00m\u001b[43m,\u001b[49m\n\u001b[1;32m 1024\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1026\u001b[0m log\u001b[38;5;241m.\u001b[39mdebug(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mRaising connection error\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m 1027\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m APIConnectionError(request\u001b[38;5;241m=\u001b[39mrequest) \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01merr\u001b[39;00m\n",
286
+ "File \u001b[0;32m/usr/local/lib/python3.11/dist-packages/openai/_base_client.py:1095\u001b[0m, in \u001b[0;36mSyncAPIClient._retry_request\u001b[0;34m(self, options, cast_to, retries_taken, response_headers, stream, stream_cls)\u001b[0m\n\u001b[1;32m 1091\u001b[0m \u001b[38;5;66;03m# In a synchronous context we are blocking the entire thread. Up to the library user to run the client in a\u001b[39;00m\n\u001b[1;32m 1092\u001b[0m \u001b[38;5;66;03m# different thread if necessary.\u001b[39;00m\n\u001b[1;32m 1093\u001b[0m time\u001b[38;5;241m.\u001b[39msleep(timeout)\n\u001b[0;32m-> 1095\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_request\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 1096\u001b[0m \u001b[43m \u001b[49m\u001b[43moptions\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43moptions\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1097\u001b[0m \u001b[43m \u001b[49m\u001b[43mcast_to\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcast_to\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1098\u001b[0m \u001b[43m \u001b[49m\u001b[43mretries_taken\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mretries_taken\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m+\u001b[39;49m\u001b[43m \u001b[49m\u001b[38;5;241;43m1\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1099\u001b[0m \u001b[43m \u001b[49m\u001b[43mstream\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mstream\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1100\u001b[0m \u001b[43m \u001b[49m\u001b[43mstream_cls\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mstream_cls\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1101\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n",
287
+ "File \u001b[0;32m/usr/local/lib/python3.11/dist-packages/openai/_base_client.py:1027\u001b[0m, in \u001b[0;36mSyncAPIClient._request\u001b[0;34m(self, cast_to, options, retries_taken, stream, stream_cls)\u001b[0m\n\u001b[1;32m 1017\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_retry_request(\n\u001b[1;32m 1018\u001b[0m input_options,\n\u001b[1;32m 1019\u001b[0m cast_to,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 1023\u001b[0m response_headers\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mNone\u001b[39;00m,\n\u001b[1;32m 1024\u001b[0m )\n\u001b[1;32m 1026\u001b[0m log\u001b[38;5;241m.\u001b[39mdebug(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mRaising connection error\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[0;32m-> 1027\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m APIConnectionError(request\u001b[38;5;241m=\u001b[39mrequest) \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01merr\u001b[39;00m\n\u001b[1;32m 1029\u001b[0m log\u001b[38;5;241m.\u001b[39mdebug(\n\u001b[1;32m 1030\u001b[0m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mHTTP Response: \u001b[39m\u001b[38;5;132;01m%s\u001b[39;00m\u001b[38;5;124m \u001b[39m\u001b[38;5;132;01m%s\u001b[39;00m\u001b[38;5;124m \u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;132;01m%i\u001b[39;00m\u001b[38;5;124m \u001b[39m\u001b[38;5;132;01m%s\u001b[39;00m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m \u001b[39m\u001b[38;5;132;01m%s\u001b[39;00m\u001b[38;5;124m'\u001b[39m,\n\u001b[1;32m 1031\u001b[0m request\u001b[38;5;241m.\u001b[39mmethod,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 1035\u001b[0m response\u001b[38;5;241m.\u001b[39mheaders,\n\u001b[1;32m 1036\u001b[0m )\n\u001b[1;32m 1037\u001b[0m log\u001b[38;5;241m.\u001b[39mdebug(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mrequest_id: \u001b[39m\u001b[38;5;132;01m%s\u001b[39;00m\u001b[38;5;124m\"\u001b[39m, response\u001b[38;5;241m.\u001b[39mheaders\u001b[38;5;241m.\u001b[39mget(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mx-request-id\u001b[39m\u001b[38;5;124m\"\u001b[39m))\n",
288
+ "\u001b[0;31mAPIConnectionError\u001b[0m: Connection error."
289
+ ]
290
+ }
291
+ ],
292
+ "source": [
293
+ "from openai import OpenAI\n",
294
+ "\n",
295
+ "port = 2000\n",
296
+ "\n",
297
+ "client = OpenAI(api_key=\"YOUR_API_KEY\", base_url=f\"http://0.0.0.0:{port}/v1\")\n",
298
+ "model_name = client.models.list().data[0].id\n",
299
+ "response = client.chat.completions.create(\n",
300
+ " model=model_name,\n",
301
+ " messages=[\n",
302
+ " {\n",
303
+ " \"role\": \"user\",\n",
304
+ " \"content\": [\n",
305
+ " {\n",
306
+ " \"type\": \"text\",\n",
307
+ " \"text\": \"Miêu tả bức tranh giùm coi\",\n",
308
+ " },\n",
309
+ " {\n",
310
+ " \"type\": \"image_url\",\n",
311
+ " \"image_url\": {\n",
312
+ " \"url\": \"https://raw.githubusercontent.com/open-mmlab/mmdeploy/main/tests/data/tiger.jpeg\",\n",
313
+ " },\n",
314
+ " },\n",
315
+ " ],\n",
316
+ " }\n",
317
+ " ],\n",
318
+ " temperature=0.8,\n",
319
+ " top_p=0.8,\n",
320
+ ")\n",
321
+ "print(response)"
322
+ ]
323
+ },
324
+ {
325
+ "cell_type": "code",
326
+ "execution_count": null,
327
+ "metadata": {},
328
+ "outputs": [],
329
+ "source": [
330
+ "model_name"
331
+ ]
332
+ },
333
+ {
334
+ "cell_type": "code",
335
+ "execution_count": null,
336
+ "metadata": {},
337
+ "outputs": [],
338
+ "source": [
339
+ "response.choices[0].message.content"
340
+ ]
341
+ },
342
+ {
343
+ "cell_type": "code",
344
+ "execution_count": null,
345
+ "metadata": {},
346
+ "outputs": [],
347
+ "source": []
348
+ },
349
+ {
350
+ "cell_type": "code",
351
+ "execution_count": 12,
352
+ "metadata": {},
353
+ "outputs": [
354
+ {
355
+ "name": "stderr",
356
+ "output_type": "stream",
357
+ "text": [
358
+ " % Total % Received % Xferd Average Speed Time Time Time Current\n",
359
+ " Dload Upload Total Spent Left Speed\n",
360
+ "100 617 100 404 100 213 5970 3147 --:--:-- --:--:-- --:--:-- 9208\n"
361
+ ]
362
+ },
363
+ {
364
+ "name": "stdout",
365
+ "output_type": "stream",
366
+ "text": [
367
+ "{\"id\":\"chatcmpl-8b3b1360415d4805a44f33bd81fc3447\",\"object\":\"chat.completion\",\"created\":1734879441,\"model\":\"Qwen/Qwen2.5-1.5B-Instruct\",\"choices\":[{\"index\":0,\"message\":{\"role\":\"assistant\",\"content\":\"巴黎\",\"tool_calls\":[]},\"logprobs\":null,\"finish_reason\":\"stop\",\"stop_reason\":null}],\"usage\":{\"prompt_tokens\":48,\"total_tokens\":50,\"completion_tokens\":2,\"prompt_tokens_details\":null},\"prompt_logprobs\":null}"
368
+ ]
369
+ }
370
+ ],
371
+ "source": [
372
+ "%%bash\n",
373
+ "# Call the server using curl:\n",
374
+ "curl -X POST \"http://localhost:8000/v1/chat/completions\" \\\n",
375
+ "\t-H \"Content-Type: application/json\" \\\n",
376
+ "\t--data '{\n",
377
+ "\t\t\"model\": \"Qwen/Qwen2.5-1.5B-Instruct\",\n",
378
+ "\t\t\"messages\": [\n",
379
+ "\t\t\t{\n",
380
+ "\t\t\t\t\"role\": \"user\",\n",
381
+ "\t\t\t\t\"content\": \"What is the capital of France? You must answer in Chinese without adding any comment or explanation.\"\n",
382
+ "\t\t\t}\n",
383
+ "\t\t]\n",
384
+ "\t}'"
385
+ ]
386
+ },
387
+ {
388
+ "cell_type": "code",
389
+ "execution_count": null,
390
+ "metadata": {},
391
+ "outputs": [],
392
+ "source": []
393
+ }
394
+ ],
395
+ "metadata": {
396
+ "kernelspec": {
397
+ "display_name": "lmdeploy",
398
+ "language": "python",
399
+ "name": "lmdeploy"
400
+ },
401
+ "language_info": {
402
+ "codemirror_mode": {
403
+ "name": "ipython",
404
+ "version": 3
405
+ },
406
+ "file_extension": ".py",
407
+ "mimetype": "text/x-python",
408
+ "name": "python",
409
+ "nbconvert_exporter": "python",
410
+ "pygments_lexer": "ipython3",
411
+ "version": "3.8.19"
412
+ }
413
+ },
414
+ "nbformat": 4,
415
+ "nbformat_minor": 4
416
+ }
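The notebook cell above fails with `APIConnectionError` because nothing is listening on port 2000 when the OpenAI client is built. A minimal sketch, assuming an lmdeploy/vLLM OpenAI-compatible server is supposed to be running locally (the port value is illustrative), that probes `/v1/models` before constructing the client:

```python
# Hedged sketch: probe the OpenAI-compatible endpoint before creating the client.
# Assumes a local lmdeploy/vLLM api_server; the port is illustrative and must match
# the one the server was actually started with.
import requests

port = 2000  # hypothetical, replace with the port used by `lmdeploy serve api_server`

try:
    resp = requests.get(f"http://0.0.0.0:{port}/v1/models", timeout=5)
    resp.raise_for_status()
    print("server is up, models:", [m["id"] for m in resp.json()["data"]])
except requests.RequestException as exc:
    print(f"no OpenAI-compatible server reachable on port {port}: {exc}")
```

If this check fails, the `client.models.list()` call in the cell above will fail the same way, so it is cheaper to probe first.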
a_mllm_notebooks/openai/proxy.sh ADDED
@@ -0,0 +1,10 @@
1
+ eval "$(conda shell.bash hook)"
2
+ conda activate lmdeploy
3
+
4
+ MODEL_NAME=Qwen/Qwen2.5-1.5B-Instruct-AWQ
5
+
6
+
7
+ PROXY_URL=0.0.0.0
8
+ lmdeploy serve proxy --server-name $PROXY_URL --server-port 7089 --strategy \
9
+ "min_expected_latency" \
10
+ &
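proxy.sh only starts the lmdeploy proxy on port 7089; the api_server workers attach to it through `--proxy-url` in the serve scripts below, but a node can also be registered by hand. A minimal sketch mirroring the `POST /nodes/add` call that appears later in Untitled.ipynb (the worker URL is illustrative):

```python
# Hedged sketch: manually register an api_server node with the lmdeploy proxy.
# Mirrors the curl POST to /nodes/add used in a_mllm_notebooks/vllm/Untitled.ipynb;
# the worker URL must point at a running `lmdeploy serve api_server`.
import requests

proxy_url = "http://0.0.0.0:7089"          # port chosen in proxy.sh
node = {"url": "http://0.0.0.0:5011"}      # illustrative worker port, see serve.sh

resp = requests.post(f"{proxy_url}/nodes/add", json=node, timeout=10)
print(resp.status_code, resp.text)
```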
a_mllm_notebooks/openai/serve.sh ADDED
@@ -0,0 +1,60 @@
1
+ eval "$(conda shell.bash hook)"
2
+ conda activate lmdeploy
3
+ # # MODEL_NAME=OpenGVLab/InternVL2_5-8B-AWQ
4
+ MODEL_NAME=OpenGVLab/InternVL2_5-4B-MPO-AWQ
5
+
6
+
7
+ PORT_LIST=( $(seq 5011 1 5011) )
8
+ for PORT in "${PORT_LIST[@]}"; do
9
+ # get random device id from 0 to 3
10
+ # RANDOM_DEVICE_ID=$((RANDOM % 3))
11
+ # RANDOM_DEVICE_ID=3
12
+ # CUDA_VISIBLE_DEVICES=0,1 \
13
+ # CUDA_VISIBLE_DEVICES=2,3 \
14
+ CUDA_VISIBLE_DEVICES=0 \
15
+ lmdeploy serve api_server $MODEL_NAME \
16
+ --server-port $PORT \
17
+ --backend turbomind \
18
+ --dtype float16 --proxy-url http://0.0.0.0:7089 \
19
+ --vision-max-batch-size 64 &
20
+ # --cache-max-entry-count 0.4 &
21
+ # --tp 1 &
22
+ # &
23
+ done
24
+
25
+ PORT_LIST=( $(seq 5972 1 5972) )
26
+ for PORT in "${PORT_LIST[@]}"; do
27
+ # get random device id from 0 to 3
28
+ # RANDOM_DEVICE_ID=$((RANDOM % 3))
29
+ # RANDOM_DEVICE_ID=3
30
+ # CUDA_VISIBLE_DEVICES=0,1 \
31
+ # CUDA_VISIBLE_DEVICES=2,3 \
32
+ CUDA_VISIBLE_DEVICES=2 \
33
+ lmdeploy serve api_server $MODEL_NAME \
34
+ --server-port $PORT \
35
+ --backend turbomind \
36
+ --dtype float16 --proxy-url http://0.0.0.0:7089 &
37
+ # --vision-max-batch-size 64 &
38
+ # --cache-max-entry-count 0.4 &
39
+ # --tp 1 &
40
+ # &
41
+ done
42
+
43
+ PORT_LIST=( $(seq 5171 1 5171) )
44
+ for PORT in "${PORT_LIST[@]}"; do
45
+ # get random device id from 0 to 3
46
+ # RANDOM_DEVICE_ID=$((RANDOM % 3))
47
+ # RANDOM_DEVICE_ID=3
48
+ # CUDA_VISIBLE_DEVICES=0,1 \
49
+ # CUDA_VISIBLE_DEVICES=2,3 \
50
+ CUDA_VISIBLE_DEVICES=1 \
51
+ lmdeploy serve api_server $MODEL_NAME \
52
+ --server-port $PORT \
53
+ --backend turbomind \
54
+ --dtype float16 --proxy-url http://0.0.0.0:7089 \
55
+ --vision-max-batch-size 64 &
56
+ # --cache-max-entry-count 0.4 &
57
+ # --tp 1 &
58
+ # &
59
+ done
60
+
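serve.sh launches three InternVL2_5-4B-MPO-AWQ workers (ports 5011, 5972 and 5171 on GPUs 0, 2 and 1) that register themselves with the proxy on port 7089. A minimal sketch, with illustrative retry counts, that waits for each worker's `/v1/models` route to answer before sending any traffic:

```python
# Hedged sketch: wait for each api_server from serve.sh to finish loading its weights.
# The ports come from the script above; the retry count and sleep are illustrative.
import time
import requests

for port in (5011, 5972, 5171):
    url = f"http://0.0.0.0:{port}/v1/models"
    for _ in range(30):
        try:
            if requests.get(url, timeout=5).ok:
                print(f"worker on port {port} is ready")
                break
        except requests.RequestException:
            pass
        time.sleep(10)
    else:
        print(f"worker on port {port} did not come up")
```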
a_mllm_notebooks/openai/temp.json ADDED
The diff for this file is too large to render. See raw diff
 
a_mllm_notebooks/openai/temp.sh ADDED
@@ -0,0 +1,25 @@
1
+ eval "$(conda shell.bash hook)"
2
+ conda activate lmdeploy
3
+ MODEL_NAME=Qwen/Qwen2.5-1.5B-Instruct-AWQ
4
+ PORT_LIST=( $(seq 3162 1 3162) )
5
+ for PORT in "${PORT_LIST[@]}"; do
6
+ CUDA_VISIBLE_DEVICES=0,1,2,3 \
7
+ lmdeploy serve api_server $MODEL_NAME \
8
+ --server-port $PORT \
9
+ --backend turbomind \
10
+ --dtype float16 --proxy-url http://0.0.0.0:8082 \
11
+ --cache-max-entry-count 0.0075 --tp 3 &
12
+ done
13
+
14
+
15
+ # # PORT_LIST from 3063 to 3099
16
+ # PORT_LIST=( $(seq 9000 1 9000) )
17
+ # # PORT_LIST=(9898)
18
+ # for PORT in "${PORT_LIST[@]}"; do
19
+ # CUDA_VISIBLE_DEVICES=3 \
20
+ # lmdeploy serve api_server $MODEL_NAME \
21
+ # --server-port $PORT \
22
+ # --backend turbomind \
23
+ # --dtype float16 --proxy-url http://0.0.0.0:8082 \
24
+ # --cache-max-entry-count 0.025 --tp 1 &
25
+ # done
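temp.sh serves Qwen2.5-1.5B-Instruct-AWQ on port 3162 with `--tp 3` and a very small KV-cache fraction, and registers it with a proxy on port 8082. A minimal sketch, assuming the lmdeploy proxy forwards the same `/v1` routes as the workers behind it, that sends one chat completion through that proxy once the worker has registered:

```python
# Hedged sketch: one chat completion through the proxy used in temp.sh.
# Assumes the lmdeploy proxy on port 8082 exposes the standard /v1 routes;
# the question mirrors the curl example in ping_server.ipynb.
from openai import OpenAI

client = OpenAI(api_key="YOUR_API_KEY", base_url="http://0.0.0.0:8082/v1")
model_name = client.models.list().data[0].id
response = client.chat.completions.create(
    model=model_name,
    messages=[{"role": "user", "content": "What is the capital of France?"}],
    temperature=0.8,
)
print(response.choices[0].message.content)
```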
a_mllm_notebooks/tensorrt-llm/clone_folder.ipynb ADDED
@@ -0,0 +1,78 @@
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": null,
6
+ "metadata": {},
7
+ "outputs": [],
8
+ "source": [
9
+ "# https://github.com/NVIDIA/TensorRT-LLM/tree/main/examples/bert"
10
+ ]
11
+ },
12
+ {
13
+ "cell_type": "code",
14
+ "execution_count": 16,
15
+ "metadata": {},
16
+ "outputs": [
17
+ {
18
+ "name": "stdout",
19
+ "output_type": "stream",
20
+ "text": [
21
+ "apps\t\t generate_checkpoint_config.py multimodal\n",
22
+ "arctic\t\t gpt\t\t\t nemotron\n",
23
+ "baichuan\t gptj\t\t\t openai_triton\n",
24
+ "bert\t\t gptneox\t\t\t opt\n",
25
+ "bindings\t grok\t\t\t phi\n",
26
+ "blip2\t\t hf_lora_convert.py\t quantization\n",
27
+ "bloom\t\t infinitebench\t\t qwen\n",
28
+ "chatglm\t\t internlm\t\t\t qwenvl\n",
29
+ "cogvlm\t\t internlm2\t\t\t recurrentgemma\n",
30
+ "cpp\t\t jais\t\t\t redrafter\n",
31
+ "cpp_library\t llama\t\t\t run.py\n",
32
+ "dbrx\t\t llm-api\t\t\t sample_weight_stripping\n",
33
+ "dit\t\t mamba\t\t\t skywork\n",
34
+ "enc_dec\t\t medusa\t\t\t smaug\n",
35
+ "eval_long_context.py mixtral\t\t\t summarize.py\n",
36
+ "exaone\t\t mmlu.py\t\t\t utils.py\n",
37
+ "falcon\t\t model_api\t\t\t whisper\n",
38
+ "gemma\t\t mpt\n"
39
+ ]
40
+ }
41
+ ],
42
+ "source": [
43
+ "!ls ../../TensorRT-LLM/examples\n",
44
+ "!cp ../../TensorRT-LLM/examples/bert . -r\n",
45
+ "!cp ../../TensorRT-LLM/examples/blip2 . -r\n",
46
+ "!cp ../../TensorRT-LLM/examples/multimodal . -r"
47
+ ]
48
+ },
49
+ {
50
+ "cell_type": "code",
51
+ "execution_count": null,
52
+ "metadata": {},
53
+ "outputs": [],
54
+ "source": []
55
+ }
56
+ ],
57
+ "metadata": {
58
+ "kernelspec": {
59
+ "display_name": "tensorrt-llm",
60
+ "language": "python",
61
+ "name": "python3"
62
+ },
63
+ "language_info": {
64
+ "codemirror_mode": {
65
+ "name": "ipython",
66
+ "version": 3
67
+ },
68
+ "file_extension": ".py",
69
+ "mimetype": "text/x-python",
70
+ "name": "python",
71
+ "nbconvert_exporter": "python",
72
+ "pygments_lexer": "ipython3",
73
+ "version": "3.10.14"
74
+ }
75
+ },
76
+ "nbformat": 4,
77
+ "nbformat_minor": 2
78
+ }
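The notebook above copies three example folders out of a local TensorRT-LLM checkout with shell `cp` commands. A minimal pure-Python equivalent, assuming the same relative layout as in the notebook:

```python
# Hedged sketch: Python equivalent of the `!cp ../../TensorRT-LLM/examples/<name> . -r`
# cells above, assuming the TensorRT-LLM checkout sits two directories up.
import shutil
from pathlib import Path

examples = Path("../../TensorRT-LLM/examples")
for name in ("bert", "blip2", "multimodal"):
    shutil.copytree(examples / name, Path(name), dirs_exist_ok=True)
```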
a_mllm_notebooks/vllm/Untitled.ipynb ADDED
@@ -0,0 +1,68 @@
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": 2,
6
+ "id": "9b7fb01b-8c7b-4213-b2b8-fb750a4c55a8",
7
+ "metadata": {},
8
+ "outputs": [
9
+ {
10
+ "name": "stderr",
11
+ "output_type": "stream",
12
+ "text": [
13
+ " % Total % Received % Xferd Average Speed Time Time Time Current\n",
14
+ " Dload Upload Total Spent Left Speed\n",
15
+ "100 123 100 88 100 35 22768 9055 --:--:-- --:--:-- --:--:-- 41000\n"
16
+ ]
17
+ },
18
+ {
19
+ "name": "stdout",
20
+ "output_type": "stream",
21
+ "text": [
22
+ "\"{\\\"error_code\\\": 10402, \\\"text\\\": \\\"Failed to get response after a period of time\\\"}\\n\""
23
+ ]
24
+ }
25
+ ],
26
+ "source": [
27
+ "%%bash\n",
28
+ "\n",
29
+ "curl -X 'POST' \\\n",
30
+ " 'http://localhost:8082/nodes/add' \\\n",
31
+ " -H 'accept: application/json' \\\n",
32
+ " -H 'Content-Type: application/json' \\\n",
33
+ " -d '{\n",
34
+ " \"url\": \"http://0.0.0.0:19400\"\n",
35
+ "}'\n"
36
+ ]
37
+ },
38
+ {
39
+ "cell_type": "code",
40
+ "execution_count": null,
41
+ "id": "55fc29ba-43e2-4bdb-b499-cf47266b9c3e",
42
+ "metadata": {},
43
+ "outputs": [],
44
+ "source": []
45
+ }
46
+ ],
47
+ "metadata": {
48
+ "kernelspec": {
49
+ "display_name": "lmdeploy",
50
+ "language": "python",
51
+ "name": "lmdeploy"
52
+ },
53
+ "language_info": {
54
+ "codemirror_mode": {
55
+ "name": "ipython",
56
+ "version": 3
57
+ },
58
+ "file_extension": ".py",
59
+ "mimetype": "text/x-python",
60
+ "name": "python",
61
+ "nbconvert_exporter": "python",
62
+ "pygments_lexer": "ipython3",
63
+ "version": "3.8.19"
64
+ }
65
+ },
66
+ "nbformat": 4,
67
+ "nbformat_minor": 5
68
+ }
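The `/nodes/add` call above comes back with `error_code 10402` ("Failed to get response after a period of time"), which suggests the proxy on port 8082 timed out while contacting the worker at port 19400. A minimal sketch, using the same URLs as the cell, that checks the worker itself before registering it:

```python
# Hedged sketch: verify the worker answers /v1/models before adding it to the proxy;
# error_code 10402 above suggests the proxy could not get a response from the node.
import requests

worker_url = "http://0.0.0.0:19400"
try:
    requests.get(f"{worker_url}/v1/models", timeout=5).raise_for_status()
    resp = requests.post(
        "http://localhost:8082/nodes/add", json={"url": worker_url}, timeout=10
    )
    print(resp.status_code, resp.text)
except requests.RequestException as exc:
    print(f"worker at {worker_url} is not healthy, fix it before registering: {exc}")
```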
a_mllm_notebooks/vllm/cat.jpg ADDED

Git LFS Details

  • SHA256: a8e76cfc63e1a0d0eafbbf5ae038c3a4a8ec727b4a57e4de2a9749c0180b8e86
  • Pointer size: 132 Bytes
  • Size of remote file: 2.52 MB
a_mllm_notebooks/vllm/cli.md ADDED
@@ -0,0 +1,405 @@
1
+ ```bash
2
+ usage: vllm serve <model_tag> [options]
3
+
4
+ positional arguments:
5
+ model_tag The model tag to serve
6
+
7
+ options:
8
+ --allow-credentials allow credentials
9
+ --allowed-headers ALLOWED_HEADERS
10
+ allowed headers
11
+ --allowed-local-media-path ALLOWED_LOCAL_MEDIA_PATH
12
+ Allowing API requests to read local images or videos from directories
13
+ specified by the server file system. This is a security risk. Should only
14
+ be enabled in trusted environments.
15
+ --allowed-methods ALLOWED_METHODS
16
+ allowed methods
17
+ --allowed-origins ALLOWED_ORIGINS
18
+ allowed origins
19
+ --api-key API_KEY If provided, the server will require this key to be presented in the
20
+ header.
21
+ --block-size {8,16,32,64,128}
22
+ Token block size for contiguous chunks of tokens. This is ignored on
23
+ neuron devices and set to max-model-len
24
+ --chat-template CHAT_TEMPLATE
25
+ The file path to the chat template, or the template in single-line form
26
+ for the specified model
27
+ --chat-template-content-format {auto,string,openai}
28
+ The format to render message content within a chat template. * "string"
29
+ will render the content as a string. Example: "Hello World" * "openai"
30
+ will render the content as a list of dictionaries, similar to OpenAI
31
+ schema. Example: [{"type": "text", "text": "Hello world!"}]
32
+ --code-revision CODE_REVISION
33
+ The specific revision to use for the model code on Hugging Face Hub. It
34
+ can be a branch name, a tag name, or a commit id. If unspecified, will use
35
+ the default version.
36
+ --collect-detailed-traces COLLECT_DETAILED_TRACES
37
+ Valid choices are model,worker,all. It makes sense to set this only if
38
+ --otlp-traces-endpoint is set. If set, it will collect detailed traces for
39
+ the specified modules. This involves use of possibly costly and or
40
+ blocking operations and hence might have a performance impact.
41
+ --compilation-config COMPILATION_CONFIG, -O COMPILATION_CONFIG
42
+ torch.compile configuration for the model.When it is a number (0, 1, 2,
43
+ 3), it will be interpreted as the optimization level. NOTE: level 0 is the
44
+ default level without any optimization. level 1 and 2 are for internal
45
+ testing only. level 3 is the recommended level for production. To specify
46
+ the full compilation config, use a JSON string. Following the convention
47
+ of traditional compilers, using -O without space is also supported. -O3 is
48
+ equivalent to -O 3.
49
+ --config CONFIG Read CLI options from a config file.Must be a YAML with the following opti
50
+ ons:https://docs.vllm.ai/en/latest/serving/openai_compatible_server.html#c
51
+ ommand-line-arguments-for-the-server
52
+ --config-format {auto,hf,mistral}
53
+ The format of the model config to load. * "auto" will try to load the
54
+ config in hf format if available else it will try to load in mistral
55
+ format
56
+ --cpu-offload-gb CPU_OFFLOAD_GB
57
+ The space in GiB to offload to CPU, per GPU. Default is 0, which means no
58
+ offloading. Intuitively, this argument can be seen as a virtual way to
59
+ increase the GPU memory size. For example, if you have one 24 GB GPU and
60
+ set this to 10, virtually you can think of it as a 34 GB GPU. Then you can
61
+ load a 13B model with BF16 weight, which requires at least 26GB GPU
62
+ memory. Note that this requires fast CPU-GPU interconnect, as part of the
63
+ model is loaded from CPU memory to GPU memory on the fly in each model
64
+ forward pass.
65
+ --device {auto,cuda,neuron,cpu,openvino,tpu,xpu,hpu}
66
+ Device type for vLLM execution.
67
+ --disable-async-output-proc
68
+ Disable async output processing. This may result in lower performance.
69
+ --disable-custom-all-reduce
70
+ See ParallelConfig.
71
+ --disable-fastapi-docs
72
+ Disable FastAPI's OpenAPI schema, Swagger UI, and ReDoc endpoint
73
+ --disable-frontend-multiprocessing
74
+ If specified, will run the OpenAI frontend server in the same process as
75
+ the model serving engine.
76
+ --disable-log-requests
77
+ Disable logging requests.
78
+ --disable-log-stats Disable logging statistics.
79
+ --disable-logprobs-during-spec-decoding [DISABLE_LOGPROBS_DURING_SPEC_DECODING]
80
+ If set to True, token log probabilities are not returned during
81
+ speculative decoding. If set to False, log probabilities are returned
82
+ according to the settings in SamplingParams. If not specified, it defaults
83
+ to True. Disabling log probabilities during speculative decoding reduces
84
+ latency by skipping logprob calculation in proposal sampling, target
85
+ sampling, and after accepted tokens are determined.
86
+ --disable-sliding-window
87
+ Disables sliding window, capping to sliding window size
88
+ --distributed-executor-backend {ray,mp}
89
+ Backend to use for distributed model workers, either "ray" or "mp"
90
+ (multiprocessing). If the product of pipeline_parallel_size and
91
+ tensor_parallel_size is less than or equal to the number of GPUs
92
+ available, "mp" will be used to keep processing on a single host.
93
+ Otherwise, this will default to "ray" if Ray is installed and fail
94
+ otherwise. Note that tpu and hpu only support Ray for distributed
95
+ inference.
96
+ --download-dir DOWNLOAD_DIR
97
+ Directory to download and load the weights, default to the default cache
98
+ dir of huggingface.
99
+ --dtype {auto,half,float16,bfloat16,float,float32}
100
+ Data type for model weights and activations. * "auto" will use FP16
101
+ precision for FP32 and FP16 models, and BF16 precision for BF16 models. *
102
+ "half" for FP16. Recommended for AWQ quantization. * "float16" is the same
103
+ as "half". * "bfloat16" for a balance between precision and range. *
104
+ "float" is shorthand for FP32 precision. * "float32" for FP32 precision.
105
+ --enable-auto-tool-choice
106
+ Enable auto tool choice for supported models. Use --tool-call-parser to
107
+ specify which parser to use
108
+ --enable-chunked-prefill [ENABLE_CHUNKED_PREFILL]
109
+ If set, the prefill requests can be chunked based on the
110
+ max_num_batched_tokens.
111
+ --enable-lora If True, enable handling of LoRA adapters.
112
+ --enable-lora-bias If True, enable bias for LoRA adapters.
113
+ --enable-prefix-caching, --no-enable-prefix-caching
114
+ Enables automatic prefix caching. Use --no-enable-prefix-caching to
115
+ disable explicitly.
116
+ --enable-prompt-adapter
117
+ If True, enable handling of PromptAdapters.
118
+ --enable-prompt-tokens-details
119
+ If set to True, enable prompt_tokens_details in usage.
120
+ --enforce-eager Always use eager-mode PyTorch. If False, will use eager mode and CUDA
121
+ graph in hybrid for maximal performance and flexibility.
122
+ --fully-sharded-loras
123
+ By default, only half of the LoRA computation is sharded with tensor
124
+ parallelism. Enabling this will use the fully sharded layers. At high
125
+ sequence length, max rank or tensor parallel size, this is likely faster.
126
+ --gpu-memory-utilization GPU_MEMORY_UTILIZATION
127
+ The fraction of GPU memory to be used for the model executor, which can
128
+ range from 0 to 1. For example, a value of 0.5 would imply 50% GPU memory
129
+ utilization. If unspecified, will use the default value of 0.9. This is a
130
+ global gpu memory utilization limit, for example if 50% of the gpu memory
131
+ is already used before vLLM starts and --gpu-memory-utilization is set to
132
+ 0.9, then only 40% of the gpu memory will be allocated to the model
133
+ executor.
134
+ --guided-decoding-backend {outlines,lm-format-enforcer,xgrammar}
135
+ Which engine will be used for guided decoding (JSON schema / regex etc) by
136
+ default. Currently support https://github.com/outlines-
137
+ dev/outlines,https://github.com/mlc-ai/xgrammar, and
138
+ https://github.com/noamgat/lm-format-enforcer. Can be overridden per
139
+ request via guided_decoding_backend parameter.
140
+ --hf-overrides HF_OVERRIDES
141
+ Extra arguments for the HuggingFace config. This should be a JSON string
142
+ that will be parsed into a dictionary.
143
+ --host HOST host name
144
+ --ignore-patterns IGNORE_PATTERNS
145
+ The pattern(s) to ignore when loading the model.Default to `original/**/*`
146
+ to avoid repeated loading of llama's checkpoints.
147
+ --kv-cache-dtype {auto,fp8,fp8_e5m2,fp8_e4m3}
148
+ Data type for kv cache storage. If "auto", will use model data type. CUDA
149
+ 11.8+ supports fp8 (=fp8_e4m3) and fp8_e5m2. ROCm (AMD GPU) supports fp8
150
+ (=fp8_e4m3)
151
+ --kv-transfer-config KV_TRANSFER_CONFIG
152
+ The configurations for distributed KV cache transfer. Should be a JSON
153
+ string.
154
+ --limit-mm-per-prompt LIMIT_MM_PER_PROMPT
155
+ For each multimodal plugin, limit how many input instances to allow for
156
+ each prompt. Expects a comma-separated list of items, e.g.:
157
+ `image=16,video=2` allows a maximum of 16 images and 2 videos per prompt.
158
+ Defaults to 1 for each modality.
159
+ --load-format {auto,pt,safetensors,npcache,dummy,tensorizer,sharded_state,gguf,bitsandbytes,mistral}
160
+ The format of the model weights to load. * "auto" will try to load the
161
+ weights in the safetensors format and fall back to the pytorch bin format
162
+ if safetensors format is not available. * "pt" will load the weights in
163
+ the pytorch bin format. * "safetensors" will load the weights in the
164
+ safetensors format. * "npcache" will load the weights in pytorch format
165
+ and store a numpy cache to speed up the loading. * "dummy" will initialize
166
+ the weights with random values, which is mainly for profiling. *
167
+ "tensorizer" will load the weights using tensorizer from CoreWeave. See
168
+ the Tensorize vLLM Model script in the Examples section for more
169
+ information. * "bitsandbytes" will load the weights using bitsandbytes
170
+ quantization.
171
+ --long-lora-scaling-factors LONG_LORA_SCALING_FACTORS
172
+ Specify multiple scaling factors (which can be different from base model
173
+ scaling factor - see eg. Long LoRA) to allow for multiple LoRA adapters
174
+ trained with those scaling factors to be used at the same time. If not
175
+ specified, only adapters trained with the base model scaling factor are
176
+ allowed.
177
+ --lora-dtype {auto,float16,bfloat16}
178
+ Data type for LoRA. If auto, will default to base model dtype.
179
+ --lora-extra-vocab-size LORA_EXTRA_VOCAB_SIZE
180
+ Maximum size of extra vocabulary that can be present in a LoRA adapter
181
+ (added to the base model vocabulary).
182
+ --lora-modules LORA_MODULES [LORA_MODULES ...]
183
+ LoRA module configurations in either 'name=path' formator JSON format.
184
+ Example (old format): 'name=path' Example (new format): '{"name": "name",
185
+ "local_path": "path", "base_model_name": "id"}'
186
+ --max-cpu-loras MAX_CPU_LORAS
187
+ Maximum number of LoRAs to store in CPU memory. Must be >= than max_loras.
188
+ Defaults to max_loras.
189
+ --max-log-len MAX_LOG_LEN
190
+ Max number of prompt characters or prompt ID numbers being printed in log.
191
+ Default: Unlimited
192
+ --max-logprobs MAX_LOGPROBS
193
+ Max number of log probs to return logprobs is specified in SamplingParams.
194
+ --max-lora-rank MAX_LORA_RANK
195
+ Max LoRA rank.
196
+ --max-loras MAX_LORAS
197
+ Max number of LoRAs in a single batch.
198
+ --max-model-len MAX_MODEL_LEN
199
+ Model context length. If unspecified, will be automatically derived from
200
+ the model config.
201
+ --max-num-batched-tokens MAX_NUM_BATCHED_TOKENS
202
+ Maximum number of batched tokens per iteration.
203
+ --max-num-seqs MAX_NUM_SEQS
204
+ Maximum number of sequences per iteration.
205
+ --max-parallel-loading-workers MAX_PARALLEL_LOADING_WORKERS
206
+ Load model sequentially in multiple batches, to avoid RAM OOM when using
207
+ tensor parallel and large models.
208
+ --max-prompt-adapter-token MAX_PROMPT_ADAPTER_TOKEN
209
+ Max number of PromptAdapters tokens
210
+ --max-prompt-adapters MAX_PROMPT_ADAPTERS
211
+ Max number of PromptAdapters in a batch.
212
+ --max-seq-len-to-capture MAX_SEQ_LEN_TO_CAPTURE
213
+ Maximum sequence length covered by CUDA graphs. When a sequence has
214
+ context length larger than this, we fall back to eager mode. Additionally
215
+ for encoder-decoder models, if the sequence length of the encoder input is
216
+ larger than this, we fall back to the eager mode.
217
+ --middleware MIDDLEWARE
218
+ Additional ASGI middleware to apply to the app. We accept multiple
219
+ --middleware arguments. The value should be an import path. If a function
220
+ is provided, vLLM will add it to the server using @app.middleware('http').
221
+ If a class is provided, vLLM will add it to the server using
222
+ app.add_middleware().
223
+ --mm-processor-kwargs MM_PROCESSOR_KWARGS
224
+ Overrides for the multimodal input mapping/processing, e.g., image
225
+ processor. For example: {"num_crops": 4}.
226
+ --model MODEL Name or path of the huggingface model to use.
227
+ --model-loader-extra-config MODEL_LOADER_EXTRA_CONFIG
228
+ Extra config for model loader. This will be passed to the model loader
229
+ corresponding to the chosen load_format. This should be a JSON string that
230
+ will be parsed into a dictionary.
231
+ --multi-step-stream-outputs [MULTI_STEP_STREAM_OUTPUTS]
232
+ If False, then multi-step will stream outputs at the end of all steps
233
+ --ngram-prompt-lookup-max NGRAM_PROMPT_LOOKUP_MAX
234
+ Max size of window for ngram prompt lookup in speculative decoding.
235
+ --ngram-prompt-lookup-min NGRAM_PROMPT_LOOKUP_MIN
236
+ Min size of window for ngram prompt lookup in speculative decoding.
237
+ --num-gpu-blocks-override NUM_GPU_BLOCKS_OVERRIDE
238
+ If specified, ignore GPU profiling result and use this number of GPU
239
+ blocks. Used for testing preemption.
240
+ --num-lookahead-slots NUM_LOOKAHEAD_SLOTS
241
+ Experimental scheduling config necessary for speculative decoding. This
242
+ will be replaced by speculative config in the future; it is present to
243
+ enable correctness tests until then.
244
+ --num-scheduler-steps NUM_SCHEDULER_STEPS
245
+ Maximum number of forward steps per scheduler call.
246
+ --num-speculative-tokens NUM_SPECULATIVE_TOKENS
247
+ The number of speculative tokens to sample from the draft model in
248
+ speculative decoding.
249
+ --otlp-traces-endpoint OTLP_TRACES_ENDPOINT
250
+ Target URL to which OpenTelemetry traces will be sent.
251
+ --override-neuron-config OVERRIDE_NEURON_CONFIG
252
+ Override or set neuron device configuration. e.g. {"cast_logits_dtype":
253
+ "bloat16"}.'
254
+ --override-pooler-config OVERRIDE_POOLER_CONFIG
255
+ Override or set the pooling method in the embedding model. e.g.
256
+ {"pooling_type": "mean", "normalize": false}.'
257
+ --pipeline-parallel-size PIPELINE_PARALLEL_SIZE, -pp PIPELINE_PARALLEL_SIZE
258
+ Number of pipeline stages.
259
+ --port PORT port number
260
+ --preemption-mode PREEMPTION_MODE
261
+ If 'recompute', the engine performs preemption by recomputing; If 'swap',
262
+ the engine performs preemption by block swapping.
263
+ --prompt-adapters PROMPT_ADAPTERS [PROMPT_ADAPTERS ...]
264
+ Prompt adapter configurations in the format name=path. Multiple adapters
265
+ can be specified.
266
+ --qlora-adapter-name-or-path QLORA_ADAPTER_NAME_OR_PATH
267
+ Name or path of the QLoRA adapter.
268
+ --quantization {aqlm,awq,deepspeedfp,tpu_int8,fp8,fbgemm_fp8,modelopt,marlin,gguf,gptq_marlin_24,gptq_marlin,awq_marlin,gptq,compressed-tensors,bitsandbytes,qqq,hqq,experts_int8,neuron_quant,ipex,None}, -q {aqlm,awq,deepspeedfp,tpu_int8,fp8,fbgemm_fp8,modelopt,marlin,gguf,gptq_marlin_24,gptq_marlin,awq_marlin,gptq,compressed-tensors,bitsandbytes,qqq,hqq,experts_int8,neuron_quant,ipex,None}
269
+ Method used to quantize the weights. If None, we first check the
270
+ `quantization_config` attribute in the model config file. If that is None,
271
+ we assume the model weights are not quantized and use `dtype` to determine
272
+ the data type of the weights.
273
+ --quantization-param-path QUANTIZATION_PARAM_PATH
274
+ Path to the JSON file containing the KV cache scaling factors. This should
275
+ generally be supplied, when KV cache dtype is FP8. Otherwise, KV cache
276
+ scaling factors default to 1.0, which may cause accuracy issues. FP8_E5M2
277
+ (without scaling) is only supported on cuda version greater than 11.8. On
278
+ ROCm (AMD GPU), FP8_E4M3 is instead supported for common inference
279
+ criteria.
280
+ --ray-workers-use-nsight
281
+ If specified, use nsight to profile Ray workers.
282
+ --response-role RESPONSE_ROLE
283
+ The role name to return if `request.add_generation_prompt=true`.
284
+ --return-tokens-as-token-ids
285
+ When --max-logprobs is specified, represents single tokens as strings of
286
+ the form 'token_id:{token_id}' so that tokens that are not JSON-encodable
287
+ can be identified.
288
+ --revision REVISION The specific model version to use. It can be a branch name, a tag name, or
289
+ a commit id. If unspecified, will use the default version.
290
+ --root-path ROOT_PATH
291
+ FastAPI root_path when app is behind a path based routing proxy
292
+ --rope-scaling ROPE_SCALING
293
+ RoPE scaling configuration in JSON format. For example,
294
+ {"rope_type":"dynamic","factor":2.0}
295
+ --rope-theta ROPE_THETA
296
+ RoPE theta. Use with `rope_scaling`. In some cases, changing the RoPE
297
+ theta improves the performance of the scaled model.
298
+ --scheduler-delay-factor SCHEDULER_DELAY_FACTOR
299
+ Apply a delay (of delay factor multiplied by previous prompt latency)
300
+ before scheduling next prompt.
301
+ --scheduling-policy {fcfs,priority}
302
+ The scheduling policy to use. "fcfs" (first come first served, i.e.
303
+ requests are handled in order of arrival; default) or "priority" (requests
304
+ are handled based on given priority (lower value means earlier handling)
305
+ and time of arrival deciding any ties).
306
+ --seed SEED Random seed for operations.
307
+ --served-model-name SERVED_MODEL_NAME [SERVED_MODEL_NAME ...]
308
+ The model name(s) used in the API. If multiple names are provided, the
309
+ server will respond to any of the provided names. The model name in the
310
+ model field of a response will be the first name in this list. If not
311
+ specified, the model name will be the same as the `--model` argument.
312
+ Noted that this name(s) will also be used in `model_name` tag content of
313
+ prometheus metrics, if multiple names provided, metrics tag will take the
314
+ first one.
315
+ --skip-tokenizer-init
316
+ Skip initialization of tokenizer and detokenizer
317
+ --spec-decoding-acceptance-method {rejection_sampler,typical_acceptance_sampler}
318
+ Specify the acceptance method to use during draft token verification in
319
+ speculative decoding. Two types of acceptance routines are supported: 1)
320
+ RejectionSampler which does not allow changing the acceptance rate of
321
+ draft tokens, 2) TypicalAcceptanceSampler which is configurable, allowing
322
+ for a higher acceptance rate at the cost of lower quality, and vice versa.
323
+ --speculative-disable-by-batch-size SPECULATIVE_DISABLE_BY_BATCH_SIZE
324
+ Disable speculative decoding for new incoming requests if the number of
325
+ enqueue requests is larger than this value.
326
+ --speculative-disable-mqa-scorer
327
+ If set to True, the MQA scorer will be disabled in speculative and fall
328
+ back to batch expansion
329
+ --speculative-draft-tensor-parallel-size SPECULATIVE_DRAFT_TENSOR_PARALLEL_SIZE, -spec-draft-tp SPECULATIVE_DRAFT_TENSOR_PARALLEL_SIZE
330
+ Number of tensor parallel replicas for the draft model in speculative
331
+ decoding.
332
+ --speculative-max-model-len SPECULATIVE_MAX_MODEL_LEN
333
+ The maximum sequence length supported by the draft model. Sequences over
334
+ this length will skip speculation.
335
+ --speculative-model SPECULATIVE_MODEL
336
+ The name of the draft model to be used in speculative decoding.
337
+ --speculative-model-quantization {aqlm,awq,deepspeedfp,tpu_int8,fp8,fbgemm_fp8,modelopt,marlin,gguf,gptq_marlin_24,gptq_marlin,awq_marlin,gptq,compressed-tensors,bitsandbytes,qqq,hqq,experts_int8,neuron_quant,ipex,None}
338
+ Method used to quantize the weights of speculative model. If None, we
339
+ first check the `quantization_config` attribute in the model config file.
340
+ If that is None, we assume the model weights are not quantized and use
341
+ `dtype` to determine the data type of the weights.
342
+ --ssl-ca-certs SSL_CA_CERTS
343
+ The CA certificates file
344
+ --ssl-cert-reqs SSL_CERT_REQS
345
+ Whether client certificate is required (see stdlib ssl module's)
346
+ --ssl-certfile SSL_CERTFILE
347
+ The file path to the SSL cert file
348
+ --ssl-keyfile SSL_KEYFILE
349
+ The file path to the SSL key file
350
+ --swap-space SWAP_SPACE
351
+ CPU swap space size (GiB) per GPU.
352
+ --task {auto,generate,embedding}
353
+ The task to use the model for. Each vLLM instance only supports one task,
354
+ even if the same model can be used for multiple tasks. When the model only
355
+ supports one task, "auto" can be used to select it; otherwise, you must
356
+ specify explicitly which task to use.
357
+ --tensor-parallel-size TENSOR_PARALLEL_SIZE, -tp TENSOR_PARALLEL_SIZE
358
+ Number of tensor parallel replicas.
359
+ --tokenizer TOKENIZER
360
+ Name or path of the huggingface tokenizer to use. If unspecified, model
361
+ name or path will be used.
362
+ --tokenizer-mode {auto,slow,mistral}
363
+ The tokenizer mode. * "auto" will use the fast tokenizer if available. *
364
+ "slow" will always use the slow tokenizer. * "mistral" will always use the
365
+ `mistral_common` tokenizer.
366
+ --tokenizer-pool-extra-config TOKENIZER_POOL_EXTRA_CONFIG
367
+ Extra config for tokenizer pool. This should be a JSON string that will be
368
+ parsed into a dictionary. Ignored if tokenizer_pool_size is 0.
369
+ --tokenizer-pool-size TOKENIZER_POOL_SIZE
370
+ Size of tokenizer pool to use for asynchronous tokenization. If 0, will
371
+ use synchronous tokenization.
372
+ --tokenizer-pool-type TOKENIZER_POOL_TYPE
373
+ Type of tokenizer pool to use for asynchronous tokenization. Ignored if
374
+ tokenizer_pool_size is 0.
375
+ --tokenizer-revision TOKENIZER_REVISION
376
+ Revision of the huggingface tokenizer to use. It can be a branch name, a
377
+ tag name, or a commit id. If unspecified, will use the default version.
378
+ --tool-call-parser {granite-20b-fc,granite,hermes,internlm,jamba,llama3_json,mistral,pythonic} or name registered in --tool-parser-plugin
379
+ Select the tool call parser depending on the model that you're using. This
380
+ is used to parse the model-generated tool call into OpenAI API format.
381
+ Required for --enable-auto-tool-choice.
382
+ --tool-parser-plugin TOOL_PARSER_PLUGIN
383
 + Specify the tool parser plugin used to parse the model-generated tool calls
384
 + into OpenAI API format; the names registered in this plugin can be used in
385
+ --tool-call-parser.
386
+ --trust-remote-code Trust remote code from huggingface.
387
+ --typical-acceptance-sampler-posterior-alpha TYPICAL_ACCEPTANCE_SAMPLER_POSTERIOR_ALPHA
388
+ A scaling factor for the entropy-based threshold for token acceptance in
389
+ the TypicalAcceptanceSampler. Typically defaults to sqrt of --typical-
390
+ acceptance-sampler-posterior-threshold i.e. 0.3
391
+ --typical-acceptance-sampler-posterior-threshold TYPICAL_ACCEPTANCE_SAMPLER_POSTERIOR_THRESHOLD
392
+ Set the lower bound threshold for the posterior probability of a token to
393
+ be accepted. This threshold is used by the TypicalAcceptanceSampler to
394
+ make sampling decisions during speculative decoding. Defaults to 0.09
395
+ --use-v2-block-manager
396
+ [DEPRECATED] block manager v1 has been removed and
397
+ SelfAttnBlockSpaceManager (i.e. block manager v2) is now the default.
398
+ Setting this flag to True or False has no effect on vLLM behavior.
399
+ --uvicorn-log-level {debug,info,warning,error,critical,trace}
400
+ log level for uvicorn
401
+ --worker-cls WORKER_CLS
402
+ The worker class to use for distributed execution.
403
+ --worker-use-ray Deprecated, use --distributed-executor-backend=ray.
404
+ -h, --help show this help message and exit
405
+ ```
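cli.md is the captured `--help` output of `vllm serve`. A minimal sketch, using only flags that appear in that help text, that launches the server from Python and waits for the OpenAI-compatible endpoint to answer (the model tag, port and timings are illustrative):

```python
# Hedged sketch: start `vllm serve` with a few of the flags documented above and
# wait until its OpenAI-compatible /v1/models route responds.
import subprocess
import time
import requests

port = 8000
proc = subprocess.Popen([
    "vllm", "serve", "Qwen/Qwen2.5-1.5B-Instruct",
    "--port", str(port),
    "--dtype", "auto",
    "--gpu-memory-utilization", "0.9",
    "--max-model-len", "4096",
])

for _ in range(60):
    try:
        if requests.get(f"http://localhost:{port}/v1/models", timeout=5).ok:
            print("vllm serve is ready")
            break
    except requests.RequestException:
        pass
    time.sleep(10)
else:
    proc.terminate()
    raise RuntimeError("vllm serve did not become ready in time")
```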
a_mllm_notebooks/vllm/download_md.ipynb ADDED
@@ -0,0 +1,213 @@
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": 5,
6
+ "metadata": {},
7
+ "outputs": [
8
+ {
9
+ "name": "stdout",
10
+ "output_type": "stream",
11
+ "text": [
12
+ "Requirement already satisfied: wget in /dscilab_dungvo/workspace/bin/envs/lmdeploy/lib/python3.8/site-packages (3.2)\n",
13
+ "Collecting jupytext\n",
14
+ " Using cached jupytext-1.16.6-py3-none-any.whl.metadata (13 kB)\n",
15
+ "Requirement already satisfied: markdown-it-py>=1.0 in /dscilab_dungvo/workspace/bin/envs/lmdeploy/lib/python3.8/site-packages (from jupytext) (3.0.0)\n",
16
+ "Collecting mdit-py-plugins (from jupytext)\n",
17
+ " Downloading mdit_py_plugins-0.4.2-py3-none-any.whl.metadata (2.8 kB)\n",
18
+ "Collecting nbformat (from jupytext)\n",
19
+ " Using cached nbformat-5.10.4-py3-none-any.whl.metadata (3.6 kB)\n",
20
+ "Requirement already satisfied: packaging in /dscilab_dungvo/workspace/bin/envs/lmdeploy/lib/python3.8/site-packages (from jupytext) (24.1)\n",
21
+ "Requirement already satisfied: pyyaml in /dscilab_dungvo/workspace/bin/envs/lmdeploy/lib/python3.8/site-packages (from jupytext) (6.0.2)\n",
22
+ "Requirement already satisfied: tomli in /dscilab_dungvo/workspace/bin/envs/lmdeploy/lib/python3.8/site-packages (from jupytext) (2.0.1)\n",
23
+ "Requirement already satisfied: mdurl~=0.1 in /dscilab_dungvo/workspace/bin/envs/lmdeploy/lib/python3.8/site-packages (from markdown-it-py>=1.0->jupytext) (0.1.2)\n",
24
+ "Collecting fastjsonschema>=2.15 (from nbformat->jupytext)\n",
25
+ " Using cached fastjsonschema-2.21.1-py3-none-any.whl.metadata (2.2 kB)\n",
26
+ "Requirement already satisfied: jsonschema>=2.6 in /dscilab_dungvo/workspace/bin/envs/lmdeploy/lib/python3.8/site-packages (from nbformat->jupytext) (4.23.0)\n",
27
+ "Requirement already satisfied: jupyter-core!=5.0.*,>=4.12 in /dscilab_dungvo/workspace/bin/envs/lmdeploy/lib/python3.8/site-packages (from nbformat->jupytext) (5.7.2)\n",
28
+ "Requirement already satisfied: traitlets>=5.1 in /dscilab_dungvo/workspace/bin/envs/lmdeploy/lib/python3.8/site-packages (from nbformat->jupytext) (5.14.3)\n",
29
+ "Requirement already satisfied: attrs>=22.2.0 in /dscilab_dungvo/workspace/bin/envs/lmdeploy/lib/python3.8/site-packages (from jsonschema>=2.6->nbformat->jupytext) (24.2.0)\n",
30
+ "Requirement already satisfied: importlib-resources>=1.4.0 in /dscilab_dungvo/workspace/bin/envs/lmdeploy/lib/python3.8/site-packages (from jsonschema>=2.6->nbformat->jupytext) (6.4.5)\n",
31
+ "Requirement already satisfied: jsonschema-specifications>=2023.03.6 in /dscilab_dungvo/workspace/bin/envs/lmdeploy/lib/python3.8/site-packages (from jsonschema>=2.6->nbformat->jupytext) (2023.12.1)\n",
32
+ "Requirement already satisfied: pkgutil-resolve-name>=1.3.10 in /dscilab_dungvo/workspace/bin/envs/lmdeploy/lib/python3.8/site-packages (from jsonschema>=2.6->nbformat->jupytext) (1.3.10)\n",
33
+ "Requirement already satisfied: referencing>=0.28.4 in /dscilab_dungvo/workspace/bin/envs/lmdeploy/lib/python3.8/site-packages (from jsonschema>=2.6->nbformat->jupytext) (0.35.1)\n",
34
+ "Requirement already satisfied: rpds-py>=0.7.1 in /dscilab_dungvo/workspace/bin/envs/lmdeploy/lib/python3.8/site-packages (from jsonschema>=2.6->nbformat->jupytext) (0.20.0)\n",
35
+ "Requirement already satisfied: platformdirs>=2.5 in /dscilab_dungvo/workspace/bin/envs/lmdeploy/lib/python3.8/site-packages (from jupyter-core!=5.0.*,>=4.12->nbformat->jupytext) (4.3.6)\n",
36
+ "Requirement already satisfied: zipp>=3.1.0 in /dscilab_dungvo/workspace/bin/envs/lmdeploy/lib/python3.8/site-packages (from importlib-resources>=1.4.0->jsonschema>=2.6->nbformat->jupytext) (3.20.2)\n",
37
+ "Using cached jupytext-1.16.6-py3-none-any.whl (154 kB)\n",
38
+ "Downloading mdit_py_plugins-0.4.2-py3-none-any.whl (55 kB)\n",
39
+ "Using cached nbformat-5.10.4-py3-none-any.whl (78 kB)\n",
40
+ "Using cached fastjsonschema-2.21.1-py3-none-any.whl (23 kB)\n",
41
+ "Installing collected packages: fastjsonschema, mdit-py-plugins, nbformat, jupytext\n",
42
+ "Successfully installed fastjsonschema-2.21.1 jupytext-1.16.6 mdit-py-plugins-0.4.2 nbformat-5.10.4\n",
43
+ "\u001b[33mWARNING: Running pip as the 'root' user can result in broken permissions and conflicting behaviour with the system package manager, possibly rendering your system unusable.It is recommended to use a virtual environment instead: https://pip.pypa.io/warnings/venv. Use the --root-user-action option if you know what you are doing and want to suppress this warning.\u001b[0m\u001b[33m\n",
44
+ "\u001b[0m"
45
+ ]
46
+ }
47
+ ],
48
+ "source": [
49
+ "# https://github.com/InternLM/lmdeploy/blob/main/docs/en/llm/pipeline.md\n",
50
+ "\n",
51
+ "# download then convert to jupyter notebook\n",
52
+ "!pip install wget jupytext\n",
53
+ "import os\n",
54
+ "import sys\n",
55
+ "import json\n",
56
+ "import requests\n",
57
+ "# import jupyter_text\n",
58
+ "\n",
59
+ "\n",
60
+ "def download_markdown_and_save(url, filename):\n",
61
+ " # remove existing file\n",
62
+ " if os.path.exists(filename):\n",
63
+ " os.remove(filename)\n",
64
+ " \n",
65
+ " import wget \n",
66
+ " # preprocess url to downloadable url\n",
67
+ " url = url.replace(\"github.com\", \"raw.githubusercontent.com\")\n",
68
+ " url = url.replace(\"blob/\", \"\")\n",
69
+ " print(f\"Downloading {url}\")\n",
70
+ " wget.download(url, filename)\n",
71
+ " print(f\"Downloaded {filename}\")\n",
72
+ " \n",
73
+ " \n",
74
+ " \n",
75
+ "# !jupytext --to notebook your_markdown_file.md\n",
76
+ "\n",
77
+ "def convert_markdown_to_jupyter_notebook(filename):\n",
78
+ " os.system(f\"jupytext --to notebook {filename}\")\n",
79
+ " print(f\"Converted {filename} to jupyter notebook.\")\n",
80
+ " \n",
81
+ " \n",
82
+ "def markdown2jupyter(url, filename):\n",
83
+ " download_markdown_and_save(url, filename)\n",
84
+ " convert_markdown_to_jupyter_notebook(filename)\n",
85
+ "\n",
86
+ "\n",
87
+ "# def main():\n",
88
+ "# url = \"https://raw.githubusercontent.com/InternLM/lmdeploy/main/docs/en/llm/pipeline.md\"\n",
89
+ "# filename = \"pipeline.md\"\n",
90
+ "# download_markdown_and_save(url, filename)\n",
91
+ "# convert_markdown_to_jupyter_notebook(filename)\n",
92
+ " \n",
93
+ " \n",
94
+ "# if __name__ == \"__main__\":\n",
95
+ "# main()\n",
96
+ " "
97
+ ]
98
+ },
99
+ {
100
+ "cell_type": "code",
101
+ "execution_count": null,
102
+ "metadata": {},
103
+ "outputs": [
104
+ {
105
+ "name": "stdout",
106
+ "output_type": "stream",
107
+ "text": [
108
+ "Downloading https://raw.githubusercontent.com/vllm-project/vllm/main/docs/source/getting_started/quickstart.md\n",
109
+ "Downloaded quickstart.md\n",
110
+ "[jupytext] Reading quickstart.md in format md\n",
111
+ "[jupytext] Writing quickstart.ipynb\n",
112
+ "Converted quickstart.md to jupyter notebook.\n"
113
+ ]
114
+ }
115
+ ],
116
+ "source": [
117
+ "# markdown2jupyter(\n",
118
+ "# 'https://github.com/vllm-project/vllm/blob/main/docs/source/getting_started/quickstart.md',\n",
119
+ "# 'quickstart.md'\n",
120
+ "# )"
121
+ ]
122
+ },
123
+ {
124
+ "cell_type": "code",
125
+ "execution_count": 11,
126
+ "metadata": {},
127
+ "outputs": [
128
+ {
129
+ "name": "stdout",
130
+ "output_type": "stream",
131
+ "text": [
132
+ "Overwriting links.txt\n"
133
+ ]
134
+ }
135
+ ],
136
+ "source": [
137
+ "%%writefile links.txt\n",
138
+ "'https://github.com/vllm-project/vllm/blob/main/docs/source/serving/distributed_serving.md'"
139
+ ]
140
+ },
141
+ {
142
+ "cell_type": "code",
143
+ "execution_count": 12,
144
+ "metadata": {},
145
+ "outputs": [
146
+ {
147
+ "name": "stdout",
148
+ "output_type": "stream",
149
+ "text": [
150
+ "Downloading https://raw.githubusercontent.com/vllm-project/vllm/main/docs/source/serving/distributed_serving.md\n",
151
+ "Downloaded distributed_serving.md\n",
152
+ "[jupytext] Reading distributed_serving.md in format md\n",
153
+ "[jupytext] Writing distributed_serving.ipynb\n",
154
+ "Converted distributed_serving.md to jupyter notebook.\n"
155
+ ]
156
+ },
157
+ {
158
+ "ename": "",
159
+ "evalue": "",
160
+ "output_type": "error",
161
+ "traceback": [
162
+ "\u001b[1;31mThe Kernel crashed while executing code in the current cell or a previous cell. \n",
163
+ "\u001b[1;31mPlease review the code in the cell(s) to identify a possible cause of the failure. \n",
164
+ "\u001b[1;31mClick <a href='https://aka.ms/vscodeJupyterKernelCrash'>here</a> for more info. \n",
165
+ "\u001b[1;31mView Jupyter <a href='command:jupyter.viewOutput'>log</a> for further details."
166
+ ]
167
+ }
168
+ ],
169
+ "source": [
170
+ "list_url = []\n",
171
+ "with open('links.txt') as f:\n",
172
+ " list_url = f.readlines()\n",
173
+ "for i in range(len(list_url)):\n",
174
+ " list_url[i] = eval(list_url[i])\n",
175
+ "\n",
176
+ "for i in range(len(list_url)):\n",
177
+ " url = list_url[i]\n",
178
+ " name = url.split('/')[-1]\n",
179
+ " markdown2jupyter(url, name)\n",
180
+ " \n",
181
+ "# delete all file{i}.md"
182
+ ]
183
+ },
184
+ {
185
+ "cell_type": "code",
186
+ "execution_count": null,
187
+ "metadata": {},
188
+ "outputs": [],
189
+ "source": []
190
+ }
191
+ ],
192
+ "metadata": {
193
+ "kernelspec": {
194
+ "display_name": "lmdeploy",
195
+ "language": "python",
196
+ "name": "python3"
197
+ },
198
+ "language_info": {
199
+ "codemirror_mode": {
200
+ "name": "ipython",
201
+ "version": 3
202
+ },
203
+ "file_extension": ".py",
204
+ "mimetype": "text/x-python",
205
+ "name": "python",
206
+ "nbconvert_exporter": "python",
207
+ "pygments_lexer": "ipython3",
208
+ "version": "3.8.19"
209
+ }
210
+ },
211
+ "nbformat": 4,
212
+ "nbformat_minor": 2
213
+ }
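The last code cell in this notebook reads links.txt and calls `eval()` on every line to strip the quotes, which breaks on blank lines and executes whatever the file contains. A minimal, safer sketch that reuses the `markdown2jupyter()` helper defined in the notebook above:

```python
# Hedged sketch: read links.txt without eval(); strip whitespace and surrounding
# quotes instead. Reuses the markdown2jupyter() helper defined in the notebook.
with open("links.txt") as f:
    urls = [line.strip().strip("'\"") for line in f if line.strip()]

for url in urls:
    markdown2jupyter(url, url.split("/")[-1])
```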
a_mllm_notebooks/vllm/florence_2.ipynb ADDED
@@ -0,0 +1,355 @@
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": 5,
6
+ "id": "054990e3-e0cb-4e36-8783-8af0ed9ebc9a",
7
+ "metadata": {},
8
+ "outputs": [],
9
+ "source": [
10
+ "from PIL import Image\n",
11
 + "temp_image = Image.open('cat.jpg')"
12
+ ]
13
+ },
14
+ {
15
+ "cell_type": "code",
16
+ "execution_count": 6,
17
+ "id": "e1d28b8b-5c71-4681-9b4b-a9ed6834867d",
18
+ "metadata": {},
19
+ "outputs": [
20
+ {
21
+ "name": "stdout",
22
+ "output_type": "stream",
23
+ "text": [
24
+ "INFO 12-31 05:01:23 config.py:2272] Downcasting torch.float32 to torch.bfloat16.\n",
25
+ "INFO 12-31 05:01:31 config.py:510] This model supports multiple tasks: {'reward', 'generate', 'classify', 'embed', 'score'}. Defaulting to 'generate'.\n",
26
+ "INFO 12-31 05:01:31 llm_engine.py:234] Initializing an LLM engine (v0.6.6.post1) with config: model='microsoft/Florence-2-base', speculative_config=None, tokenizer='facebook/bart-base', skip_tokenizer_init=False, tokenizer_mode=auto, revision=None, override_neuron_config=None, tokenizer_revision=None, trust_remote_code=True, dtype=torch.bfloat16, max_seq_len=1024, download_dir=None, load_format=auto, tensor_parallel_size=1, pipeline_parallel_size=1, disable_custom_all_reduce=False, quantization=None, enforce_eager=False, kv_cache_dtype=auto, quantization_param_path=None, device_config=cuda, decoding_config=DecodingConfig(guided_decoding_backend='xgrammar'), observability_config=ObservabilityConfig(otlp_traces_endpoint=None, collect_model_forward_time=False, collect_model_execute_time=False), seed=0, served_model_name=microsoft/Florence-2-base, num_scheduler_steps=1, multi_step_stream_outputs=True, enable_prefix_caching=False, chunked_prefill_enabled=False, use_async_output_proc=True, disable_mm_preprocessor_cache=False, mm_processor_kwargs=None, pooler_config=None, compilation_config={\"splitting_ops\":[\"vllm.unified_attention\",\"vllm.unified_attention_with_output\"],\"candidate_compile_sizes\":[],\"compile_sizes\":[],\"capture_sizes\":[256,248,240,232,224,216,208,200,192,184,176,168,160,152,144,136,128,120,112,104,96,88,80,72,64,56,48,40,32,24,16,8,4,2,1],\"max_capture_size\":256}, use_cached_outputs=False, \n",
27
+ "INFO 12-31 05:01:35 selector.py:120] Using Flash Attention backend.\n",
28
+ "INFO 12-31 05:01:37 model_runner.py:1094] Starting to load model microsoft/Florence-2-base...\n",
29
+ "INFO 12-31 05:01:37 weight_utils.py:251] Using model weights format ['*.bin']\n"
30
+ ]
31
+ },
32
+ {
33
+ "data": {
34
+ "application/vnd.jupyter.widget-view+json": {
35
+ "model_id": "3359625d0d944cb29803552332d7b5fa",
36
+ "version_major": 2,
37
+ "version_minor": 0
38
+ },
39
+ "text/plain": [
40
+ "Loading pt checkpoint shards: 0% Completed | 0/1 [00:00<?, ?it/s]\n"
41
+ ]
42
+ },
43
+ "metadata": {},
44
+ "output_type": "display_data"
45
+ },
46
+ {
47
+ "name": "stderr",
48
+ "output_type": "stream",
49
+ "text": [
50
+ "/dscilab_dungvo/workspace/bin/envs/vllm/lib/python3.10/site-packages/vllm/model_executor/model_loader/weight_utils.py:450: FutureWarning: You are using `torch.load` with `weights_only=False` (the current default value), which uses the default pickle module implicitly. It is possible to construct malicious pickle data which will execute arbitrary code during unpickling (See https://github.com/pytorch/pytorch/blob/main/SECURITY.md#untrusted-models for more details). In a future release, the default value for `weights_only` will be flipped to `True`. This limits the functions that could be executed during unpickling. Arbitrary objects will no longer be allowed to be loaded via this mode unless they are explicitly allowlisted by the user via `torch.serialization.add_safe_globals`. We recommend you start setting `weights_only=True` for any use case where you don't have full control of the loaded file. Please open an issue on GitHub for any issues related to this experimental feature.\n",
51
+ " state = torch.load(bin_file, map_location=\"cpu\")\n"
52
+ ]
53
+ },
54
+ {
55
+ "name": "stdout",
56
+ "output_type": "stream",
57
+ "text": [
58
+ "INFO 12-31 05:01:39 model_runner.py:1099] Loading model weights took 0.3440 GB\n",
59
+ "INFO 12-31 05:01:41 worker.py:241] Memory profiling takes 1.83 seconds\n",
60
+ "INFO 12-31 05:01:41 worker.py:241] the current vLLM instance can use total_gpu_memory (39.39GiB) x gpu_memory_utilization (0.90) = 35.45GiB\n",
61
+ "INFO 12-31 05:01:41 worker.py:241] model weights take 0.34GiB; non_torch_memory takes 0.11GiB; PyTorch activation peak memory takes 0.48GiB; the rest of the memory reserved for KV Cache is 34.53GiB.\n",
62
+ "INFO 12-31 05:01:41 gpu_executor.py:76] # GPU blocks: 125715, # CPU blocks: 14563\n",
63
+ "INFO 12-31 05:01:41 gpu_executor.py:80] Maximum concurrency for 1024 tokens per request: 1964.30x\n",
64
+ "INFO 12-31 05:01:46 model_runner.py:1415] Capturing cudagraphs for decoding. This may lead to unexpected consequences if the model is not static. To run the model in eager mode, set 'enforce_eager=True' or use '--enforce-eager' in the CLI. If out-of-memory error occurs during cudagraph capture, consider decreasing `gpu_memory_utilization` or switching to eager mode. You can also reduce the `max_num_seqs` as needed to decrease memory usage.\n"
65
+ ]
66
+ },
67
+ {
68
+ "name": "stderr",
69
+ "output_type": "stream",
70
+ "text": [
71
+ "Capturing CUDA graph shapes: 100%|██████████| 35/35 [00:24<00:00, 1.45it/s]"
72
+ ]
73
+ },
74
+ {
75
+ "name": "stdout",
76
+ "output_type": "stream",
77
+ "text": [
78
+ "INFO 12-31 05:02:10 model_runner.py:1535] Graph capturing finished in 24 secs, took 0.31 GiB\n",
79
+ "INFO 12-31 05:02:10 llm_engine.py:431] init engine (profile, create kv cache, warmup model) took 30.87 seconds\n"
80
+ ]
81
+ },
82
+ {
83
+ "name": "stderr",
84
+ "output_type": "stream",
85
+ "text": [
86
+ "\n"
87
+ ]
88
+ },
89
+ {
90
+ "ename": "TypeError",
91
+ "evalue": "inputs must be a string, TextPrompt, or TokensPrompt",
92
+ "output_type": "error",
93
+ "traceback": [
94
+ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
95
+ "\u001b[0;31mTypeError\u001b[0m Traceback (most recent call last)",
96
+ "Cell \u001b[0;32mIn[6], line 35\u001b[0m\n\u001b[1;32m 25\u001b[0m sampling_params \u001b[38;5;241m=\u001b[39m SamplingParams(\n\u001b[1;32m 26\u001b[0m temperature\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m0\u001b[39m,\n\u001b[1;32m 27\u001b[0m top_p\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m1.0\u001b[39m,\n\u001b[1;32m 28\u001b[0m min_tokens\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m0\u001b[39m,\n\u001b[1;32m 29\u001b[0m max_tokens\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m20\u001b[39m,\n\u001b[1;32m 30\u001b[0m )\n\u001b[1;32m 32\u001b[0m \u001b[38;5;66;03m# Generate output tokens from the prompts. The output is a list of\u001b[39;00m\n\u001b[1;32m 33\u001b[0m \u001b[38;5;66;03m# RequestOutput objects that contain the prompt, generated\u001b[39;00m\n\u001b[1;32m 34\u001b[0m \u001b[38;5;66;03m# text, and other information.\u001b[39;00m\n\u001b[0;32m---> 35\u001b[0m outputs \u001b[38;5;241m=\u001b[39m \u001b[43mllm\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mgenerate\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 36\u001b[0m \u001b[43m \u001b[49m\u001b[43m{\u001b[49m\n\u001b[1;32m 37\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mprompts\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[43mprompts\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 38\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mmulti_modal_data\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[43m{\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mimage\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[43m[\u001b[49m\u001b[43mtemp_image\u001b[49m\u001b[43m]\u001b[49m\u001b[43m}\u001b[49m\n\u001b[1;32m 39\u001b[0m \u001b[43m \u001b[49m\u001b[43m}\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\n\u001b[1;32m 40\u001b[0m \u001b[43m \u001b[49m\u001b[43msampling_params\u001b[49m\n\u001b[1;32m 41\u001b[0m \u001b[43m)\u001b[49m\n\u001b[1;32m 43\u001b[0m \u001b[38;5;66;03m# Print the outputs.\u001b[39;00m\n\u001b[1;32m 44\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m output \u001b[38;5;129;01min\u001b[39;00m outputs:\n",
97
+ "File \u001b[0;32m/dscilab_dungvo/workspace/bin/envs/vllm/lib/python3.10/site-packages/vllm/utils.py:1021\u001b[0m, in \u001b[0;36mdeprecate_kwargs.<locals>.wrapper.<locals>.inner\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 1014\u001b[0m msg \u001b[38;5;241m+\u001b[39m\u001b[38;5;241m=\u001b[39m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m \u001b[39m\u001b[38;5;132;01m{\u001b[39;00madditional_message\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 1016\u001b[0m warnings\u001b[38;5;241m.\u001b[39mwarn(\n\u001b[1;32m 1017\u001b[0m \u001b[38;5;167;01mDeprecationWarning\u001b[39;00m(msg),\n\u001b[1;32m 1018\u001b[0m stacklevel\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m3\u001b[39m, \u001b[38;5;66;03m# The inner function takes up one level\u001b[39;00m\n\u001b[1;32m 1019\u001b[0m )\n\u001b[0;32m-> 1021\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mfn\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n",
98
+ "File \u001b[0;32m/dscilab_dungvo/workspace/bin/envs/vllm/lib/python3.10/site-packages/vllm/entrypoints/llm.py:454\u001b[0m, in \u001b[0;36mLLM.generate\u001b[0;34m(self, prompts, sampling_params, prompt_token_ids, use_tqdm, lora_request, prompt_adapter_request, guided_options_request, priority)\u001b[0m\n\u001b[1;32m 450\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m sampling_params \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[1;32m 451\u001b[0m \u001b[38;5;66;03m# Use default sampling params.\u001b[39;00m\n\u001b[1;32m 452\u001b[0m sampling_params \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mget_default_sampling_params()\n\u001b[0;32m--> 454\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_validate_and_add_requests\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 455\u001b[0m \u001b[43m \u001b[49m\u001b[43mprompts\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mparsed_prompts\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 456\u001b[0m \u001b[43m \u001b[49m\u001b[43mparams\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43msampling_params\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 457\u001b[0m \u001b[43m \u001b[49m\u001b[43mlora_request\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mlora_request\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 458\u001b[0m \u001b[43m \u001b[49m\u001b[43mprompt_adapter_request\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mprompt_adapter_request\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 459\u001b[0m \u001b[43m \u001b[49m\u001b[43mguided_options\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mguided_options_request\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 460\u001b[0m \u001b[43m \u001b[49m\u001b[43mpriority\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mpriority\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 462\u001b[0m outputs \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_run_engine(use_tqdm\u001b[38;5;241m=\u001b[39muse_tqdm)\n\u001b[1;32m 463\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mengine_class\u001b[38;5;241m.\u001b[39mvalidate_outputs(outputs, RequestOutput)\n",
99
+ "File \u001b[0;32m/dscilab_dungvo/workspace/bin/envs/vllm/lib/python3.10/site-packages/vllm/entrypoints/llm.py:1175\u001b[0m, in \u001b[0;36mLLM._validate_and_add_requests\u001b[0;34m(self, prompts, params, lora_request, prompt_adapter_request, guided_options, priority)\u001b[0m\n\u001b[1;32m 1173\u001b[0m \u001b[38;5;66;03m# Add requests to the engine.\u001b[39;00m\n\u001b[1;32m 1174\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m i, prompt \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28menumerate\u001b[39m(prompts):\n\u001b[0;32m-> 1175\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_add_request\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 1176\u001b[0m \u001b[43m \u001b[49m\u001b[43mprompt\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1177\u001b[0m \u001b[43m \u001b[49m\u001b[43mparams\u001b[49m\u001b[43m[\u001b[49m\u001b[43mi\u001b[49m\u001b[43m]\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mif\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[38;5;28;43misinstance\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43mparams\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mSequence\u001b[49m\u001b[43m)\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43;01melse\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[43mparams\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1178\u001b[0m \u001b[43m \u001b[49m\u001b[43mlora_request\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mlora_request\u001b[49m\u001b[43m[\u001b[49m\u001b[43mi\u001b[49m\u001b[43m]\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mif\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[38;5;28;43misinstance\u001b[39;49m\u001b[43m(\u001b[49m\n\u001b[1;32m 1179\u001b[0m \u001b[43m \u001b[49m\u001b[43mlora_request\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mSequence\u001b[49m\u001b[43m)\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43;01melse\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[43mlora_request\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1180\u001b[0m \u001b[43m \u001b[49m\u001b[43mprompt_adapter_request\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mprompt_adapter_request\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1181\u001b[0m \u001b[43m \u001b[49m\u001b[43mpriority\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mpriority\u001b[49m\u001b[43m[\u001b[49m\u001b[43mi\u001b[49m\u001b[43m]\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mif\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[43mpriority\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43;01melse\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[38;5;241;43m0\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1182\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n",
100
+ "File \u001b[0;32m/dscilab_dungvo/workspace/bin/envs/vllm/lib/python3.10/site-packages/vllm/entrypoints/llm.py:1193\u001b[0m, in \u001b[0;36mLLM._add_request\u001b[0;34m(self, prompt, params, lora_request, prompt_adapter_request, priority)\u001b[0m\n\u001b[1;32m 1184\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21m_add_request\u001b[39m(\n\u001b[1;32m 1185\u001b[0m \u001b[38;5;28mself\u001b[39m,\n\u001b[1;32m 1186\u001b[0m prompt: PromptType,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 1190\u001b[0m priority: \u001b[38;5;28mint\u001b[39m \u001b[38;5;241m=\u001b[39m \u001b[38;5;241m0\u001b[39m,\n\u001b[1;32m 1191\u001b[0m ) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[1;32m 1192\u001b[0m request_id \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mstr\u001b[39m(\u001b[38;5;28mnext\u001b[39m(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mrequest_counter))\n\u001b[0;32m-> 1193\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mllm_engine\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43madd_request\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 1194\u001b[0m \u001b[43m \u001b[49m\u001b[43mrequest_id\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1195\u001b[0m \u001b[43m \u001b[49m\u001b[43mprompt\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1196\u001b[0m \u001b[43m \u001b[49m\u001b[43mparams\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1197\u001b[0m \u001b[43m \u001b[49m\u001b[43mlora_request\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mlora_request\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1198\u001b[0m \u001b[43m \u001b[49m\u001b[43mprompt_adapter_request\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mprompt_adapter_request\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1199\u001b[0m \u001b[43m \u001b[49m\u001b[43mpriority\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mpriority\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1200\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n",
101
+ "File \u001b[0;32m/dscilab_dungvo/workspace/bin/envs/vllm/lib/python3.10/site-packages/vllm/utils.py:1021\u001b[0m, in \u001b[0;36mdeprecate_kwargs.<locals>.wrapper.<locals>.inner\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 1014\u001b[0m msg \u001b[38;5;241m+\u001b[39m\u001b[38;5;241m=\u001b[39m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m \u001b[39m\u001b[38;5;132;01m{\u001b[39;00madditional_message\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 1016\u001b[0m warnings\u001b[38;5;241m.\u001b[39mwarn(\n\u001b[1;32m 1017\u001b[0m \u001b[38;5;167;01mDeprecationWarning\u001b[39;00m(msg),\n\u001b[1;32m 1018\u001b[0m stacklevel\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m3\u001b[39m, \u001b[38;5;66;03m# The inner function takes up one level\u001b[39;00m\n\u001b[1;32m 1019\u001b[0m )\n\u001b[0;32m-> 1021\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mfn\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n",
102
+ "File \u001b[0;32m/dscilab_dungvo/workspace/bin/envs/vllm/lib/python3.10/site-packages/vllm/engine/llm_engine.py:782\u001b[0m, in \u001b[0;36mLLMEngine.add_request\u001b[0;34m(self, request_id, prompt, params, arrival_time, lora_request, trace_headers, prompt_adapter_request, priority, inputs)\u001b[0m\n\u001b[1;32m 777\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mtokenizer \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[1;32m 778\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_validate_token_prompt(\n\u001b[1;32m 779\u001b[0m prompt,\n\u001b[1;32m 780\u001b[0m tokenizer\u001b[38;5;241m=\u001b[39m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mget_tokenizer(lora_request\u001b[38;5;241m=\u001b[39mlora_request))\n\u001b[0;32m--> 782\u001b[0m preprocessed_inputs \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43minput_preprocessor\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mpreprocess\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 783\u001b[0m \u001b[43m \u001b[49m\u001b[43mprompt\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 784\u001b[0m \u001b[43m \u001b[49m\u001b[43mrequest_id\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mrequest_id\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 785\u001b[0m \u001b[43m \u001b[49m\u001b[43mlora_request\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mlora_request\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 786\u001b[0m \u001b[43m \u001b[49m\u001b[43mprompt_adapter_request\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mprompt_adapter_request\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 787\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 788\u001b[0m processed_inputs \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39minput_processor(preprocessed_inputs)\n\u001b[1;32m 790\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_add_processed_request(\n\u001b[1;32m 791\u001b[0m request_id\u001b[38;5;241m=\u001b[39mrequest_id,\n\u001b[1;32m 792\u001b[0m processed_inputs\u001b[38;5;241m=\u001b[39mprocessed_inputs,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 798\u001b[0m priority\u001b[38;5;241m=\u001b[39mpriority,\n\u001b[1;32m 799\u001b[0m )\n",
103
+ "File \u001b[0;32m/dscilab_dungvo/workspace/bin/envs/vllm/lib/python3.10/site-packages/vllm/inputs/preprocess.py:640\u001b[0m, in \u001b[0;36mInputPreprocessor.preprocess\u001b[0;34m(self, prompt, request_id, lora_request, prompt_adapter_request)\u001b[0m\n\u001b[1;32m 636\u001b[0m \u001b[38;5;250m\u001b[39m\u001b[38;5;124;03m\"\"\"Preprocess the input prompt.\"\"\"\u001b[39;00m\n\u001b[1;32m 637\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mmodel_config\u001b[38;5;241m.\u001b[39mis_encoder_decoder:\n\u001b[1;32m 638\u001b[0m \u001b[38;5;66;03m# Encoder-decoder model requires special mapping of\u001b[39;00m\n\u001b[1;32m 639\u001b[0m \u001b[38;5;66;03m# input prompts to encoder & decoder\u001b[39;00m\n\u001b[0;32m--> 640\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_process_encoder_decoder_prompt\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 641\u001b[0m \u001b[43m \u001b[49m\u001b[43mprompt\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 642\u001b[0m \u001b[43m \u001b[49m\u001b[43mrequest_id\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mrequest_id\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 643\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 645\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m is_explicit_encoder_decoder_prompt(prompt):\n\u001b[1;32m 646\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mCannot pass encoder-decoder prompt \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 647\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mto decoder-only models\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n",
104
+ "File \u001b[0;32m/dscilab_dungvo/workspace/bin/envs/vllm/lib/python3.10/site-packages/vllm/inputs/preprocess.py:515\u001b[0m, in \u001b[0;36mInputPreprocessor._process_encoder_decoder_prompt\u001b[0;34m(self, prompt, request_id)\u001b[0m\n\u001b[1;32m 510\u001b[0m decoder_inputs \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_prompt_to_llm_inputs(\n\u001b[1;32m 511\u001b[0m decoder_input,\n\u001b[1;32m 512\u001b[0m request_id\u001b[38;5;241m=\u001b[39mrequest_id,\n\u001b[1;32m 513\u001b[0m )\n\u001b[1;32m 514\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m--> 515\u001b[0m encoder_inputs \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_prompt_to_llm_inputs\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 516\u001b[0m \u001b[43m \u001b[49m\u001b[43mprompt\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 517\u001b[0m \u001b[43m \u001b[49m\u001b[43mrequest_id\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mrequest_id\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 518\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 520\u001b[0m decoder_inputs \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[1;32m 522\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_build_enc_dec_llm_inputs(encoder_inputs, decoder_inputs)\n",
105
+ "File \u001b[0;32m/dscilab_dungvo/workspace/bin/envs/vllm/lib/python3.10/site-packages/vllm/inputs/preprocess.py:289\u001b[0m, in \u001b[0;36mInputPreprocessor._prompt_to_llm_inputs\u001b[0;34m(self, prompt, request_id, lora_request)\u001b[0m\n\u001b[1;32m 270\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21m_prompt_to_llm_inputs\u001b[39m(\n\u001b[1;32m 271\u001b[0m \u001b[38;5;28mself\u001b[39m,\n\u001b[1;32m 272\u001b[0m prompt: SingletonPrompt,\n\u001b[1;32m 273\u001b[0m request_id: \u001b[38;5;28mstr\u001b[39m,\n\u001b[1;32m 274\u001b[0m lora_request: Optional[LoRARequest] \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m,\n\u001b[1;32m 275\u001b[0m ) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m SingletonInputs:\n\u001b[1;32m 276\u001b[0m \u001b[38;5;250m \u001b[39m\u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 277\u001b[0m \u001b[38;5;124;03m Extract the singleton inputs from a prompt.\u001b[39;00m\n\u001b[1;32m 278\u001b[0m \n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 287\u001b[0m \u001b[38;5;124;03m * :class:`SingletonInputs` instance\u001b[39;00m\n\u001b[1;32m 288\u001b[0m \u001b[38;5;124;03m \"\"\"\u001b[39;00m\n\u001b[0;32m--> 289\u001b[0m parsed \u001b[38;5;241m=\u001b[39m \u001b[43mparse_singleton_prompt\u001b[49m\u001b[43m(\u001b[49m\u001b[43mprompt\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 291\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m parsed[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mtype\u001b[39m\u001b[38;5;124m\"\u001b[39m] \u001b[38;5;241m==\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mstr\u001b[39m\u001b[38;5;124m\"\u001b[39m:\n\u001b[1;32m 292\u001b[0m prompt_text \u001b[38;5;241m=\u001b[39m parsed[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mcontent\u001b[39m\u001b[38;5;124m\"\u001b[39m]\n",
106
+ "File \u001b[0;32m/dscilab_dungvo/workspace/bin/envs/vllm/lib/python3.10/site-packages/vllm/inputs/parse.py:98\u001b[0m, in \u001b[0;36mparse_singleton_prompt\u001b[0;34m(prompt)\u001b[0m\n\u001b[1;32m 95\u001b[0m \u001b[38;5;28;01melif\u001b[39;00m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mprompt\u001b[39m\u001b[38;5;124m\"\u001b[39m \u001b[38;5;129;01min\u001b[39;00m prompt:\n\u001b[1;32m 96\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m ParsedTextPrompt(\u001b[38;5;28mtype\u001b[39m\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mtext\u001b[39m\u001b[38;5;124m\"\u001b[39m, content\u001b[38;5;241m=\u001b[39mprompt)\n\u001b[0;32m---> 98\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mTypeError\u001b[39;00m(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124minputs must be a string, TextPrompt, or TokensPrompt\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n",
107
+ "\u001b[0;31mTypeError\u001b[0m: inputs must be a string, TextPrompt, or TokensPrompt"
108
+ ]
109
+ }
110
+ ],
111
+ "source": [
112
+ "'''\n",
113
+ "Demonstrate prompting of text-to-text\n",
114
+ "encoder/decoder models, specifically Florence-2\n",
115
+ "'''\n",
116
+ "# TODO(Isotr0py):\n",
117
+ "# Move to offline_inference_vision_language.py after porting vision backbone\n",
118
+ "from vllm import LLM, SamplingParams\n",
119
+ "\n",
120
+ "dtype = \"bfloat16\"\n",
121
+ "\n",
122
+ "# Create a Florence-2 encoder/decoder model instance\n",
123
+ "llm = LLM(\n",
124
+ " model=\"microsoft/Florence-2-base\",\n",
125
+ " tokenizer=\"facebook/bart-base\",\n",
126
+ " dtype=dtype,\n",
127
+ " trust_remote_code=True,\n",
128
+ ")\n",
129
+ "\n",
130
+ "prompts = [\n",
131
+ " \"<CAPTION>\", \"<DETAILED_CAPTION>\", \"<MORE_DETAILED_CAPTION>\",\n",
132
+ " \"<CAPTION_TO_PHRASE_GROUNDING>\", \"<OD>\", \"<DENSE_REGION_CAPTION>\",\n",
133
+ " \"<REGION_PROPOSAL>\", \"<OCR>\", \"<OCR_WITH_REGION>\"\n",
134
+ "]\n",
135
+ "# Create a sampling params object.\n",
136
+ "sampling_params = SamplingParams(\n",
137
+ " temperature=0,\n",
138
+ " top_p=1.0,\n",
139
+ " min_tokens=0,\n",
140
+ " max_tokens=20,\n",
141
+ ")\n",
142
+ "\n",
143
+ "# Generate output tokens from the prompts. The output is a list of\n",
144
+ "# RequestOutput objects that contain the prompt, generated\n",
145
+ "# text, and other information.\n",
146
+ "outputs = llm.generate(\n",
147
+ " {\n",
148
+ " \"prompts\": prompts,\n",
149
+ " \"multi_modal_data\": {\"image\": [temp_image]}\n",
150
+ " }, \n",
151
+ " sampling_params\n",
152
+ ")\n",
153
+ "\n",
154
+ "# Print the outputs.\n",
155
+ "for output in outputs:\n",
156
+ " prompt = output.prompt\n",
157
+ " encoder_prompt = output.encoder_prompt\n",
158
+ " generated_text = output.outputs[0].text\n",
159
+ " print(f\"Encoder prompt: {encoder_prompt!r}, \"\n",
160
+ " f\"Decoder prompt: {prompt!r}, \"\n",
161
+ " f\"Generated text: {generated_text!r}\")"
162
+ ]
163
+ },
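Note on the TypeError above: `LLM.generate` rejects a single dict keyed by "prompts"; as the exception message and the signature inspected in the next cell show, it expects a string, a TextPrompt/TokensPrompt dict, or a sequence of those. A minimal corrective sketch, reusing `llm`, `prompts`, `sampling_params`, and `temp_image` from the cell above, and assuming this vLLM build actually accepts image inputs for Florence-2 (the TODO comment notes the vision backbone is not yet ported, so it may not):

# One TextPrompt dict per request: the key is "prompt" (singular), and any
# multimodal inputs are attached per prompt via "multi_modal_data".
requests = [
    {"prompt": p, "multi_modal_data": {"image": temp_image}}
    for p in prompts
]
outputs = llm.generate(requests, sampling_params)

for output in outputs:
    print(f"Prompt: {output.prompt!r}, generated: {output.outputs[0].text!r}")

If image inputs are not supported for this model in the installed version, dropping the "multi_modal_data" entry and passing the bare prompt strings at least satisfies the input-type check.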
164
+ {
165
+ "cell_type": "code",
166
+ "execution_count": 7,
167
+ "id": "efdc95be-45af-4099-9110-040622c2689a",
168
+ "metadata": {},
169
+ "outputs": [
170
+ {
171
+ "data": {
172
+ "text/plain": [
173
+ "\u001b[0;31mSignature:\u001b[0m\n",
174
+ "\u001b[0mllm\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mgenerate\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\u001b[0m\n",
175
+ "\u001b[0;34m\u001b[0m \u001b[0mprompts\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mUnion\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mstr\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mvllm\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0minputs\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdata\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mTextPrompt\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mvllm\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0minputs\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdata\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mTokensPrompt\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mvllm\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0minputs\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdata\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mExplicitEncoderDecoderPrompt\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mSequence\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mUnion\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mstr\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mvllm\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0minputs\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdata\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mTextPrompt\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mvllm\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0minputs\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdata\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mTokensPrompt\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mvllm\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0minputs\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdata\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mExplicitEncoderDecoderPrompt\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mList\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mstr\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mNoneType\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n",
176
+ "\u001b[0;34m\u001b[0m \u001b[0msampling_params\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mUnion\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mvllm\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msampling_params\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mSamplingParams\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mSequence\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mvllm\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msampling_params\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mSamplingParams\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mNoneType\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n",
177
+ "\u001b[0;34m\u001b[0m \u001b[0mprompt_token_ids\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mUnion\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mList\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mint\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mList\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mList\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mint\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mNoneType\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n",
178
+ "\u001b[0;34m\u001b[0m \u001b[0muse_tqdm\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mbool\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;32mTrue\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n",
179
+ "\u001b[0;34m\u001b[0m \u001b[0mlora_request\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mUnion\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mList\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mvllm\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mlora\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mrequest\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mLoRARequest\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mvllm\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mlora\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mrequest\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mLoRARequest\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mNoneType\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n",
180
+ "\u001b[0;34m\u001b[0m \u001b[0mprompt_adapter_request\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mOptional\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mvllm\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mprompt_adapter\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mrequest\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mPromptAdapterRequest\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n",
181
+ "\u001b[0;34m\u001b[0m \u001b[0mguided_options_request\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mUnion\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mvllm\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmodel_executor\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mguided_decoding\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mguided_fields\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mLLMGuidedOptions\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mvllm\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmodel_executor\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mguided_decoding\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mguided_fields\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mGuidedDecodingRequest\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mNoneType\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n",
182
+ "\u001b[0;34m\u001b[0m \u001b[0mpriority\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mOptional\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mList\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mint\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n",
183
+ "\u001b[0;34m\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m->\u001b[0m \u001b[0mList\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mvllm\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0moutputs\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mRequestOutput\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
184
+ "\u001b[0;31mSource:\u001b[0m \n",
185
+ " \u001b[0;34m@\u001b[0m\u001b[0mdeprecate_kwargs\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\u001b[0m\n",
186
+ "\u001b[0;34m\u001b[0m \u001b[0;34m\"prompt_token_ids\"\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n",
187
+ "\u001b[0;34m\u001b[0m \u001b[0mis_deprecated\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mlambda\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mLLM\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mDEPRECATE_LEGACY\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n",
188
+ "\u001b[0;34m\u001b[0m \u001b[0madditional_message\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m\"Please use the 'prompts' parameter instead.\"\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n",
189
+ "\u001b[0;34m\u001b[0m \u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\n",
190
+ "\u001b[0;34m\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mgenerate\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\u001b[0m\n",
191
+ "\u001b[0;34m\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n",
192
+ "\u001b[0;34m\u001b[0m \u001b[0mprompts\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mUnion\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mUnion\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mPromptType\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mSequence\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mPromptType\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n",
193
+ "\u001b[0;34m\u001b[0m \u001b[0mOptional\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mUnion\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mstr\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mList\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mstr\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n",
194
+ "\u001b[0;34m\u001b[0m \u001b[0msampling_params\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mOptional\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mUnion\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mSamplingParams\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n",
195
+ "\u001b[0;34m\u001b[0m \u001b[0mSequence\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mSamplingParams\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n",
196
+ "\u001b[0;34m\u001b[0m \u001b[0mprompt_token_ids\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mOptional\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mUnion\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mList\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mint\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mList\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mList\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mint\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n",
197
+ "\u001b[0;34m\u001b[0m \u001b[0muse_tqdm\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mbool\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;32mTrue\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n",
198
+ "\u001b[0;34m\u001b[0m \u001b[0mlora_request\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mOptional\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mUnion\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mList\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mLoRARequest\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mLoRARequest\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n",
199
+ "\u001b[0;34m\u001b[0m \u001b[0mprompt_adapter_request\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mOptional\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mPromptAdapterRequest\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n",
200
+ "\u001b[0;34m\u001b[0m \u001b[0mguided_options_request\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mOptional\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mUnion\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mLLMGuidedOptions\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n",
201
+ "\u001b[0;34m\u001b[0m \u001b[0mGuidedDecodingRequest\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n",
202
+ "\u001b[0;34m\u001b[0m \u001b[0mpriority\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mOptional\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mList\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mint\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n",
203
+ "\u001b[0;34m\u001b[0m \u001b[0;34m)\u001b[0m \u001b[0;34m->\u001b[0m \u001b[0mList\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mRequestOutput\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\n",
204
+ "\u001b[0;34m\u001b[0m \u001b[0;34m\"\"\"Generates the completions for the input prompts.\u001b[0m\n",
205
+ "\u001b[0;34m\u001b[0m\n",
206
+ "\u001b[0;34m This class automatically batches the given prompts, considering\u001b[0m\n",
207
+ "\u001b[0;34m the memory constraint. For the best performance, put all of your prompts\u001b[0m\n",
208
+ "\u001b[0;34m into a single list and pass it to this method.\u001b[0m\n",
209
+ "\u001b[0;34m\u001b[0m\n",
210
+ "\u001b[0;34m Args:\u001b[0m\n",
211
+ "\u001b[0;34m prompts: The prompts to the LLM. You may pass a sequence of prompts\u001b[0m\n",
212
+ "\u001b[0;34m for batch inference. See :class:`~vllm.inputs.PromptType`\u001b[0m\n",
213
+ "\u001b[0;34m for more details about the format of each prompts.\u001b[0m\n",
214
+ "\u001b[0;34m sampling_params: The sampling parameters for text generation. If\u001b[0m\n",
215
+ "\u001b[0;34m None, we use the default sampling parameters.\u001b[0m\n",
216
+ "\u001b[0;34m When it is a single value, it is applied to every prompt.\u001b[0m\n",
217
+ "\u001b[0;34m When it is a list, the list must have the same length as the\u001b[0m\n",
218
+ "\u001b[0;34m prompts and it is paired one by one with the prompt.\u001b[0m\n",
219
+ "\u001b[0;34m use_tqdm: Whether to use tqdm to display the progress bar.\u001b[0m\n",
220
+ "\u001b[0;34m lora_request: LoRA request to use for generation, if any.\u001b[0m\n",
221
+ "\u001b[0;34m prompt_adapter_request: Prompt Adapter request to use for\u001b[0m\n",
222
+ "\u001b[0;34m generation, if any.\u001b[0m\n",
223
+ "\u001b[0;34m priority: The priority of the requests, if any.\u001b[0m\n",
224
+ "\u001b[0;34m Only applicable when priority scheduling policy is enabled.\u001b[0m\n",
225
+ "\u001b[0;34m\u001b[0m\n",
226
+ "\u001b[0;34m Returns:\u001b[0m\n",
227
+ "\u001b[0;34m A list of ``RequestOutput`` objects containing the\u001b[0m\n",
228
+ "\u001b[0;34m generated completions in the same order as the input prompts.\u001b[0m\n",
229
+ "\u001b[0;34m\u001b[0m\n",
230
+ "\u001b[0;34m Note:\u001b[0m\n",
231
+ "\u001b[0;34m Using ``prompts`` and ``prompt_token_ids`` as keyword parameters is\u001b[0m\n",
232
+ "\u001b[0;34m considered legacy and may be deprecated in the future. You should\u001b[0m\n",
233
+ "\u001b[0;34m instead pass them via the ``inputs`` parameter.\u001b[0m\n",
234
+ "\u001b[0;34m \"\"\"\u001b[0m\u001b[0;34m\u001b[0m\n",
235
+ "\u001b[0;34m\u001b[0m \u001b[0mrunner_type\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mllm_engine\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmodel_config\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mrunner_type\u001b[0m\u001b[0;34m\u001b[0m\n",
236
+ "\u001b[0;34m\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mrunner_type\u001b[0m \u001b[0;34m!=\u001b[0m \u001b[0;34m\"generate\"\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\n",
237
+ "\u001b[0;34m\u001b[0m \u001b[0mmessages\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0;34m\u001b[0m\n",
238
+ "\u001b[0;34m\u001b[0m \u001b[0;34m\"LLM.generate() is only supported for (conditional) generation \"\u001b[0m\u001b[0;34m\u001b[0m\n",
239
+ "\u001b[0;34m\u001b[0m \u001b[0;34m\"models (XForCausalLM, XForConditionalGeneration).\"\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n",
240
+ "\u001b[0;34m\u001b[0m \u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\n",
241
+ "\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\n",
242
+ "\u001b[0;34m\u001b[0m \u001b[0msupported_runner_types\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mllm_engine\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmodel_config\u001b[0m \\\n",
243
+ " \u001b[0;34m.\u001b[0m\u001b[0msupported_runner_types\u001b[0m\u001b[0;34m\u001b[0m\n",
244
+ "\u001b[0;34m\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0;34m\"generate\"\u001b[0m \u001b[0;32min\u001b[0m \u001b[0msupported_runner_types\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\n",
245
+ "\u001b[0;34m\u001b[0m \u001b[0mmessages\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mappend\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\u001b[0m\n",
246
+ "\u001b[0;34m\u001b[0m \u001b[0;34m\"Your model supports the 'generate' runner, but is \"\u001b[0m\u001b[0;34m\u001b[0m\n",
247
+ "\u001b[0;34m\u001b[0m \u001b[0;34mf\"currently initialized for the '{runner_type}' runner. \"\u001b[0m\u001b[0;34m\u001b[0m\n",
248
+ "\u001b[0;34m\u001b[0m \u001b[0;34m\"Please initialize vLLM using `--task generate`.\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\n",
249
+ "\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\n",
250
+ "\u001b[0;34m\u001b[0m \u001b[0;32mraise\u001b[0m \u001b[0mValueError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\" \"\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mjoin\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mmessages\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\n",
251
+ "\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\n",
252
+ "\u001b[0;34m\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mprompt_token_ids\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\n",
253
+ "\u001b[0;34m\u001b[0m \u001b[0mparsed_prompts\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_convert_v1_inputs\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\u001b[0m\n",
254
+ "\u001b[0;34m\u001b[0m \u001b[0mprompts\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mcast\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mOptional\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mUnion\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mstr\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mList\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mstr\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mprompts\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n",
255
+ "\u001b[0;34m\u001b[0m \u001b[0mprompt_token_ids\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mprompt_token_ids\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n",
256
+ "\u001b[0;34m\u001b[0m \u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\n",
257
+ "\u001b[0;34m\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\n",
258
+ "\u001b[0;34m\u001b[0m \u001b[0mparsed_prompts\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mcast\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mUnion\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mPromptType\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mSequence\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mPromptType\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n",
259
+ "\u001b[0;34m\u001b[0m \u001b[0mprompts\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\n",
260
+ "\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\n",
261
+ "\u001b[0;34m\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0misinstance\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mguided_options_request\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdict\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\n",
262
+ "\u001b[0;34m\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mguided_options_request\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m>\u001b[0m \u001b[0;36m1\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\n",
263
+ "\u001b[0;34m\u001b[0m \u001b[0;32mraise\u001b[0m \u001b[0mValueError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\u001b[0m\n",
264
+ "\u001b[0;34m\u001b[0m \u001b[0;34m\"You can only use one guided decoding but multiple is \"\u001b[0m\u001b[0;34m\u001b[0m\n",
265
+ "\u001b[0;34m\u001b[0m \u001b[0;34mf\"specified: {guided_options_request}\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\n",
266
+ "\u001b[0;34m\u001b[0m \u001b[0mguided_options_request\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mGuidedDecodingRequest\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\u001b[0m\n",
267
+ "\u001b[0;34m\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mguided_options_request\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\n",
268
+ "\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\n",
269
+ "\u001b[0;34m\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0msampling_params\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\n",
270
+ "\u001b[0;34m\u001b[0m \u001b[0;31m# Use default sampling params.\u001b[0m\u001b[0;34m\u001b[0m\n",
271
+ "\u001b[0;34m\u001b[0m \u001b[0msampling_params\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget_default_sampling_params\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\n",
272
+ "\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\n",
273
+ "\u001b[0;34m\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_validate_and_add_requests\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\u001b[0m\n",
274
+ "\u001b[0;34m\u001b[0m \u001b[0mprompts\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mparsed_prompts\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n",
275
+ "\u001b[0;34m\u001b[0m \u001b[0mparams\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0msampling_params\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n",
276
+ "\u001b[0;34m\u001b[0m \u001b[0mlora_request\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mlora_request\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n",
277
+ "\u001b[0;34m\u001b[0m \u001b[0mprompt_adapter_request\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mprompt_adapter_request\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n",
278
+ "\u001b[0;34m\u001b[0m \u001b[0mguided_options\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mguided_options_request\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n",
279
+ "\u001b[0;34m\u001b[0m \u001b[0mpriority\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mpriority\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\n",
280
+ "\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\n",
281
+ "\u001b[0;34m\u001b[0m \u001b[0moutputs\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_run_engine\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0muse_tqdm\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0muse_tqdm\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\n",
282
+ "\u001b[0;34m\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mengine_class\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mvalidate_outputs\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0moutputs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mRequestOutput\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
283
+ "\u001b[0;31mFile:\u001b[0m /dscilab_dungvo/workspace/bin/envs/vllm/lib/python3.10/site-packages/vllm/entrypoints/llm.py\n",
284
+ "\u001b[0;31mType:\u001b[0m method"
285
+ ]
286
+ },
287
+ "metadata": {},
288
+ "output_type": "display_data"
289
+ }
290
+ ],
291
+ "source": [
292
+ " llm.generate??"
293
+ ]
294
+ },
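The docstring inspected above also notes that `sampling_params` may be either a single value applied to every prompt or a list paired one-to-one with the prompts. A small illustrative sketch of the paired form (text-only, reusing the `llm` instance created earlier; whether Florence-2 returns anything meaningful without an image is a separate question):

from vllm import SamplingParams

# Two prompts paired with two SamplingParams objects of matching length.
task_prompts = ["<CAPTION>", "<OD>"]
per_prompt_params = [
    SamplingParams(temperature=0, max_tokens=20),
    SamplingParams(temperature=0, max_tokens=50),
]
paired_outputs = llm.generate(task_prompts, per_prompt_params)
for out in paired_outputs:
    print(out.prompt, "->", out.outputs[0].text)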
295
+ {
296
+ "cell_type": "code",
297
+ "execution_count": null,
298
+ "id": "670b9a7f-f6c6-4b80-a117-5c3359eccfd6",
299
+ "metadata": {},
300
+ "outputs": [],
301
+ "source": [
302
+ "!nvidia-smi"
303
+ ]
304
+ },
305
+ {
306
+ "cell_type": "code",
307
+ "execution_count": null,
308
+ "id": "ce71d417-1fdb-4a14-bf85-31a9d5bb693d",
309
+ "metadata": {},
310
+ "outputs": [],
311
+ "source": [
312
+ "# Generate output tokens from the prompts. The output is a list of\n",
313
+ "# RequestOutput objects that contain the prompt, generated\n",
314
+ "# text, and other information.\n",
315
+ "outputs = llm.generate(\n",
316
+ " {\n",
317
+ " \"prompts\": prompts,\n",
318
+ " \"multi_modal_data\": {\"image\": [temp_image]}\n",
319
+ " }, \n",
320
+ " sampling_params\n",
321
+ ")\n",
322
+ "\n",
323
+ "# Print the outputs.\n",
324
+ "for output in outputs:\n",
325
+ " prompt = output.prompt\n",
326
+ " encoder_prompt = output.encoder_prompt\n",
327
+ " generated_text = output.outputs[0].text\n",
328
+ " print(f\"Encoder prompt: {encoder_prompt!r}, \"\n",
329
+ " f\"Decoder prompt: {prompt!r}, \"\n",
330
+ " f\"Generated text: {generated_text!r}\")"
331
+ ]
332
+ }
333
+ ],
334
+ "metadata": {
335
+ "kernelspec": {
336
+ "display_name": "vllm",
337
+ "language": "python",
338
+ "name": "vllm"
339
+ },
340
+ "language_info": {
341
+ "codemirror_mode": {
342
+ "name": "ipython",
343
+ "version": 3
344
+ },
345
+ "file_extension": ".py",
346
+ "mimetype": "text/x-python",
347
+ "name": "python",
348
+ "nbconvert_exporter": "python",
349
+ "pygments_lexer": "ipython3",
350
+ "version": "3.10.14"
351
+ }
352
+ },
353
+ "nbformat": 4,
354
+ "nbformat_minor": 5
355
+ }
a_mllm_notebooks/vllm/serve.sh ADDED
@@ -0,0 +1,452 @@
1
+ eval "$(conda shell.bash hook)"
2
+ conda activate vllm
3
+
4
+ # MODEL_NAME=Qwen/Qwen2-VL-7B-Instruct-AWQ
5
+
6
+ MODEL_NAME=Qwen/Qwen2.5-VL-72B-Instruct-AWQ
7
+
8
+ # MODEL_NAME=OpenGVLab/InternVL2_5-8B-AWQ
9
+ # MODEL_NAME=microsoft/Florence-2-large
10
+
11
+ PORT=8001
12
+
13
+ CUDA_VISIBLE_DEVICES=0,1,2,3 \
14
+ vllm serve $MODEL_NAME \
15
+ --port $PORT \
16
+ -tp 4 \
17
+ --trust-remote-code \
18
+ --quantization awq \
19
+ --dtype float16
20
+
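Once `vllm serve` is up, it exposes an OpenAI-compatible API on $PORT. A minimal client-side sketch in Python using the `openai` package (the localhost URL, the placeholder image URL, and the `api_key="EMPTY"` value are assumptions; adjust them to your setup):

# pip install openai
from openai import OpenAI

client = OpenAI(base_url="http://localhost:8001/v1", api_key="EMPTY")

response = client.chat.completions.create(
    model="Qwen/Qwen2.5-VL-72B-Instruct-AWQ",  # must match MODEL_NAME above
    messages=[{
        "role": "user",
        "content": [
            {"type": "text", "text": "Describe this image."},
            {"type": "image_url",
             "image_url": {"url": "https://example.com/sample.jpg"}},  # placeholder URL
        ],
    }],
    max_tokens=64,
)
print(response.choices[0].message.content)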
21
+
22
+ # --gpu-memory-utilization 0.7 \
23
+ # --cpu-offload-gb 10
24
+
25
+
26
+ # \
27
+ # &
28
+
29
+ # --gpu-memory-utilization 0.2 \
30
+
31
+ # --gpu-memory-utilization 0.8 \
32
+ # --cpu-offload-gb 80 \
33
+
34
+ #
35
+
36
+ # curl -X 'POST' \
37
+ # 'http://localhost:8082/nodes/add' \
38
+ # -H 'accept: application/json' \
39
+ # -H 'Content-Type: application/json' \
40
+ # -d "{
41
+ # \"url\": \"http://0.0.0.0:$PORT\"
42
+ # }"
43
+
44
+
45
+ # --quantization awq \
46
+
47
+
48
+
49
+
50
+ # usage: vllm serve <model_tag> [options]
51
+
52
+ # positional arguments:
53
+ # model_tag The model tag to serve
54
+
55
+ # options:
56
+ # --allow-credentials allow credentials
57
+ # --allowed-headers ALLOWED_HEADERS
58
+ # allowed headers
59
+ # --allowed-local-media-path ALLOWED_LOCAL_MEDIA_PATH
60
+ # Allowing API requests to read local images or videos from directories
61
+ # specified by the server file system. This is a security risk. Should only
62
+ # be enabled in trusted environments.
63
+ # --allowed-methods ALLOWED_METHODS
64
+ # allowed methods
65
+ # --allowed-origins ALLOWED_ORIGINS
66
+ # allowed origins
67
+ # --api-key API_KEY If provided, the server will require this key to be presented in the
68
+ # header.
69
+ # --block-size {8,16,32,64,128}
70
+ # Token block size for contiguous chunks of tokens. This is ignored on
71
+ # neuron devices and set to max-model-len
72
+ # --chat-template CHAT_TEMPLATE
73
+ # The file path to the chat template, or the template in single-line form
74
+ # for the specified model
75
+ # --chat-template-content-format {auto,string,openai}
76
+ # The format to render message content within a chat template. * "string"
77
+ # will render the content as a string. Example: "Hello World" * "openai"
78
+ # will render the content as a list of dictionaries, similar to OpenAI
79
+ # schema. Example: [{"type": "text", "text": "Hello world!"}]
80
+ # --code-revision CODE_REVISION
81
+ # The specific revision to use for the model code on Hugging Face Hub. It
82
+ # can be a branch name, a tag name, or a commit id. If unspecified, will use
83
+ # the default version.
84
+ # --collect-detailed-traces COLLECT_DETAILED_TRACES
85
+ # Valid choices are model,worker,all. It makes sense to set this only if
86
+ # --otlp-traces-endpoint is set. If set, it will collect detailed traces for
87
+ # the specified modules. This involves use of possibly costly and or
88
+ # blocking operations and hence might have a performance impact.
89
+ # --compilation-config COMPILATION_CONFIG, -O COMPILATION_CONFIG
90
+ # torch.compile configuration for the model.When it is a number (0, 1, 2,
91
+ # 3), it will be interpreted as the optimization level. NOTE: level 0 is the
92
+ # default level without any optimization. level 1 and 2 are for internal
93
+ # testing only. level 3 is the recommended level for production. To specify
94
+ # the full compilation config, use a JSON string. Following the convention
95
+ # of traditional compilers, using -O without space is also supported. -O3 is
96
+ # equivalent to -O 3.
97
+ # --config CONFIG Read CLI options from a config file.Must be a YAML with the following opti
98
+ # ons:https://docs.vllm.ai/en/latest/serving/openai_compatible_server.html#c
99
+ # ommand-line-arguments-for-the-server
100
+ # --config-format {auto,hf,mistral}
101
+ # The format of the model config to load. * "auto" will try to load the
102
+ # config in hf format if available else it will try to load in mistral
103
+ # format
104
+ # --cpu-offload-gb CPU_OFFLOAD_GB
105
+ # The space in GiB to offload to CPU, per GPU. Default is 0, which means no
106
+ # offloading. Intuitively, this argument can be seen as a virtual way to
107
+ # increase the GPU memory size. For example, if you have one 24 GB GPU and
108
+ # set this to 10, virtually you can think of it as a 34 GB GPU. Then you can
109
+ # load a 13B model with BF16 weight, which requires at least 26GB GPU
110
+ # memory. Note that this requires fast CPU-GPU interconnect, as part of the
111
+ # model is loaded from CPU memory to GPU memory on the fly in each model
112
+ # forward pass.
113
+ # --device {auto,cuda,neuron,cpu,openvino,tpu,xpu,hpu}
114
+ # Device type for vLLM execution.
115
+ # --disable-async-output-proc
116
+ # Disable async output processing. This may result in lower performance.
117
+ # --disable-custom-all-reduce
118
+ # See ParallelConfig.
119
+ # --disable-fastapi-docs
120
+ # Disable FastAPI's OpenAPI schema, Swagger UI, and ReDoc endpoint
121
+ # --disable-frontend-multiprocessing
122
+ # If specified, will run the OpenAI frontend server in the same process as
123
+ # the model serving engine.
124
+ # --disable-log-requests
125
+ # Disable logging requests.
126
+ # --disable-log-stats Disable logging statistics.
127
+ # --disable-logprobs-during-spec-decoding [DISABLE_LOGPROBS_DURING_SPEC_DECODING]
128
+ # If set to True, token log probabilities are not returned during
129
+ # speculative decoding. If set to False, log probabilities are returned
130
+ # according to the settings in SamplingParams. If not specified, it defaults
131
+ # to True. Disabling log probabilities during speculative decoding reduces
132
+ # latency by skipping logprob calculation in proposal sampling, target
133
+ # sampling, and after accepted tokens are determined.
134
+ # --disable-sliding-window
135
+ # Disables sliding window, capping to sliding window size
136
+ # --distributed-executor-backend {ray,mp}
137
+ # Backend to use for distributed model workers, either "ray" or "mp"
138
+ # (multiprocessing). If the product of pipeline_parallel_size and
139
+ # tensor_parallel_size is less than or equal to the number of GPUs
140
+ # available, "mp" will be used to keep processing on a single host.
141
+ # Otherwise, this will default to "ray" if Ray is installed and fail
142
+ # otherwise. Note that tpu and hpu only support Ray for distributed
143
+ # inference.
144
+ # --download-dir DOWNLOAD_DIR
145
+ # Directory to download and load the weights, default to the default cache
146
+ # dir of huggingface.
147
+ # --dtype {auto,half,float16,bfloat16,float,float32}
148
+ # Data type for model weights and activations. * "auto" will use FP16
149
+ # precision for FP32 and FP16 models, and BF16 precision for BF16 models. *
150
+ # "half" for FP16. Recommended for AWQ quantization. * "float16" is the same
151
+ # as "half". * "bfloat16" for a balance between precision and range. *
152
+ # "float" is shorthand for FP32 precision. * "float32" for FP32 precision.
153
+ # --enable-auto-tool-choice
154
+ # Enable auto tool choice for supported models. Use --tool-call-parser to
155
+ # specify which parser to use
156
+ # --enable-chunked-prefill [ENABLE_CHUNKED_PREFILL]
157
+ # If set, the prefill requests can be chunked based on the
158
+ # max_num_batched_tokens.
159
+ # --enable-lora If True, enable handling of LoRA adapters.
160
+ # --enable-lora-bias If True, enable bias for LoRA adapters.
161
+ # --enable-prefix-caching, --no-enable-prefix-caching
162
+ # Enables automatic prefix caching. Use --no-enable-prefix-caching to
163
+ # disable explicitly.
164
+ # --enable-prompt-adapter
165
+ # If True, enable handling of PromptAdapters.
166
+ # --enable-prompt-tokens-details
167
+ # If set to True, enable prompt_tokens_details in usage.
168
+ # --enforce-eager Always use eager-mode PyTorch. If False, will use eager mode and CUDA
169
+ # graph in hybrid for maximal performance and flexibility.
170
+ # --fully-sharded-loras
171
+ # By default, only half of the LoRA computation is sharded with tensor
172
+ # parallelism. Enabling this will use the fully sharded layers. At high
173
+ # sequence length, max rank or tensor parallel size, this is likely faster.
174
+ # --gpu-memory-utilization GPU_MEMORY_UTILIZATION
175
+ # The fraction of GPU memory to be used for the model executor, which can
176
+ # range from 0 to 1. For example, a value of 0.5 would imply 50% GPU memory
177
+ # utilization. If unspecified, will use the default value of 0.9. This is a
178
+ # global gpu memory utilization limit, for example if 50% of the gpu memory
179
+ # is already used before vLLM starts and --gpu-memory-utilization is set to
180
+ # 0.9, then only 40% of the gpu memory will be allocated to the model
181
+ # executor.
182
+ # --guided-decoding-backend {outlines,lm-format-enforcer,xgrammar}
183
+ # Which engine will be used for guided decoding (JSON schema / regex etc) by
184
+ # default. Currently support https://github.com/outlines-
185
+ # dev/outlines,https://github.com/mlc-ai/xgrammar, and
186
+ # https://github.com/noamgat/lm-format-enforcer. Can be overridden per
187
+ # request via guided_decoding_backend parameter.
188
+ # --hf-overrides HF_OVERRIDES
189
+ # Extra arguments for the HuggingFace config. This should be a JSON string
190
+ # that will be parsed into a dictionary.
191
+ # --host HOST host name
192
+ # --ignore-patterns IGNORE_PATTERNS
193
+ # The pattern(s) to ignore when loading the model.Default to `original/**/*`
194
+ # to avoid repeated loading of llama's checkpoints.
195
+ # --kv-cache-dtype {auto,fp8,fp8_e5m2,fp8_e4m3}
196
+ # Data type for kv cache storage. If "auto", will use model data type. CUDA
197
+ # 11.8+ supports fp8 (=fp8_e4m3) and fp8_e5m2. ROCm (AMD GPU) supports fp8
198
+ # (=fp8_e4m3)
199
+ # --kv-transfer-config KV_TRANSFER_CONFIG
200
+ # The configurations for distributed KV cache transfer. Should be a JSON
201
+ # string.
202
+ # --limit-mm-per-prompt LIMIT_MM_PER_PROMPT
203
+ # For each multimodal plugin, limit how many input instances to allow for
204
+ # each prompt. Expects a comma-separated list of items, e.g.:
205
+ # `image=16,video=2` allows a maximum of 16 images and 2 videos per prompt.
206
+ # Defaults to 1 for each modality.
207
+ # --load-format {auto,pt,safetensors,npcache,dummy,tensorizer,sharded_state,gguf,bitsandbytes,mistral}
208
+ # The format of the model weights to load. * "auto" will try to load the
209
+ # weights in the safetensors format and fall back to the pytorch bin format
210
+ # if safetensors format is not available. * "pt" will load the weights in
211
+ # the pytorch bin format. * "safetensors" will load the weights in the
212
+ # safetensors format. * "npcache" will load the weights in pytorch format
213
+ # and store a numpy cache to speed up the loading. * "dummy" will initialize
214
+ # the weights with random values, which is mainly for profiling. *
215
+ # "tensorizer" will load the weights using tensorizer from CoreWeave. See
216
+ # the Tensorize vLLM Model script in the Examples section for more
217
+ # information. * "bitsandbytes" will load the weights using bitsandbytes
218
+ # quantization.
219
+ # --long-lora-scaling-factors LONG_LORA_SCALING_FACTORS
220
+ # Specify multiple scaling factors (which can be different from base model
221
+ # scaling factor - see eg. Long LoRA) to allow for multiple LoRA adapters
222
+ # trained with those scaling factors to be used at the same time. If not
223
+ # specified, only adapters trained with the base model scaling factor are
224
+ # allowed.
225
+ # --lora-dtype {auto,float16,bfloat16}
226
+ # Data type for LoRA. If auto, will default to base model dtype.
227
+ # --lora-extra-vocab-size LORA_EXTRA_VOCAB_SIZE
228
+ # Maximum size of extra vocabulary that can be present in a LoRA adapter
229
+ # (added to the base model vocabulary).
230
+ # --lora-modules LORA_MODULES [LORA_MODULES ...]
231
+ # LoRA module configurations in either 'name=path' formator JSON format.
232
+ # Example (old format): 'name=path' Example (new format): '{"name": "name",
233
+ # "local_path": "path", "base_model_name": "id"}'
234
+ # --max-cpu-loras MAX_CPU_LORAS
235
+ # Maximum number of LoRAs to store in CPU memory. Must be >= than max_loras.
236
+ # Defaults to max_loras.
237
+ # --max-log-len MAX_LOG_LEN
238
+ # Max number of prompt characters or prompt ID numbers being printed in log.
239
+ # Default: Unlimited
240
+ # --max-logprobs MAX_LOGPROBS
241
+ # Max number of log probs to return logprobs is specified in SamplingParams.
242
+ # --max-lora-rank MAX_LORA_RANK
243
+ # Max LoRA rank.
244
+ # --max-loras MAX_LORAS
245
+ # Max number of LoRAs in a single batch.
246
+ # --max-model-len MAX_MODEL_LEN
247
+ # Model context length. If unspecified, will be automatically derived from
248
+ # the model config.
249
+ # --max-num-batched-tokens MAX_NUM_BATCHED_TOKENS
250
+ # Maximum number of batched tokens per iteration.
251
+ # --max-num-seqs MAX_NUM_SEQS
252
+ # Maximum number of sequences per iteration.
253
+ # --max-parallel-loading-workers MAX_PARALLEL_LOADING_WORKERS
254
+ # Load model sequentially in multiple batches, to avoid RAM OOM when using
255
+ # tensor parallel and large models.
256
+ # --max-prompt-adapter-token MAX_PROMPT_ADAPTER_TOKEN
257
+ # Max number of PromptAdapters tokens
258
+ # --max-prompt-adapters MAX_PROMPT_ADAPTERS
259
+ # Max number of PromptAdapters in a batch.
260
+ # --max-seq-len-to-capture MAX_SEQ_LEN_TO_CAPTURE
261
+ # Maximum sequence length covered by CUDA graphs. When a sequence has
262
+ # context length larger than this, we fall back to eager mode. Additionally
263
+ # for encoder-decoder models, if the sequence length of the encoder input is
264
+ # larger than this, we fall back to the eager mode.
265
+ # --middleware MIDDLEWARE
266
+ # Additional ASGI middleware to apply to the app. We accept multiple
267
+ # --middleware arguments. The value should be an import path. If a function
268
+ # is provided, vLLM will add it to the server using @app.middleware('http').
269
+ # If a class is provided, vLLM will add it to the server using
270
+ # app.add_middleware().
271
+ # --mm-processor-kwargs MM_PROCESSOR_KWARGS
272
+ # Overrides for the multimodal input mapping/processing, e.g., image
273
+ # processor. For example: {"num_crops": 4}.
274
+ # --model MODEL Name or path of the huggingface model to use.
275
+ # --model-loader-extra-config MODEL_LOADER_EXTRA_CONFIG
276
+ # Extra config for model loader. This will be passed to the model loader
277
+ # corresponding to the chosen load_format. This should be a JSON string that
278
+ # will be parsed into a dictionary.
279
+ # --multi-step-stream-outputs [MULTI_STEP_STREAM_OUTPUTS]
280
+ # If False, then multi-step will stream outputs at the end of all steps
281
+ # --ngram-prompt-lookup-max NGRAM_PROMPT_LOOKUP_MAX
282
+ # Max size of window for ngram prompt lookup in speculative decoding.
283
+ # --ngram-prompt-lookup-min NGRAM_PROMPT_LOOKUP_MIN
284
+ # Min size of window for ngram prompt lookup in speculative decoding.
285
+ # --num-gpu-blocks-override NUM_GPU_BLOCKS_OVERRIDE
286
+ # If specified, ignore GPU profiling result and use this number of GPU
287
+ # blocks. Used for testing preemption.
288
+ # --num-lookahead-slots NUM_LOOKAHEAD_SLOTS
289
+ # Experimental scheduling config necessary for speculative decoding. This
290
+ # will be replaced by speculative config in the future; it is present to
291
+ # enable correctness tests until then.
292
+ # --num-scheduler-steps NUM_SCHEDULER_STEPS
293
+ # Maximum number of forward steps per scheduler call.
294
+ # --num-speculative-tokens NUM_SPECULATIVE_TOKENS
295
+ # The number of speculative tokens to sample from the draft model in
296
+ # speculative decoding.
297
+ # --otlp-traces-endpoint OTLP_TRACES_ENDPOINT
298
+ # Target URL to which OpenTelemetry traces will be sent.
299
+ # --override-neuron-config OVERRIDE_NEURON_CONFIG
300
+ # Override or set neuron device configuration. e.g. {"cast_logits_dtype":
301
+ # "bloat16"}.'
302
+ # --override-pooler-config OVERRIDE_POOLER_CONFIG
303
+ # Override or set the pooling method in the embedding model. e.g.
304
+ # {"pooling_type": "mean", "normalize": false}.'
305
+ # --pipeline-parallel-size PIPELINE_PARALLEL_SIZE, -pp PIPELINE_PARALLEL_SIZE
306
+ # Number of pipeline stages.
307
+ # --port PORT port number
308
+ # --preemption-mode PREEMPTION_MODE
309
+ # If 'recompute', the engine performs preemption by recomputing; If 'swap',
310
+ # the engine performs preemption by block swapping.
311
+ # --prompt-adapters PROMPT_ADAPTERS [PROMPT_ADAPTERS ...]
312
+ # Prompt adapter configurations in the format name=path. Multiple adapters
313
+ # can be specified.
314
+ # --qlora-adapter-name-or-path QLORA_ADAPTER_NAME_OR_PATH
315
+ # Name or path of the QLoRA adapter.
316
+ # --quantization {aqlm,awq,deepspeedfp,tpu_int8,fp8,fbgemm_fp8,modelopt,marlin,gguf,gptq_marlin_24,gptq_marlin,awq_marlin,gptq,compressed-tensors,bitsandbytes,qqq,hqq,experts_int8,neuron_quant,ipex,None}, -q {aqlm,awq,deepspeedfp,tpu_int8,fp8,fbgemm_fp8,modelopt,marlin,gguf,gptq_marlin_24,gptq_marlin,awq_marlin,gptq,compressed-tensors,bitsandbytes,qqq,hqq,experts_int8,neuron_quant,ipex,None}
317
+ # Method used to quantize the weights. If None, we first check the
318
+ # `quantization_config` attribute in the model config file. If that is None,
319
+ # we assume the model weights are not quantized and use `dtype` to determine
320
+ # the data type of the weights.
321
+ # --quantization-param-path QUANTIZATION_PARAM_PATH
322
+ # Path to the JSON file containing the KV cache scaling factors. This should
323
+ # generally be supplied, when KV cache dtype is FP8. Otherwise, KV cache
324
+ # scaling factors default to 1.0, which may cause accuracy issues. FP8_E5M2
325
+ # (without scaling) is only supported on cuda version greater than 11.8. On
326
+ # ROCm (AMD GPU), FP8_E4M3 is instead supported for common inference
327
+ # criteria.
328
+ # --ray-workers-use-nsight
329
+ # If specified, use nsight to profile Ray workers.
330
+ # --response-role RESPONSE_ROLE
331
+ # The role name to return if `request.add_generation_prompt=true`.
332
+ # --return-tokens-as-token-ids
333
+ # When --max-logprobs is specified, represents single tokens as strings of
334
+ # the form 'token_id:{token_id}' so that tokens that are not JSON-encodable
335
+ # can be identified.
336
+ # --revision REVISION The specific model version to use. It can be a branch name, a tag name, or
337
+ # a commit id. If unspecified, will use the default version.
338
+ # --root-path ROOT_PATH
339
+ # FastAPI root_path when app is behind a path based routing proxy
340
+ # --rope-scaling ROPE_SCALING
341
+ # RoPE scaling configuration in JSON format. For example,
342
+ # {"rope_type":"dynamic","factor":2.0}
343
+ # --rope-theta ROPE_THETA
344
+ # RoPE theta. Use with `rope_scaling`. In some cases, changing the RoPE
345
+ # theta improves the performance of the scaled model.
346
+ # --scheduler-delay-factor SCHEDULER_DELAY_FACTOR
347
+ # Apply a delay (of delay factor multiplied by previous prompt latency)
348
+ # before scheduling next prompt.
349
+ # --scheduling-policy {fcfs,priority}
350
+ # The scheduling policy to use. "fcfs" (first come first served, i.e.
351
+ # requests are handled in order of arrival; default) or "priority" (requests
352
+ # are handled based on given priority (lower value means earlier handling)
353
+ # and time of arrival deciding any ties).
354
+ # --seed SEED Random seed for operations.
355
+ # --served-model-name SERVED_MODEL_NAME [SERVED_MODEL_NAME ...]
356
+ # The model name(s) used in the API. If multiple names are provided, the
357
+ # server will respond to any of the provided names. The model name in the
358
+ # model field of a response will be the first name in this list. If not
359
+ # specified, the model name will be the same as the `--model` argument.
360
+ # Note that these name(s) will also be used in the `model_name` tag of
361
+ # Prometheus metrics; if multiple names are provided, the metrics tag will take
362
+ # the first one.
363
+ # --skip-tokenizer-init
364
+ # Skip initialization of tokenizer and detokenizer
365
+ # --spec-decoding-acceptance-method {rejection_sampler,typical_acceptance_sampler}
366
+ # Specify the acceptance method to use during draft token verification in
367
+ # speculative decoding. Two types of acceptance routines are supported: 1)
368
+ # RejectionSampler which does not allow changing the acceptance rate of
369
+ # draft tokens, 2) TypicalAcceptanceSampler which is configurable, allowing
370
+ # for a higher acceptance rate at the cost of lower quality, and vice versa.
371
+ # --speculative-disable-by-batch-size SPECULATIVE_DISABLE_BY_BATCH_SIZE
372
+ # Disable speculative decoding for new incoming requests if the number of
373
+ # enqueue requests is larger than this value.
374
+ # --speculative-disable-mqa-scorer
375
+ # If set to True, the MQA scorer will be disabled in speculative decoding and
376
+ # vLLM will fall back to batch expansion.
377
+ # --speculative-draft-tensor-parallel-size SPECULATIVE_DRAFT_TENSOR_PARALLEL_SIZE, -spec-draft-tp SPECULATIVE_DRAFT_TENSOR_PARALLEL_SIZE
378
+ # Number of tensor parallel replicas for the draft model in speculative
379
+ # decoding.
380
+ # --speculative-max-model-len SPECULATIVE_MAX_MODEL_LEN
381
+ # The maximum sequence length supported by the draft model. Sequences over
382
+ # this length will skip speculation.
383
+ # --speculative-model SPECULATIVE_MODEL
384
+ # The name of the draft model to be used in speculative decoding.
385
+ # --speculative-model-quantization {aqlm,awq,deepspeedfp,tpu_int8,fp8,fbgemm_fp8,modelopt,marlin,gguf,gptq_marlin_24,gptq_marlin,awq_marlin,gptq,compressed-tensors,bitsandbytes,qqq,hqq,experts_int8,neuron_quant,ipex,None}
386
+ # Method used to quantize the weights of speculative model. If None, we
387
+ # first check the `quantization_config` attribute in the model config file.
388
+ # If that is None, we assume the model weights are not quantized and use
389
+ # `dtype` to determine the data type of the weights.
390
+ # --ssl-ca-certs SSL_CA_CERTS
391
+ # The CA certificates file
392
+ # --ssl-cert-reqs SSL_CERT_REQS
393
+ # Whether client certificate is required (see stdlib ssl module's)
394
+ # --ssl-certfile SSL_CERTFILE
395
+ # The file path to the SSL cert file
396
+ # --ssl-keyfile SSL_KEYFILE
397
+ # The file path to the SSL key file
398
+ # --swap-space SWAP_SPACE
399
+ # CPU swap space size (GiB) per GPU.
400
+ # --task {auto,generate,embedding}
401
+ # The task to use the model for. Each vLLM instance only supports one task,
402
+ # even if the same model can be used for multiple tasks. When the model only
403
+ # supports one task, "auto" can be used to select it; otherwise, you must
404
+ # specify explicitly which task to use.
405
+ # --tensor-parallel-size TENSOR_PARALLEL_SIZE, -tp TENSOR_PARALLEL_SIZE
406
+ # Number of tensor parallel replicas.
407
+ # --tokenizer TOKENIZER
408
+ # Name or path of the huggingface tokenizer to use. If unspecified, model
409
+ # name or path will be used.
410
+ # --tokenizer-mode {auto,slow,mistral}
411
+ # The tokenizer mode. * "auto" will use the fast tokenizer if available. *
412
+ # "slow" will always use the slow tokenizer. * "mistral" will always use the
413
+ # `mistral_common` tokenizer.
414
+ # --tokenizer-pool-extra-config TOKENIZER_POOL_EXTRA_CONFIG
415
+ # Extra config for tokenizer pool. This should be a JSON string that will be
416
+ # parsed into a dictionary. Ignored if tokenizer_pool_size is 0.
417
+ # --tokenizer-pool-size TOKENIZER_POOL_SIZE
418
+ # Size of tokenizer pool to use for asynchronous tokenization. If 0, will
419
+ # use synchronous tokenization.
420
+ # --tokenizer-pool-type TOKENIZER_POOL_TYPE
421
+ # Type of tokenizer pool to use for asynchronous tokenization. Ignored if
422
+ # tokenizer_pool_size is 0.
423
+ # --tokenizer-revision TOKENIZER_REVISION
424
+ # Revision of the huggingface tokenizer to use. It can be a branch name, a
425
+ # tag name, or a commit id. If unspecified, will use the default version.
426
+ # --tool-call-parser {granite-20b-fc,granite,hermes,internlm,jamba,llama3_json,mistral,pythonic} or name registered in --tool-parser-plugin
427
+ # Select the tool call parser depending on the model that you're using. This
428
+ # is used to parse the model-generated tool call into OpenAI API format.
429
+ # Required for --enable-auto-tool-choice.
430
+ # --tool-parser-plugin TOOL_PARSER_PLUGIN
431
+ # Specify the tool parser plugin used to parse model-generated tool calls
432
+ # into OpenAI API format; the names registered in this plugin can be used in
433
+ # --tool-call-parser.
434
+ # --trust-remote-code Trust remote code from huggingface.
435
+ # --typical-acceptance-sampler-posterior-alpha TYPICAL_ACCEPTANCE_SAMPLER_POSTERIOR_ALPHA
436
+ # A scaling factor for the entropy-based threshold for token acceptance in
437
+ # the TypicalAcceptanceSampler. Typically defaults to sqrt of --typical-
438
+ # acceptance-sampler-posterior-threshold i.e. 0.3
439
+ # --typical-acceptance-sampler-posterior-threshold TYPICAL_ACCEPTANCE_SAMPLER_POSTERIOR_THRESHOLD
440
+ # Set the lower bound threshold for the posterior probability of a token to
441
+ # be accepted. This threshold is used by the TypicalAcceptanceSampler to
442
+ # make sampling decisions during speculative decoding. Defaults to 0.09
443
+ # --use-v2-block-manager
444
+ # [DEPRECATED] block manager v1 has been removed and
445
+ # SelfAttnBlockSpaceManager (i.e. block manager v2) is now the default.
446
+ # Setting this flag to True or False has no effect on vLLM behavior.
447
+ # --uvicorn-log-level {debug,info,warning,error,critical,trace}
448
+ # log level for uvicorn
449
+ # --worker-cls WORKER_CLS
450
+ # The worker class to use for distributed execution.
451
+ # --worker-use-ray Deprecated, use --distributed-executor-backend=ray.
452
+ # -h, --help show this help message and exit
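The help text above enumerates the `vllm serve` options referenced throughout these scripts. As a minimal sketch of how a few of them combine into a launch command (the model name, port, parallelism, and served-model alias below are illustrative assumptions, not values taken from this commit):

```bash
# Minimal sketch: launch an OpenAI-compatible vLLM server with a handful of
# the flags documented above. Model name, port, tensor-parallel size, and the
# served model alias are illustrative placeholders.
vllm serve Qwen/Qwen2-VL-7B-Instruct \
    --host 0.0.0.0 \
    --port 8000 \
    --tensor-parallel-size 2 \
    --max-model-len 8192 \
    --limit-mm-per-prompt image=4 \
    --served-model-name my-vlm
```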
a_mllm_notebooks/vllm/start.ipynb ADDED
@@ -0,0 +1,432 @@
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": 3,
6
+ "metadata": {},
7
+ "outputs": [],
8
+ "source": [
9
+ "from vllm import LLM, SamplingParams"
10
+ ]
11
+ },
12
+ {
13
+ "cell_type": "code",
14
+ "execution_count": 9,
15
+ "metadata": {},
16
+ "outputs": [
17
+ {
18
+ "name": "stdout",
19
+ "output_type": "stream",
20
+ "text": [
21
+ "File downloaded successfully: quickstart.html\n"
22
+ ]
23
+ }
24
+ ],
25
+ "source": [
26
+ "import requests\n",
27
+ "\n",
28
+ "def download_file(url, filename):\n",
29
+ " try:\n",
30
+ " r = requests.get(url)\n",
31
+ " r.raise_for_status() # Check if the request was successful\n",
32
+ " with open(filename, 'wb') as f:\n",
33
+ " f.write(r.content)\n",
34
+ " print(f\"File downloaded successfully: {filename}\")\n",
35
+ " except requests.exceptions.RequestException as e:\n",
36
+ " print(f\"Failed to download file: {e}\")\n",
37
+ "\n",
38
+ "download_file('https://docs.vllm.ai/en/stable/getting_started/quickstart.html', 'quickstart.html')"
39
+ ]
40
+ },
41
+ {
42
+ "cell_type": "code",
43
+ "execution_count": null,
44
+ "metadata": {},
45
+ "outputs": [
46
+ {
47
+ "name": "stdout",
48
+ "output_type": "stream",
49
+ "text": [
50
+ "\u001b[33mWARNING: Ignoring invalid distribution -vidia-cublas-cu12 (/dscilab_dungvo/workspace/bin/envs/vllm/lib/python3.10/site-packages)\u001b[0m\u001b[33m\n",
51
+ "\u001b[0mCollecting jupyter\n",
52
+ " Downloading jupyter-1.1.1-py2.py3-none-any.whl.metadata (2.0 kB)\n",
53
+ "Collecting notebook (from jupyter)\n",
54
+ " Downloading notebook-7.3.2-py3-none-any.whl.metadata (10 kB)\n",
55
+ "Collecting jupyter-console (from jupyter)\n",
56
+ " Downloading jupyter_console-6.6.3-py3-none-any.whl.metadata (5.8 kB)\n",
57
+ "Collecting nbconvert (from jupyter)\n",
58
+ " Downloading nbconvert-7.16.4-py3-none-any.whl.metadata (8.5 kB)\n",
59
+ "Requirement already satisfied: ipykernel in /dscilab_dungvo/workspace/bin/envs/vllm/lib/python3.10/site-packages (from jupyter) (6.29.5)\n",
60
+ "Collecting ipywidgets (from jupyter)\n",
61
+ " Downloading ipywidgets-8.1.5-py3-none-any.whl.metadata (2.3 kB)\n",
62
+ "Collecting jupyterlab (from jupyter)\n",
63
+ " Downloading jupyterlab-4.3.4-py3-none-any.whl.metadata (16 kB)\n",
64
+ "Requirement already satisfied: comm>=0.1.1 in /dscilab_dungvo/workspace/bin/envs/vllm/lib/python3.10/site-packages (from ipykernel->jupyter) (0.2.2)\n",
65
+ "Requirement already satisfied: debugpy>=1.6.5 in /dscilab_dungvo/workspace/bin/envs/vllm/lib/python3.10/site-packages (from ipykernel->jupyter) (1.8.9)\n",
66
+ "Requirement already satisfied: ipython>=7.23.1 in /dscilab_dungvo/workspace/bin/envs/vllm/lib/python3.10/site-packages (from ipykernel->jupyter) (8.30.0)\n",
67
+ "Requirement already satisfied: jupyter-client>=6.1.12 in /dscilab_dungvo/workspace/bin/envs/vllm/lib/python3.10/site-packages (from ipykernel->jupyter) (8.6.3)\n",
68
+ "Requirement already satisfied: jupyter-core!=5.0.*,>=4.12 in /dscilab_dungvo/workspace/bin/envs/vllm/lib/python3.10/site-packages (from ipykernel->jupyter) (5.7.2)\n",
69
+ "Requirement already satisfied: matplotlib-inline>=0.1 in /dscilab_dungvo/workspace/bin/envs/vllm/lib/python3.10/site-packages (from ipykernel->jupyter) (0.1.7)\n",
70
+ "Requirement already satisfied: nest-asyncio in /dscilab_dungvo/workspace/bin/envs/vllm/lib/python3.10/site-packages (from ipykernel->jupyter) (1.6.0)\n",
71
+ "Requirement already satisfied: packaging in /dscilab_dungvo/workspace/bin/envs/vllm/lib/python3.10/site-packages (from ipykernel->jupyter) (24.1)\n",
72
+ "Requirement already satisfied: psutil in /dscilab_dungvo/workspace/bin/envs/vllm/lib/python3.10/site-packages (from ipykernel->jupyter) (6.0.0)\n",
73
+ "Requirement already satisfied: pyzmq>=24 in /dscilab_dungvo/workspace/bin/envs/vllm/lib/python3.10/site-packages (from ipykernel->jupyter) (26.2.0)\n",
74
+ "Requirement already satisfied: tornado>=6.1 in /dscilab_dungvo/workspace/bin/envs/vllm/lib/python3.10/site-packages (from ipykernel->jupyter) (6.4.2)\n",
75
+ "Requirement already satisfied: traitlets>=5.4.0 in /dscilab_dungvo/workspace/bin/envs/vllm/lib/python3.10/site-packages (from ipykernel->jupyter) (5.14.3)\n",
76
+ "Collecting widgetsnbextension~=4.0.12 (from ipywidgets->jupyter)\n",
77
+ " Downloading widgetsnbextension-4.0.13-py3-none-any.whl.metadata (1.6 kB)\n",
78
+ "Collecting jupyterlab-widgets~=3.0.12 (from ipywidgets->jupyter)\n",
79
+ " Downloading jupyterlab_widgets-3.0.13-py3-none-any.whl.metadata (4.1 kB)\n",
80
+ "Requirement already satisfied: prompt-toolkit>=3.0.30 in /dscilab_dungvo/workspace/bin/envs/vllm/lib/python3.10/site-packages (from jupyter-console->jupyter) (3.0.48)\n",
81
+ "Requirement already satisfied: pygments in /dscilab_dungvo/workspace/bin/envs/vllm/lib/python3.10/site-packages (from jupyter-console->jupyter) (2.18.0)\n",
82
+ "Collecting async-lru>=1.0.0 (from jupyterlab->jupyter)\n",
83
+ " Downloading async_lru-2.0.4-py3-none-any.whl.metadata (4.5 kB)\n",
84
+ "Requirement already satisfied: httpx>=0.25.0 in /dscilab_dungvo/workspace/bin/envs/vllm/lib/python3.10/site-packages (from jupyterlab->jupyter) (0.27.2)\n",
85
+ "Requirement already satisfied: jinja2>=3.0.3 in /dscilab_dungvo/workspace/bin/envs/vllm/lib/python3.10/site-packages (from jupyterlab->jupyter) (3.1.4)\n",
86
+ "Collecting jupyter-lsp>=2.0.0 (from jupyterlab->jupyter)\n",
87
+ " Downloading jupyter_lsp-2.2.5-py3-none-any.whl.metadata (1.8 kB)\n",
88
+ "Collecting jupyter-server<3,>=2.4.0 (from jupyterlab->jupyter)\n",
89
+ " Downloading jupyter_server-2.15.0-py3-none-any.whl.metadata (8.4 kB)\n",
90
+ "Collecting jupyterlab-server<3,>=2.27.1 (from jupyterlab->jupyter)\n",
91
+ " Downloading jupyterlab_server-2.27.3-py3-none-any.whl.metadata (5.9 kB)\n",
92
+ "Collecting notebook-shim>=0.2 (from jupyterlab->jupyter)\n",
93
+ " Downloading notebook_shim-0.2.4-py3-none-any.whl.metadata (4.0 kB)\n",
94
+ "Requirement already satisfied: setuptools>=40.8.0 in /dscilab_dungvo/workspace/bin/envs/vllm/lib/python3.10/site-packages (from jupyterlab->jupyter) (72.1.0)\n",
95
+ "Requirement already satisfied: tomli>=1.2.2 in /dscilab_dungvo/workspace/bin/envs/vllm/lib/python3.10/site-packages (from jupyterlab->jupyter) (2.2.1)\n",
96
+ "Requirement already satisfied: beautifulsoup4 in /dscilab_dungvo/workspace/bin/envs/vllm/lib/python3.10/site-packages (from nbconvert->jupyter) (4.12.3)\n",
97
+ "Collecting bleach!=5.0.0 (from nbconvert->jupyter)\n",
98
+ " Downloading bleach-6.2.0-py3-none-any.whl.metadata (30 kB)\n",
99
+ "Collecting defusedxml (from nbconvert->jupyter)\n",
100
+ " Downloading defusedxml-0.7.1-py2.py3-none-any.whl.metadata (32 kB)\n",
101
+ "Collecting jupyterlab-pygments (from nbconvert->jupyter)\n",
102
+ " Downloading jupyterlab_pygments-0.3.0-py3-none-any.whl.metadata (4.4 kB)\n",
103
+ "Requirement already satisfied: markupsafe>=2.0 in /dscilab_dungvo/workspace/bin/envs/vllm/lib/python3.10/site-packages (from nbconvert->jupyter) (2.1.5)\n",
104
+ "Collecting mistune<4,>=2.0.3 (from nbconvert->jupyter)\n",
105
+ " Downloading mistune-3.0.2-py3-none-any.whl.metadata (1.7 kB)\n",
106
+ "Collecting nbclient>=0.5.0 (from nbconvert->jupyter)\n",
107
+ " Downloading nbclient-0.10.2-py3-none-any.whl.metadata (8.3 kB)\n",
108
+ "Requirement already satisfied: nbformat>=5.7 in /dscilab_dungvo/workspace/bin/envs/vllm/lib/python3.10/site-packages (from nbconvert->jupyter) (5.10.4)\n",
109
+ "Collecting pandocfilters>=1.4.1 (from nbconvert->jupyter)\n",
110
+ " Downloading pandocfilters-1.5.1-py2.py3-none-any.whl.metadata (9.0 kB)\n",
111
+ "Collecting tinycss2 (from nbconvert->jupyter)\n",
112
+ " Downloading tinycss2-1.4.0-py3-none-any.whl.metadata (3.0 kB)\n",
113
+ "Requirement already satisfied: typing-extensions>=4.0.0 in /dscilab_dungvo/workspace/bin/envs/vllm/lib/python3.10/site-packages (from async-lru>=1.0.0->jupyterlab->jupyter) (4.12.2)\n",
114
+ "Collecting webencodings (from bleach!=5.0.0->nbconvert->jupyter)\n",
115
+ " Downloading webencodings-0.5.1-py2.py3-none-any.whl.metadata (2.1 kB)\n",
116
+ "Requirement already satisfied: anyio in /dscilab_dungvo/workspace/bin/envs/vllm/lib/python3.10/site-packages (from httpx>=0.25.0->jupyterlab->jupyter) (4.4.0)\n",
117
+ "Requirement already satisfied: certifi in /dscilab_dungvo/workspace/bin/envs/vllm/lib/python3.10/site-packages (from httpx>=0.25.0->jupyterlab->jupyter) (2024.8.30)\n",
118
+ "Requirement already satisfied: httpcore==1.* in /dscilab_dungvo/workspace/bin/envs/vllm/lib/python3.10/site-packages (from httpx>=0.25.0->jupyterlab->jupyter) (1.0.5)\n",
119
+ "Requirement already satisfied: idna in /dscilab_dungvo/workspace/bin/envs/vllm/lib/python3.10/site-packages (from httpx>=0.25.0->jupyterlab->jupyter) (3.10)\n",
120
+ "Requirement already satisfied: sniffio in /dscilab_dungvo/workspace/bin/envs/vllm/lib/python3.10/site-packages (from httpx>=0.25.0->jupyterlab->jupyter) (1.3.1)\n",
121
+ "Requirement already satisfied: h11<0.15,>=0.13 in /dscilab_dungvo/workspace/bin/envs/vllm/lib/python3.10/site-packages (from httpcore==1.*->httpx>=0.25.0->jupyterlab->jupyter) (0.14.0)\n",
122
+ "Requirement already satisfied: decorator in /dscilab_dungvo/workspace/bin/envs/vllm/lib/python3.10/site-packages (from ipython>=7.23.1->ipykernel->jupyter) (5.1.1)\n",
123
+ "Requirement already satisfied: exceptiongroup in /dscilab_dungvo/workspace/bin/envs/vllm/lib/python3.10/site-packages (from ipython>=7.23.1->ipykernel->jupyter) (1.2.2)\n",
124
+ "Requirement already satisfied: jedi>=0.16 in /dscilab_dungvo/workspace/bin/envs/vllm/lib/python3.10/site-packages (from ipython>=7.23.1->ipykernel->jupyter) (0.19.2)\n",
125
+ "Requirement already satisfied: pexpect>4.3 in /dscilab_dungvo/workspace/bin/envs/vllm/lib/python3.10/site-packages (from ipython>=7.23.1->ipykernel->jupyter) (4.9.0)\n",
126
+ "Requirement already satisfied: stack_data in /dscilab_dungvo/workspace/bin/envs/vllm/lib/python3.10/site-packages (from ipython>=7.23.1->ipykernel->jupyter) (0.6.2)\n",
127
+ "Requirement already satisfied: python-dateutil>=2.8.2 in /dscilab_dungvo/workspace/bin/envs/vllm/lib/python3.10/site-packages (from jupyter-client>=6.1.12->ipykernel->jupyter) (2.9.0.post0)\n",
128
+ "Requirement already satisfied: platformdirs>=2.5 in /dscilab_dungvo/workspace/bin/envs/vllm/lib/python3.10/site-packages (from jupyter-core!=5.0.*,>=4.12->ipykernel->jupyter) (4.3.6)\n",
129
+ "Collecting argon2-cffi>=21.1 (from jupyter-server<3,>=2.4.0->jupyterlab->jupyter)\n",
130
+ " Downloading argon2_cffi-23.1.0-py3-none-any.whl.metadata (5.2 kB)\n",
131
+ "Collecting jupyter-events>=0.11.0 (from jupyter-server<3,>=2.4.0->jupyterlab->jupyter)\n",
132
+ " Downloading jupyter_events-0.11.0-py3-none-any.whl.metadata (5.8 kB)\n",
133
+ "Collecting jupyter-server-terminals>=0.4.4 (from jupyter-server<3,>=2.4.0->jupyterlab->jupyter)\n",
134
+ " Downloading jupyter_server_terminals-0.5.3-py3-none-any.whl.metadata (5.6 kB)\n",
135
+ "Collecting overrides>=5.0 (from jupyter-server<3,>=2.4.0->jupyterlab->jupyter)\n",
136
+ " Downloading overrides-7.7.0-py3-none-any.whl.metadata (5.8 kB)\n",
137
+ "Requirement already satisfied: prometheus-client>=0.9 in /dscilab_dungvo/workspace/bin/envs/vllm/lib/python3.10/site-packages (from jupyter-server<3,>=2.4.0->jupyterlab->jupyter) (0.20.0)\n",
138
+ "Collecting send2trash>=1.8.2 (from jupyter-server<3,>=2.4.0->jupyterlab->jupyter)\n",
139
+ " Downloading Send2Trash-1.8.3-py3-none-any.whl.metadata (4.0 kB)\n",
140
+ "Collecting terminado>=0.8.3 (from jupyter-server<3,>=2.4.0->jupyterlab->jupyter)\n",
141
+ " Downloading terminado-0.18.1-py3-none-any.whl.metadata (5.8 kB)\n",
142
+ "Collecting websocket-client>=1.7 (from jupyter-server<3,>=2.4.0->jupyterlab->jupyter)\n",
143
+ " Downloading websocket_client-1.8.0-py3-none-any.whl.metadata (8.0 kB)\n",
144
+ "Collecting babel>=2.10 (from jupyterlab-server<3,>=2.27.1->jupyterlab->jupyter)\n",
145
+ " Downloading babel-2.16.0-py3-none-any.whl.metadata (1.5 kB)\n",
146
+ "Collecting json5>=0.9.0 (from jupyterlab-server<3,>=2.27.1->jupyterlab->jupyter)\n",
147
+ " Downloading json5-0.10.0-py3-none-any.whl.metadata (34 kB)\n",
148
+ "Requirement already satisfied: jsonschema>=4.18.0 in /dscilab_dungvo/workspace/bin/envs/vllm/lib/python3.10/site-packages (from jupyterlab-server<3,>=2.27.1->jupyterlab->jupyter) (4.23.0)\n",
149
+ "Requirement already satisfied: requests>=2.31 in /dscilab_dungvo/workspace/bin/envs/vllm/lib/python3.10/site-packages (from jupyterlab-server<3,>=2.27.1->jupyterlab->jupyter) (2.32.3)\n",
150
+ "Requirement already satisfied: fastjsonschema>=2.15 in /dscilab_dungvo/workspace/bin/envs/vllm/lib/python3.10/site-packages (from nbformat>=5.7->nbconvert->jupyter) (2.21.1)\n",
151
+ "Requirement already satisfied: wcwidth in /dscilab_dungvo/workspace/bin/envs/vllm/lib/python3.10/site-packages (from prompt-toolkit>=3.0.30->jupyter-console->jupyter) (0.2.13)\n",
152
+ "Requirement already satisfied: soupsieve>1.2 in /dscilab_dungvo/workspace/bin/envs/vllm/lib/python3.10/site-packages (from beautifulsoup4->nbconvert->jupyter) (2.6)\n",
153
+ "Collecting argon2-cffi-bindings (from argon2-cffi>=21.1->jupyter-server<3,>=2.4.0->jupyterlab->jupyter)\n",
154
+ " Downloading argon2_cffi_bindings-21.2.0-cp36-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (6.7 kB)\n",
155
+ "Requirement already satisfied: parso<0.9.0,>=0.8.4 in /dscilab_dungvo/workspace/bin/envs/vllm/lib/python3.10/site-packages (from jedi>=0.16->ipython>=7.23.1->ipykernel->jupyter) (0.8.4)\n",
156
+ "Requirement already satisfied: attrs>=22.2.0 in /dscilab_dungvo/workspace/bin/envs/vllm/lib/python3.10/site-packages (from jsonschema>=4.18.0->jupyterlab-server<3,>=2.27.1->jupyterlab->jupyter) (24.2.0)\n",
157
+ "Requirement already satisfied: jsonschema-specifications>=2023.03.6 in /dscilab_dungvo/workspace/bin/envs/vllm/lib/python3.10/site-packages (from jsonschema>=4.18.0->jupyterlab-server<3,>=2.27.1->jupyterlab->jupyter) (2023.12.1)\n",
158
+ "Requirement already satisfied: referencing>=0.28.4 in /dscilab_dungvo/workspace/bin/envs/vllm/lib/python3.10/site-packages (from jsonschema>=4.18.0->jupyterlab-server<3,>=2.27.1->jupyterlab->jupyter) (0.35.1)\n",
159
+ "Requirement already satisfied: rpds-py>=0.7.1 in /dscilab_dungvo/workspace/bin/envs/vllm/lib/python3.10/site-packages (from jsonschema>=4.18.0->jupyterlab-server<3,>=2.27.1->jupyterlab->jupyter) (0.20.0)\n",
160
+ "Collecting python-json-logger>=2.0.4 (from jupyter-events>=0.11.0->jupyter-server<3,>=2.4.0->jupyterlab->jupyter)\n",
161
+ " Downloading python_json_logger-3.2.1-py3-none-any.whl.metadata (4.1 kB)\n",
162
+ "Requirement already satisfied: pyyaml>=5.3 in /dscilab_dungvo/workspace/bin/envs/vllm/lib/python3.10/site-packages (from jupyter-events>=0.11.0->jupyter-server<3,>=2.4.0->jupyterlab->jupyter) (6.0.2)\n",
163
+ "Collecting rfc3339-validator (from jupyter-events>=0.11.0->jupyter-server<3,>=2.4.0->jupyterlab->jupyter)\n",
164
+ " Downloading rfc3339_validator-0.1.4-py2.py3-none-any.whl.metadata (1.5 kB)\n",
165
+ "Collecting rfc3986-validator>=0.1.1 (from jupyter-events>=0.11.0->jupyter-server<3,>=2.4.0->jupyterlab->jupyter)\n",
166
+ " Downloading rfc3986_validator-0.1.1-py2.py3-none-any.whl.metadata (1.7 kB)\n",
167
+ "Requirement already satisfied: ptyprocess>=0.5 in /dscilab_dungvo/workspace/bin/envs/vllm/lib/python3.10/site-packages (from pexpect>4.3->ipython>=7.23.1->ipykernel->jupyter) (0.7.0)\n",
168
+ "Requirement already satisfied: six>=1.5 in /dscilab_dungvo/workspace/bin/envs/vllm/lib/python3.10/site-packages (from python-dateutil>=2.8.2->jupyter-client>=6.1.12->ipykernel->jupyter) (1.16.0)\n",
169
+ "Requirement already satisfied: charset-normalizer<4,>=2 in /dscilab_dungvo/workspace/bin/envs/vllm/lib/python3.10/site-packages (from requests>=2.31->jupyterlab-server<3,>=2.27.1->jupyterlab->jupyter) (3.3.2)\n",
170
+ "Requirement already satisfied: urllib3<3,>=1.21.1 in /dscilab_dungvo/workspace/bin/envs/vllm/lib/python3.10/site-packages (from requests>=2.31->jupyterlab-server<3,>=2.27.1->jupyterlab->jupyter) (2.2.3)\n",
171
+ "Requirement already satisfied: executing>=1.2.0 in /dscilab_dungvo/workspace/bin/envs/vllm/lib/python3.10/site-packages (from stack_data->ipython>=7.23.1->ipykernel->jupyter) (2.1.0)\n",
172
+ "Requirement already satisfied: asttokens>=2.1.0 in /dscilab_dungvo/workspace/bin/envs/vllm/lib/python3.10/site-packages (from stack_data->ipython>=7.23.1->ipykernel->jupyter) (3.0.0)\n",
173
+ "Requirement already satisfied: pure-eval in /dscilab_dungvo/workspace/bin/envs/vllm/lib/python3.10/site-packages (from stack_data->ipython>=7.23.1->ipykernel->jupyter) (0.2.3)\n",
174
+ "Collecting fqdn (from jsonschema[format-nongpl]>=4.18.0->jupyter-events>=0.11.0->jupyter-server<3,>=2.4.0->jupyterlab->jupyter)\n",
175
+ " Downloading fqdn-1.5.1-py3-none-any.whl.metadata (1.4 kB)\n",
176
+ "Collecting isoduration (from jsonschema[format-nongpl]>=4.18.0->jupyter-events>=0.11.0->jupyter-server<3,>=2.4.0->jupyterlab->jupyter)\n",
177
+ " Downloading isoduration-20.11.0-py3-none-any.whl.metadata (5.7 kB)\n",
178
+ "Collecting jsonpointer>1.13 (from jsonschema[format-nongpl]>=4.18.0->jupyter-events>=0.11.0->jupyter-server<3,>=2.4.0->jupyterlab->jupyter)\n",
179
+ " Downloading jsonpointer-3.0.0-py2.py3-none-any.whl.metadata (2.3 kB)\n",
180
+ "Collecting uri-template (from jsonschema[format-nongpl]>=4.18.0->jupyter-events>=0.11.0->jupyter-server<3,>=2.4.0->jupyterlab->jupyter)\n",
181
+ " Downloading uri_template-1.3.0-py3-none-any.whl.metadata (8.8 kB)\n",
182
+ "Collecting webcolors>=24.6.0 (from jsonschema[format-nongpl]>=4.18.0->jupyter-events>=0.11.0->jupyter-server<3,>=2.4.0->jupyterlab->jupyter)\n",
183
+ " Downloading webcolors-24.11.1-py3-none-any.whl.metadata (2.2 kB)\n",
184
+ "Collecting cffi>=1.0.1 (from argon2-cffi-bindings->argon2-cffi>=21.1->jupyter-server<3,>=2.4.0->jupyterlab->jupyter)\n",
185
+ " Downloading cffi-1.17.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (1.5 kB)\n",
186
+ "Collecting pycparser (from cffi>=1.0.1->argon2-cffi-bindings->argon2-cffi>=21.1->jupyter-server<3,>=2.4.0->jupyterlab->jupyter)\n",
187
+ " Downloading pycparser-2.22-py3-none-any.whl.metadata (943 bytes)\n",
188
+ "Collecting arrow>=0.15.0 (from isoduration->jsonschema[format-nongpl]>=4.18.0->jupyter-events>=0.11.0->jupyter-server<3,>=2.4.0->jupyterlab->jupyter)\n",
189
+ " Downloading arrow-1.3.0-py3-none-any.whl.metadata (7.5 kB)\n",
190
+ "Collecting types-python-dateutil>=2.8.10 (from arrow>=0.15.0->isoduration->jsonschema[format-nongpl]>=4.18.0->jupyter-events>=0.11.0->jupyter-server<3,>=2.4.0->jupyterlab->jupyter)\n",
191
+ " Downloading types_python_dateutil-2.9.0.20241206-py3-none-any.whl.metadata (2.1 kB)\n",
192
+ "Downloading jupyter-1.1.1-py2.py3-none-any.whl (2.7 kB)\n",
193
+ "Downloading ipywidgets-8.1.5-py3-none-any.whl (139 kB)\n",
194
+ "Downloading jupyter_console-6.6.3-py3-none-any.whl (24 kB)\n",
195
+ "Downloading jupyterlab-4.3.4-py3-none-any.whl (11.7 MB)\n",
196
+ "\u001b[2K \u001b[91m━━━━━━━━━━━━━━━━━\u001b[0m\u001b[90m╺\u001b[0m\u001b[90m━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m5.0/11.7 MB\u001b[0m \u001b[31m14.9 kB/s\u001b[0m eta \u001b[36m0:07:28\u001b[0m\n",
197
+ "\u001b[?25h\u001b[31mERROR: Exception:\n",
198
+ "Traceback (most recent call last):\n",
199
+ " File \"/dscilab_dungvo/workspace/bin/envs/vllm/lib/python3.10/site-packages/pip/_vendor/urllib3/response.py\", line 438, in _error_catcher\n",
200
+ " yield\n",
201
+ " File \"/dscilab_dungvo/workspace/bin/envs/vllm/lib/python3.10/site-packages/pip/_vendor/urllib3/response.py\", line 561, in read\n",
202
+ " data = self._fp_read(amt) if not fp_closed else b\"\"\n",
203
+ " File \"/dscilab_dungvo/workspace/bin/envs/vllm/lib/python3.10/site-packages/pip/_vendor/urllib3/response.py\", line 527, in _fp_read\n",
204
+ " return self._fp.read(amt) if amt is not None else self._fp.read()\n",
205
+ " File \"/dscilab_dungvo/workspace/bin/envs/vllm/lib/python3.10/site-packages/pip/_vendor/cachecontrol/filewrapper.py\", line 98, in read\n",
206
+ " data: bytes = self.__fp.read(amt)\n",
207
+ " File \"/dscilab_dungvo/workspace/bin/envs/vllm/lib/python3.10/http/client.py\", line 466, in read\n",
208
+ " s = self.fp.read(amt)\n",
209
+ " File \"/dscilab_dungvo/workspace/bin/envs/vllm/lib/python3.10/socket.py\", line 705, in readinto\n",
210
+ " return self._sock.recv_into(b)\n",
211
+ " File \"/dscilab_dungvo/workspace/bin/envs/vllm/lib/python3.10/ssl.py\", line 1307, in recv_into\n",
212
+ " return self.read(nbytes, buffer)\n",
213
+ " File \"/dscilab_dungvo/workspace/bin/envs/vllm/lib/python3.10/ssl.py\", line 1163, in read\n",
214
+ " return self._sslobj.read(len, buffer)\n",
215
+ "TimeoutError: The read operation timed out\n",
216
+ "\n",
217
+ "During handling of the above exception, another exception occurred:\n",
218
+ "\n",
219
+ "Traceback (most recent call last):\n",
220
+ " File \"/dscilab_dungvo/workspace/bin/envs/vllm/lib/python3.10/site-packages/pip/_internal/cli/base_command.py\", line 105, in _run_wrapper\n",
221
+ " status = _inner_run()\n",
222
+ " File \"/dscilab_dungvo/workspace/bin/envs/vllm/lib/python3.10/site-packages/pip/_internal/cli/base_command.py\", line 96, in _inner_run\n",
223
+ " return self.run(options, args)\n",
224
+ " File \"/dscilab_dungvo/workspace/bin/envs/vllm/lib/python3.10/site-packages/pip/_internal/cli/req_command.py\", line 67, in wrapper\n",
225
+ " return func(self, options, args)\n",
226
+ " File \"/dscilab_dungvo/workspace/bin/envs/vllm/lib/python3.10/site-packages/pip/_internal/commands/install.py\", line 379, in run\n",
227
+ " requirement_set = resolver.resolve(\n",
228
+ " File \"/dscilab_dungvo/workspace/bin/envs/vllm/lib/python3.10/site-packages/pip/_internal/resolution/resolvelib/resolver.py\", line 179, in resolve\n",
229
+ " self.factory.preparer.prepare_linked_requirements_more(reqs)\n",
230
+ " File \"/dscilab_dungvo/workspace/bin/envs/vllm/lib/python3.10/site-packages/pip/_internal/operations/prepare.py\", line 554, in prepare_linked_requirements_more\n",
231
+ " self._complete_partial_requirements(\n",
232
+ " File \"/dscilab_dungvo/workspace/bin/envs/vllm/lib/python3.10/site-packages/pip/_internal/operations/prepare.py\", line 469, in _complete_partial_requirements\n",
233
+ " for link, (filepath, _) in batch_download:\n",
234
+ " File \"/dscilab_dungvo/workspace/bin/envs/vllm/lib/python3.10/site-packages/pip/_internal/network/download.py\", line 184, in __call__\n",
235
+ " for chunk in chunks:\n",
236
+ " File \"/dscilab_dungvo/workspace/bin/envs/vllm/lib/python3.10/site-packages/pip/_internal/cli/progress_bars.py\", line 55, in _rich_progress_bar\n",
237
+ " for chunk in iterable:\n",
238
+ " File \"/dscilab_dungvo/workspace/bin/envs/vllm/lib/python3.10/site-packages/pip/_internal/network/utils.py\", line 65, in response_chunks\n",
239
+ " for chunk in response.raw.stream(\n",
240
+ " File \"/dscilab_dungvo/workspace/bin/envs/vllm/lib/python3.10/site-packages/pip/_vendor/urllib3/response.py\", line 622, in stream\n",
241
+ " data = self.read(amt=amt, decode_content=decode_content)\n",
242
+ " File \"/dscilab_dungvo/workspace/bin/envs/vllm/lib/python3.10/site-packages/pip/_vendor/urllib3/response.py\", line 560, in read\n",
243
+ " with self._error_catcher():\n",
244
+ " File \"/dscilab_dungvo/workspace/bin/envs/vllm/lib/python3.10/contextlib.py\", line 153, in __exit__\n",
245
+ " self.gen.throw(typ, value, traceback)\n",
246
+ " File \"/dscilab_dungvo/workspace/bin/envs/vllm/lib/python3.10/site-packages/pip/_vendor/urllib3/response.py\", line 443, in _error_catcher\n",
247
+ " raise ReadTimeoutError(self._pool, None, \"Read timed out.\")\n",
248
+ "pip._vendor.urllib3.exceptions.ReadTimeoutError: HTTPSConnectionPool(host='files.pythonhosted.org', port=443): Read timed out.\u001b[0m\u001b[31m\n",
249
+ "\u001b[0m"
250
+ ]
251
+ },
252
+ {
253
+ "name": "stderr",
254
+ "output_type": "stream",
255
+ "text": [
256
+ "--2024-12-25 08:56:46-- https://docs.vllm.ai/en/stable/getting_started/quickstart.html\n",
257
+ "Resolving docs.vllm.ai (docs.vllm.ai)... 104.21.88.245, 172.67.154.127, 2606:4700:3030::6815:58f5, ...\n",
258
+ "Connecting to docs.vllm.ai (docs.vllm.ai)|104.21.88.245|:443... connected.\n",
259
+ "HTTP request sent, awaiting response... 200 OK\n",
260
+ "Length: unspecified [text/html]\n",
261
+ "Saving to: ‘downloaded_page.html’\n",
262
+ "\n",
263
+ " 0K .......... .......... .......... .......... .......... 75.9M\n",
264
+ " 50K 754G=0.001s\n",
265
+ "\n",
266
+ "2024-12-25 08:56:47 (76.5 MB/s) - ‘downloaded_page.html’ saved [51605]\n",
267
+ "\n",
268
+ "[NbConvertApp] Converting notebook downloaded_page.html to notebook\n",
269
+ "Traceback (most recent call last):\n",
270
+ " File \"/usr/local/lib/python3.11/dist-packages/nbformat/reader.py\", line 20, in parse_json\n",
271
+ " nb_dict = json.loads(s, **kwargs)\n",
272
+ " ^^^^^^^^^^^^^^^^^^^^^^^\n",
273
+ " File \"/usr/lib/python3.11/json/__init__.py\", line 346, in loads\n",
274
+ " return _default_decoder.decode(s)\n",
275
+ " ^^^^^^^^^^^^^^^^^^^^^^^^^^\n",
276
+ " File \"/usr/lib/python3.11/json/decoder.py\", line 337, in decode\n",
277
+ " obj, end = self.raw_decode(s, idx=_w(s, 0).end())\n",
278
+ " ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n",
279
+ " File \"/usr/lib/python3.11/json/decoder.py\", line 355, in raw_decode\n",
280
+ " raise JSONDecodeError(\"Expecting value\", s, err.value) from None\n",
281
+ "json.decoder.JSONDecodeError: Expecting value: line 3 column 1 (char 2)\n",
282
+ "\n",
283
+ "The above exception was the direct cause of the following exception:\n",
284
+ "\n",
285
+ "Traceback (most recent call last):\n",
286
+ " File \"/usr/local/bin/jupyter-nbconvert\", line 8, in <module>\n",
287
+ " sys.exit(main())\n",
288
+ " ^^^^^^\n",
289
+ " File \"/usr/local/lib/python3.11/dist-packages/jupyter_core/application.py\", line 280, in launch_instance\n",
290
+ " super().launch_instance(argv=argv, **kwargs)\n",
291
+ " File \"/usr/local/lib/python3.11/dist-packages/traitlets/config/application.py\", line 1053, in launch_instance\n",
292
+ " app.start()\n",
293
+ " File \"/usr/local/lib/python3.11/dist-packages/nbconvert/nbconvertapp.py\", line 412, in start\n",
294
+ " self.convert_notebooks()\n",
295
+ " File \"/usr/local/lib/python3.11/dist-packages/nbconvert/nbconvertapp.py\", line 590, in convert_notebooks\n",
296
+ " self.convert_single_notebook(notebook_filename)\n",
297
+ " File \"/usr/local/lib/python3.11/dist-packages/nbconvert/nbconvertapp.py\", line 556, in convert_single_notebook\n",
298
+ " output, resources = self.export_single_notebook(\n",
299
+ " ^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n",
300
+ " File \"/usr/local/lib/python3.11/dist-packages/nbconvert/nbconvertapp.py\", line 479, in export_single_notebook\n",
301
+ " output, resources = self.exporter.from_filename(\n",
302
+ " ^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n",
303
+ " File \"/usr/local/lib/python3.11/dist-packages/nbconvert/exporters/exporter.py\", line 201, in from_filename\n",
304
+ " return self.from_file(f, resources=resources, **kw)\n",
305
+ " ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n",
306
+ " File \"/usr/local/lib/python3.11/dist-packages/nbconvert/exporters/exporter.py\", line 221, in from_file\n",
307
+ " nbformat.read(file_stream, as_version=4), resources=resources, **kw\n",
308
+ " ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n",
309
+ " File \"/usr/local/lib/python3.11/dist-packages/nbformat/__init__.py\", line 171, in read\n",
310
+ " return reads(buf, as_version, capture_validation_error, **kwargs)\n",
311
+ " ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n",
312
+ " File \"/usr/local/lib/python3.11/dist-packages/nbformat/__init__.py\", line 89, in reads\n",
313
+ " nb = reader.reads(s, **kwargs)\n",
314
+ " ^^^^^^^^^^^^^^^^^^^^^^^^^\n",
315
+ " File \"/usr/local/lib/python3.11/dist-packages/nbformat/reader.py\", line 76, in reads\n",
316
+ " nb_dict = parse_json(s, **kwargs)\n",
317
+ " ^^^^^^^^^^^^^^^^^^^^^^^\n",
318
+ " File \"/usr/local/lib/python3.11/dist-packages/nbformat/reader.py\", line 26, in parse_json\n",
319
+ " raise NotJSONError(message) from e\n",
320
+ "nbformat.reader.NotJSONError: Notebook does not appear to be JSON: '\\n\\n<!DOCTYPE html>\\n\\n\\n<html lang=\"en...\n"
321
+ ]
322
+ },
323
+ {
324
+ "ename": "CalledProcessError",
325
+ "evalue": "Command '['jupyter', 'nbconvert', '--to', 'notebook', '--output', 'quickstart_notebook.ipynb', 'downloaded_page.html']' returned non-zero exit status 1.",
326
+ "output_type": "error",
327
+ "traceback": [
328
+ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
329
+ "\u001b[0;31mCalledProcessError\u001b[0m Traceback (most recent call last)",
330
+ "Cell \u001b[0;32mIn[5], line 16\u001b[0m\n\u001b[1;32m 14\u001b[0m url \u001b[38;5;241m=\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mhttps://docs.vllm.ai/en/stable/getting_started/quickstart.html\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 15\u001b[0m output_notebook \u001b[38;5;241m=\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mquickstart_notebook.ipynb\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m---> 16\u001b[0m \u001b[43mdownload_html_and_convert\u001b[49m\u001b[43m(\u001b[49m\u001b[43murl\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43moutput_notebook\u001b[49m\u001b[43m)\u001b[49m\n",
331
+ "Cell \u001b[0;32mIn[5], line 10\u001b[0m, in \u001b[0;36mdownload_html_and_convert\u001b[0;34m(url, output_notebook)\u001b[0m\n\u001b[1;32m 7\u001b[0m subprocess\u001b[38;5;241m.\u001b[39mrun([\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mwget\u001b[39m\u001b[38;5;124m'\u001b[39m, url, \u001b[38;5;124m'\u001b[39m\u001b[38;5;124m-O\u001b[39m\u001b[38;5;124m'\u001b[39m, \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mdownloaded_page.html\u001b[39m\u001b[38;5;124m'\u001b[39m], check\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mTrue\u001b[39;00m)\n\u001b[1;32m 9\u001b[0m \u001b[38;5;66;03m# Convert HTML to notebook\u001b[39;00m\n\u001b[0;32m---> 10\u001b[0m \u001b[43msubprocess\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mrun\u001b[49m\u001b[43m(\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mjupyter\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mnbconvert\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43m--to\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mnotebook\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43m--output\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43moutput_notebook\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mdownloaded_page.html\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m]\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mcheck\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mTrue\u001b[39;49;00m\u001b[43m)\u001b[49m\n",
332
+ "File \u001b[0;32m/dscilab_dungvo/workspace/bin/envs/vllm/lib/python3.10/subprocess.py:526\u001b[0m, in \u001b[0;36mrun\u001b[0;34m(input, capture_output, timeout, check, *popenargs, **kwargs)\u001b[0m\n\u001b[1;32m 524\u001b[0m retcode \u001b[38;5;241m=\u001b[39m process\u001b[38;5;241m.\u001b[39mpoll()\n\u001b[1;32m 525\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m check \u001b[38;5;129;01mand\u001b[39;00m retcode:\n\u001b[0;32m--> 526\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m CalledProcessError(retcode, process\u001b[38;5;241m.\u001b[39margs,\n\u001b[1;32m 527\u001b[0m output\u001b[38;5;241m=\u001b[39mstdout, stderr\u001b[38;5;241m=\u001b[39mstderr)\n\u001b[1;32m 528\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m CompletedProcess(process\u001b[38;5;241m.\u001b[39margs, retcode, stdout, stderr)\n",
333
+ "\u001b[0;31mCalledProcessError\u001b[0m: Command '['jupyter', 'nbconvert', '--to', 'notebook', '--output', 'quickstart_notebook.ipynb', 'downloaded_page.html']' returned non-zero exit status 1."
334
+ ]
335
+ }
336
+ ],
337
+ "source": [
338
+ "# https://docs.vllm.ai/en/stable/getting_started/quickstart.rst\n",
339
+ "# # !pip install jupyter\n",
340
+ "# import subprocess\n",
341
+ "\n",
342
+ "# def download_html_and_convert(url, output_notebook):\n",
343
+ "# # !pip install jupyter\n",
344
+ "# subprocess.run(['pip', 'install', 'jupyter'], check=True)\n",
345
+ "\n",
346
+ "# # Download HTML\n",
347
+ "# subprocess.run(['wget', url, '-O', 'downloaded_page.html'], check=True)\n",
348
+ " \n",
349
+ "# # Convert HTML to notebook\n",
350
+ "# subprocess.run(['jupyter', 'nbconvert', '--to', 'notebook', '--output', output_notebook, 'downloaded_page.html'], check=True)\n",
351
+ "\n",
352
+ "# # Example usage\n",
353
+ "# if __name__ == \"__main__\":\n",
354
+ "# url = \"https://docs.vllm.ai/en/stable/getting_started/quickstart.html\"\n",
355
+ "# output_notebook = \"quickstart_notebook.ipynb\"\n",
356
+ "# download_html_and_convert(url, output_notebook)"
357
+ ]
358
+ },
359
+ {
360
+ "cell_type": "code",
361
+ "execution_count": null,
362
+ "metadata": {},
363
+ "outputs": [],
364
+ "source": []
365
+ },
366
+ {
367
+ "cell_type": "code",
368
+ "execution_count": null,
369
+ "metadata": {},
370
+ "outputs": [],
371
+ "source": [
372
+ "prompts = [\n",
373
+ " \"Hello, my name is\",\n",
374
+ " \"The president of the United States is\",\n",
375
+ " \"The capital of France is\",\n",
376
+ " \"The future of AI is\",\n",
377
+ "]\n",
378
+ "sampling_params = SamplingParams(temperature=0.8, top_p=0.95)"
379
+ ]
380
+ },
381
+ {
382
+ "cell_type": "code",
383
+ "execution_count": null,
384
+ "metadata": {},
385
+ "outputs": [
386
+ {
387
+ "name": "stdout",
388
+ "output_type": "stream",
389
+ "text": [
390
+ "INFO 12-25 08:56:54 __init__.py:46] No plugins found.\n",
391
+ "INFO 12-25 08:57:09 config.py:403] This model supports multiple tasks: {'embedding', 'generate'}. Defaulting to 'generate'.\n",
392
+ "INFO 12-25 08:57:09 llm_engine.py:249] Initializing an LLM engine (v0.6.4.post2.dev227+gd2bd88b1) with config: model='facebook/opt-125m', speculative_config=None, tokenizer='facebook/opt-125m', skip_tokenizer_init=False, tokenizer_mode=auto, revision=None, override_neuron_config=None, tokenizer_revision=None, trust_remote_code=False, dtype=torch.float16, max_seq_len=2048, download_dir=None, load_format=LoadFormat.AUTO, tensor_parallel_size=1, pipeline_parallel_size=1, disable_custom_all_reduce=False, quantization=None, enforce_eager=False, kv_cache_dtype=auto, quantization_param_path=None, device_config=cuda, decoding_config=DecodingConfig(guided_decoding_backend='xgrammar'), observability_config=ObservabilityConfig(otlp_traces_endpoint=None, collect_model_forward_time=False, collect_model_execute_time=False), seed=0, served_model_name=facebook/opt-125m, num_scheduler_steps=1, chunked_prefill_enabled=False multi_step_stream_outputs=True, enable_prefix_caching=False, use_async_output_proc=True, use_cached_outputs=False, mm_processor_kwargs=None, pooler_config=None,compilation_config=CompilationConfig(level=0, backend='', custom_ops=[], splitting_ops=['vllm.unified_attention', 'vllm.unified_attention_with_output'], use_inductor=True, inductor_specialize_for_cudagraph_no_more_than=None, inductor_compile_sizes=[], inductor_compile_config={}, inductor_passes={}, use_cudagraph=False, cudagraph_num_of_warmups=0, cudagraph_capture_sizes=None, cudagraph_copy_inputs=False, pass_config=PassConfig(dump_graph_stages=[], dump_graph_dir=PosixPath('.'), enable_fusion=True, enable_reshape=True), compile_sizes=[], capture_sizes=[256, 248, 240, 232, 224, 216, 208, 200, 192, 184, 176, 168, 160, 152, 144, 136, 128, 120, 112, 104, 96, 88, 80, 72, 64, 56, 48, 40, 32, 24, 16, 8, 4, 2, 1], enabled_custom_ops=Counter(), disabled_custom_ops=Counter(), static_forward_context={})\n",
393
+ "INFO 12-25 08:57:17 selector.py:120] Using Flash Attention backend.\n",
394
+ "INFO 12-25 08:57:18 model_runner.py:1089] Starting to load model facebook/opt-125m...\n",
395
+ "INFO 12-25 08:57:18 weight_utils.py:243] Using model weights format ['*.bin']\n"
396
+ ]
397
+ }
398
+ ],
399
+ "source": [
400
+ "llm = LLM(model=\"facebook/opt-125m\")"
401
+ ]
402
+ },
403
+ {
404
+ "cell_type": "code",
405
+ "execution_count": null,
406
+ "metadata": {},
407
+ "outputs": [],
408
+ "source": []
409
+ }
410
+ ],
411
+ "metadata": {
412
+ "kernelspec": {
413
+ "display_name": "vllm",
414
+ "language": "python",
415
+ "name": "python3"
416
+ },
417
+ "language_info": {
418
+ "codemirror_mode": {
419
+ "name": "ipython",
420
+ "version": 3
421
+ },
422
+ "file_extension": ".py",
423
+ "mimetype": "text/x-python",
424
+ "name": "python",
425
+ "nbconvert_exporter": "python",
426
+ "pygments_lexer": "ipython3",
427
+ "version": "3.10.14"
428
+ }
429
+ },
430
+ "nbformat": 4,
431
+ "nbformat_minor": 2
432
+ }
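The notebook above builds the engine with `llm = LLM(model="facebook/opt-125m")` and defines `prompts` and `sampling_params`, but the final generation cell is left empty. A minimal sketch of that remaining step, mirroring the objects created in the earlier cells and the vLLM quickstart:

```python
from vllm import LLM, SamplingParams

# Re-create the objects from the notebook cells above; model and sampling
# settings mirror the notebook.
prompts = [
    "Hello, my name is",
    "The capital of France is",
]
sampling_params = SamplingParams(temperature=0.8, top_p=0.95)
llm = LLM(model="facebook/opt-125m")

# Run offline generation and print each prompt with its first completion.
outputs = llm.generate(prompts, sampling_params)
for output in outputs:
    print(f"Prompt: {output.prompt!r}")
    print(f"Generated: {output.outputs[0].text!r}")
```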
mlruns/0/meta.yaml ADDED
@@ -0,0 +1,6 @@
1
+ artifact_location: mlflow-artifacts:/0
2
+ creation_time: 1735239171092
3
+ experiment_id: '0'
4
+ last_update_time: 1735239171092
5
+ lifecycle_stage: active
6
+ name: Default
recognize-anything/.ipynb_checkpoints/README-checkpoint.md ADDED
@@ -0,0 +1,601 @@
1
+ # <font size=8> :label: Recognize Anything Model </font>
2
+
3
+ This project aims to develop a series of strong, open-source fundamental image recognition models.
4
+
5
+ [![Training Dataset](https://img.shields.io/badge/📦-Training%20Dataset-orange.svg)](#open_book-training-datasets)
6
+ [![Tag List](https://img.shields.io/badge/🏷️-4585%20Tags-green.svg)](ram/data/ram_tag_list.txt)
7
+ [![Web Demo](https://img.shields.io/badge/🤗-HuggingFace%20Space-cyan.svg)](https://huggingface.co/spaces/xinyu1205/Recognize_Anything-Tag2Text)
8
+ [![Open in Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/mhd-medfa/recognize-anything/blob/main/recognize_anything_demo.ipynb)
9
+ [![Open in Bohrium](https://cdn.dp.tech/bohrium/web/static/images/open-in-bohrium.svg)](https://bohrium.dp.tech/notebooks/63116114759)
10
+
11
+
12
+ - **Recognize Anything Plus Model (RAM++)** [[Paper](https://arxiv.org/abs/2310.15200)] <br>
13
+
14
+ RAM++ is the next generation of RAM, which can **recognize any category with high accuracy**, including **both predefined common categories and diverse open-set categories**.
15
+
16
+ - **Recognize Anything Model (RAM)** [[Paper](https://arxiv.org/abs/2306.03514)][[Demo](https://huggingface.co/spaces/xinyu1205/recognize-anything)] <br>
17
+
18
+ RAM is an image tagging model, which can **recognize any common category with high accuracy**.
19
+
20
+ RAM is accepted at **CVPR 2024 Multimodal Foundation Models Workshop**.
21
+
22
+ - **Tag2Text (ICLR 2024)** [[Paper](https://arxiv.org/abs/2303.05657)] [[Demo](https://huggingface.co/spaces/xinyu1205/recognize-anything)]<br>
23
+
24
+ Tag2Text is a vision-language model guided by tagging, which can **support tagging and comprehensive captioning simultaneously**.
25
+
26
+ Tag2Text is accepted at **ICLR 2024!** See you in Vienna!
27
+
28
+
29
+
30
+
31
+ ## :bulb: Highlight
32
+
33
+ ### **Superior Image Recognition Capability**
34
+
35
+ RAM++ outperforms existing SOTA fundamental image recognition models on common tag categories, uncommon tag categories, and human-object interaction phrases.
36
+
37
+ <p align="center">
38
+ <table class="tg">
39
+ <tr>
40
+ <td class="tg-c3ow"><img src="images/ram_plus_compare.jpg" align="center" width="700" ></td>
41
+ </tr>
42
+ </table>
43
+ <p align="center">Comparison of zero-shot image recognition performance.</p>
44
+ </p>
45
+
46
+
47
+ ### **Strong Visual Semantic Analysis**
48
+
49
+
50
+ We have combined Tag2Text and RAM with localization models (Grounding-DINO and SAM) and developed a strong visual semantic analysis pipeline in the [Grounded-SAM](https://github.com/IDEA-Research/Grounded-Segment-Anything) project.
51
+
52
+ ![](./images/ram_grounded_sam.jpg)
53
+
54
+
55
+ ## :sunrise: Model Zoo
56
+
57
+ <details>
58
+ <summary><font size="3" style="font-weight:bold;">
59
+ RAM++
60
+ </font></summary>
61
+
62
+ RAM++ is the next generation of RAM, which can recognize any category with high accuracy, including both predefined common categories and diverse open-set categories.
63
+
64
+
65
+ - **For Common Predefined Categories.** RAM++ exhibits exceptional image tagging capabilities with powerful zero-shot generalization, maintaining the same capabilities as RAM.
66
+ <!-- - RAM++ showcases impressive zero-shot performance, significantly outperforming CLIP and BLIP.
67
+ - RAM++ even surpasses the fully supervised manners (ML-Decoder).
68
+ - RAM++ exhibits competitive performance with the Google tagging API. -->
69
+ - **For Diverse Open-set Categories.** RAM++ achieves notable enhancements beyond CLIP and RAM.
70
+ <!-- - RAM++ integrate the image-tags-text triplets within a unified alignment framework.
71
+ - RAM++ pioneer the intergation of LLM's knowledge into image tagging training. -->
72
+
73
+
74
+ <p align="center">
75
+ <table class="tg">
76
+ <tr>
77
+ <td class="tg-c3ow"><img src="images/ram_plus_experiment.png" align="center" width="800" ></td>
78
+ </tr>
79
+ </table>
80
+ <p align="center">(Green indicates fully supervised learning; the others indicate zero-shot performance.)</p>
81
+ </p>
82
+
83
+
84
+ <p align="center">
85
+ <table class="tg">
86
+ <tr>
87
+ <td class="tg-c3ow"><img src="images/ram_plus_visualization.jpg" align="center" width="800" ></td>
88
+ </tr>
89
+ </table>
90
+ <p align="center">RAM++ demonstrates a significant improvement in open-set category recognition.</p>
91
+ </p>
92
+
93
+
94
+ </details>
95
+
96
+
97
+
98
+ <details>
99
+ <summary><font size="3" style="font-weight:bold;">
100
+ RAM
101
+ </font></summary>
102
+
103
+
104
+ RAM is a strong image tagging model, which can recognize any common category with high accuracy.
105
+ - **Strong and general.** RAM exhibits exceptional image tagging capabilities with powerful zero-shot generalization;
106
+ - RAM showcases impressive zero-shot performance, significantly outperforming CLIP and BLIP.
107
+ - RAM even surpasses fully supervised methods (ML-Decoder).
108
+ - RAM exhibits competitive performance with the Google tagging API.
109
+ - **Reproducible and affordable.** RAM has a low reproduction cost thanks to its open-source and annotation-free dataset;
110
+ - **Flexible and versatile.** RAM offers remarkable flexibility, catering to various application scenarios.
111
+
112
+
113
+ <p align="center">
114
+ <table class="tg">
115
+ <tr>
116
+ <td class="tg-c3ow"><img src="images/experiment_comparison.png" align="center" width="800" ></td>
117
+ </tr>
118
+ </table>
119
+ <p align="center">(Green color means fully supervised learning and Blue color means zero-shot performance.)</p>
120
+ </p>
121
+
122
+ <p align="center">
123
+ <table class="tg">
124
+ <tr>
125
+ <td class="tg-c3ow"><img src="images/tagging_results.jpg" align="center" width="800" ></td>
126
+ </tr>
127
+ </table>
128
+ </p>
129
+
130
+ RAM significantly improves the tagging ability based on the Tag2Text framework.
131
+ - **Accuracy.** RAM utilizes a **data engine** to **generate** additional annotations and **clean** incorrect ones, resulting in **higher accuracy** compared to Tag2Text.
132
+ - **Scope.** RAM upgrades the number of fixed tags from 3,400+ to **[6,400+](./ram/data/ram_tag_list.txt)** (reduced by synonym merging to 4,500+ distinct semantic tags), covering **more valuable categories**.
133
+ Moreover, RAM is equipped with **open-set capability**, making it feasible to recognize tags not seen during training.
134
+
135
+
136
+ </details>
137
+
138
+
139
+
140
+ <details>
141
+ <summary><font size="3" style="font-weight:bold;">
142
+ Tag2Text
143
+ </font></summary>
144
+
145
+
146
+ Tag2Text is an efficient and controllable vision-language model with tagging guidance.
147
+ - **Tagging.** Tag2Text recognizes **[3,400+](./ram/data/tag2text_ori_tag_list.txt)** commonly human-used categories without manual annotations.
148
+ - **Captioning.** Tag2Text integrates **tag information** into text generation as the **guiding elements**, resulting in **more controllable and comprehensive descriptions**.
149
+ - **Retrieval.** Tag2Text provides **tags** as **additional visible alignment indicators** for image-text retrieval.
150
+
151
+
152
+ <p align="center">
153
+ <table class="tg">
154
+ <tr>
155
+ <td class="tg-c3ow"><img src="images/tag2text_visualization.png" align="center" width="800" ></td>
156
+ </tr>
157
+ </table>
158
+ <p align="center">Tag2Text generates more comprehensive captions with tagging guidance.</p>
159
+ </p>
160
+
161
+ <p align="center">
162
+ <table class="tg">
163
+ <tr>
164
+ <td class="tg-c3ow"><img src="images/tag2text_retrieval_visualization.png" align="center" width="800" ></td>
165
+ </tr>
166
+ </table>
167
+ <p align="center">Tag2Text provides tags as additional visible alignment indicators.</p>
168
+ </p>
169
+
170
+
171
+ </details>
172
+
173
+ <!-- ## :sparkles: Highlight Projects with other Models
174
+ - [Tag2Text/RAM with Grounded-SAM](https://github.com/IDEA-Research/Grounded-Segment-Anything) is a strong and general pipeline for visual semantic analysis, which can automatically **recognize**, detect, and segment an image!
175
+ - [Ask-Anything](https://github.com/OpenGVLab/Ask-Anything) is a multifunctional video question answering tool. Tag2Text provides powerful tagging and captioning capabilities as a fundamental component.
176
+ - [Prompt-can-anything](https://github.com/positive666/Prompt-Can-Anything) is a gradio web library that integrates SOTA multimodal large models, including Tag2text as the core model for graphic understanding -->
177
+
178
+
179
+ <!--
180
+ ## :fire: News
181
+
182
+ - **`2023/10/30`**: We release the [Recognize Anything Model Plus Model(RAM++)](), checkpoints and inference code!
183
+ - **`2023/06/08`**: We release the [Recognize Anything Model (RAM) Tag2Text web demo 🤗](https://huggingface.co/spaces/xinyu1205/Recognize_Anything-Tag2Text), checkpoints and inference code!
184
+ - **`2023/06/07`**: We release the [Recognize Anything Model (RAM)](https://recognize-anything.github.io/), a strong image tagging model!
185
+ - **`2023/06/05`**: Tag2Text is combined with [Prompt-can-anything](https://github.com/OpenGVLab/Ask-Anything).
186
+ - **`2023/05/20`**: Tag2Text is combined with [VideoChat](https://github.com/OpenGVLab/Ask-Anything).
187
+ - **`2023/04/20`**: We marry Tag2Text with with [Grounded-SAM](https://github.com/IDEA-Research/Grounded-Segment-Anything).
188
+ - **`2023/04/10`**: Code and checkpoint is available Now!
189
+ - **`2023/03/14`**: [Tag2Text web demo 🤗](https://huggingface.co/spaces/xinyu1205/Recognize_Anything-Tag2Text) is available on Hugging Face Space! -->
190
+
191
+
192
+
193
+
194
+ <!--
195
+ ## :writing_hand: TODO
196
+
197
+ - [x] Release checkpoints.
198
+ - [x] Release inference code.
199
+ - [x] Release demo and checkpoints.
200
+ - [x] Release training codes.
201
+ - [x] Release training datasets.
202
+ - [ ] Release full training codes and scripts. -->
203
+
204
+
205
+ ## :open_book: Training Datasets
206
+
207
+ ### **Image Texts and Tags**
208
+
209
+ These annotation files come from the [Tag2Text](https://arxiv.org/abs/2303.05657) and [RAM](https://recognize-anything.github.io/). Tag2Text automatically extracts image tags from image-text pairs. RAM further augments both tags and texts via an automatic data engine.
210
+
211
+
212
+ | DataSet | Size | Images | Texts | Tags |
213
+ |----------|---------|--------|-------|-------|
214
+ | [COCO](https://huggingface.co/datasets/xinyu1205/recognize-anything-dataset/blob/main/coco_train_rmcocodev_ram.json) | 168 MB | 113K | 680K | 3.2M |
215
+ | [VG](https://huggingface.co/datasets/xinyu1205/recognize-anything-dataset/blob/main/vg_ram.json) | 55 MB | 100K | 923K | 2.7M |
216
+ | [SBU](https://huggingface.co/datasets/xinyu1205/recognize-anything-dataset/blob/main/sbu_ram.json) | 234 MB | 849K | 1.7M | 7.6M |
217
+ | [CC3M](https://huggingface.co/datasets/xinyu1205/recognize-anything-dataset/blob/main/cc3m_train_ram.json) | 766 MB | 2.8M | 5.6M | 28.2M |
218
+ | [CC3M-val](https://huggingface.co/datasets/xinyu1205/recognize-anything-dataset/blob/main/cc3m_val_ram.json) | 3.5 MB | 12K | 26K | 132K |
219
+
220
+ CC12M will be released in the next update.
221
+
222
+ ### **LLM Tag Descriptions**
223
+
224
+ These tag description files come from [RAM++](https://arxiv.org/abs/2310.15200) and are generated by calling the GPT API. You can also customize tag categories with [generate_tag_des_llm.py](generate_tag_des_llm.py).
225
+
226
+ | Tag Descriptions | Tag List |
227
+ |---------------------|----------|
228
+ | [RAM Tag List](https://huggingface.co/datasets/xinyu1205/recognize-anything-plus-model-tag-descriptions/blob/main/ram_tag_list_4585_llm_tag_descriptions.json) | [4,585](ram/data/ram_tag_list.txt) |
229
+ | [OpenImages Uncommon](./datasets/openimages_rare_200/openimages_rare_200_llm_tag_descriptions.json) | [200](datasets/openimages_rare_200/openimages_rare_200_ram_taglist.txt) |
230
+
231
+ ## :toolbox: Checkpoints
232
+ Note: you need to create a 'pretrained' folder and download these checkpoints into it.
233
+ <!-- insert a table -->
234
+ <table>
235
+ <thead>
236
+ <tr style="text-align: right;">
237
+ <th></th>
238
+ <th>Name</th>
239
+ <th>Backbone</th>
240
+ <th>Data</th>
241
+ <th>Illustration</th>
242
+ <th>Checkpoint</th>
243
+ </tr>
244
+ </thead>
245
+ <tbody>
246
+ <tr>
247
+ <th>1</th>
248
+ <td>RAM++ (14M)</td>
249
+ <td>Swin-Large</td>
250
+ <td>COCO, VG, SBU, CC3M, CC3M-val, CC12M</td>
251
+ <td>Provide strong image tagging ability for any category.</td>
252
+ <td><a href="https://huggingface.co/xinyu1205/recognize-anything-plus-model/blob/main/ram_plus_swin_large_14m.pth">Download link</a></td>
253
+ </tr>
254
+ <tr>
255
+ <th>2</th>
256
+ <td>RAM (14M)</td>
257
+ <td>Swin-Large</td>
258
+ <td>COCO, VG, SBU, CC3M, CC3M-val, CC12M</td>
259
+ <td>Provide strong image tagging ability for common category.</td>
260
+ <td><a href="https://huggingface.co/spaces/xinyu1205/Recognize_Anything-Tag2Text/blob/main/ram_swin_large_14m.pth">Download link</a></td>
261
+ </tr>
262
+ <tr>
263
+ <th>3</th>
264
+ <td>Tag2Text (14M)</td>
265
+ <td>Swin-Base</td>
266
+ <td>COCO, VG, SBU, CC3M, CC3M-val, CC12M</td>
267
+ <td>Support comprehensive captioning and tagging.</td>
268
+ <td><a href="https://huggingface.co/spaces/xinyu1205/Recognize_Anything-Tag2Text/blob/main/tag2text_swin_14m.pth">Download link</a></td>
269
+ </tr>
270
+ </tbody>
271
+ </table>
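+
+ If you prefer to fetch a checkpoint programmatically, the sketch below uses the `huggingface_hub` client (an extra dependency, installed with `pip install huggingface_hub`) to download the RAM++ weights into the 'pretrained' folder. The repo id and filename are taken from the table above; this is only one convenient option, not the required workflow.
+
+ ```python
+ # Sketch: download the RAM++ checkpoint into ./pretrained with huggingface_hub.
+ # Repo id and filename come from the checkpoint table above.
+ from huggingface_hub import hf_hub_download
+
+ ckpt_path = hf_hub_download(
+     repo_id="xinyu1205/recognize-anything-plus-model",
+     filename="ram_plus_swin_large_14m.pth",
+     local_dir="pretrained",  # saved as pretrained/ram_plus_swin_large_14m.pth
+ )
+ print(ckpt_path)
+ ```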
272
+
273
+
274
+ ## :running: Model Inference
275
+
276
+ ### **Setting Up** ###
277
+
278
+ 1. Create and activate a Conda environment:
279
+
280
+ ```bash
281
+ conda create -n recognize-anything python=3.8 -y
282
+ conda activate recognize-anything
283
+ ```
284
+
285
+ 2. Install `recognize-anything` as a package:
286
+
287
+ ```bash
288
+ pip install git+https://github.com/xinyu1205/recognize-anything.git
289
+ ```
290
+
291
+ 3. Or, for development, you may build from source:
292
+
293
+ ```bash
294
+ git clone https://github.com/xinyu1205/recognize-anything.git
295
+ cd recognize-anything
296
+ pip install -e .
297
+ ```
298
+
299
+ Then the RAM++, RAM, and Tag2Text models can be imported in other projects:
300
+
301
+ ```python
302
+ from ram.models import ram_plus, ram, tag2text
303
+ ```
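+
+ As a quick end-to-end example, the sketch below mirrors what `inference_ram_plus.py` does, but from your own Python code. It assumes the helper functions exposed by this repo's inference scripts (`get_transform`, `inference_ram`) and a checkpoint downloaded to 'pretrained'; adjust the names if your installed version differs.
+
+ ```python
+ # Minimal sketch of programmatic RAM++ tagging, following inference_ram_plus.py.
+ # Assumes: pretrained/ram_plus_swin_large_14m.pth exists and the `ram` package
+ # exposes get_transform / inference_ram as used in this repo's scripts.
+ import torch
+ from PIL import Image
+ from ram import get_transform, inference_ram as inference
+ from ram.models import ram_plus
+
+ device = "cuda" if torch.cuda.is_available() else "cpu"
+ transform = get_transform(image_size=384)
+
+ model = ram_plus(pretrained="pretrained/ram_plus_swin_large_14m.pth",
+                  image_size=384, vit="swin_l")
+ model.eval()
+ model = model.to(device)
+
+ image = transform(Image.open("images/demo/demo1.jpg")).unsqueeze(0).to(device)
+ english_tags, chinese_tags = inference(image, model)  # English and Chinese tag strings
+ print("Image Tags:", english_tags)
+ ```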
304
+
305
+ ### **RAM++ Inference** ###
306
+
307
+ Get the English and Chinese outputs of the images:
308
+
309
+ ```bash
310
+ python inference_ram_plus.py --image images/demo/demo1.jpg --pretrained pretrained/ram_plus_swin_large_14m.pth
311
+ ```
312
+
313
+
314
+ The output will look like the following:
315
+
316
+ ```
317
+ Image Tags: armchair | blanket | lamp | carpet | couch | dog | gray | green | hassock | home | lay | living room | picture frame | pillow | plant | room | wall lamp | sit | wood floor
318
+ 图像标签: 扶手椅 | 毯子/覆盖层 | 灯 | 地毯 | 沙发 | 狗 | 灰色 | 绿色 | 坐垫/搁脚凳/草丛 | 家/住宅 | 躺 | 客厅 | 相框 | 枕头 | 植物 | 房间 | 壁灯 | 坐/放置/坐落 | 木地板
319
+ ```
320
+
321
+ ### **RAM++ Inference on Unseen Categories (Open-Set)** ###
322
+
323
+ 1. Get the [OpenImages-Uncommon categories](./datasets/openimages_rare_200/openimages_rare_200_ram_taglist.txt) of the image:
324
+
325
+ We have released the LLM tag descriptions of OpenImages-Uncommon categories in [openimages_rare_200_llm_tag_descriptions](./datasets/openimages_rare_200/).
326
+
327
+ <pre>
328
+ python inference_ram_plus_openset.py --image images/openset_example.jpg \
329
+ --pretrained pretrained/ram_plus_swin_large_14m.pth \
330
+ --llm_tag_des datasets/openimages_rare_200/openimages_rare_200_llm_tag_descriptions.json
331
+ </pre>
332
+
333
+ The output will look like the following:
334
+ ```
335
+ Image Tags: Close-up | Compact car | Go-kart | Horse racing | Sport utility vehicle | Touring car
336
+ ```
337
+
338
+ 2. You can also customize any tag categories for recognition through tag descriptions:
339
+
340
+ Modify the [categories](./generate_tag_des_llm.py#L56), and call the GPT API to generate the corresponding tag descriptions:
341
+
342
+ <pre>
343
+ python generate_tag_des_llm.py \
344
+ --openai_api_key 'your openai api key' \
345
+ --output_file_path datasets/openimages_rare_200/openimages_rare_200_llm_tag_descriptions.json
346
+ </pre>
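+
+ After generation, it can be useful to sanity-check the output file before plugging it into open-set inference. The sketch below only assumes that the output is valid JSON (as implied by `--output_file_path` above); inspect one entry to see the exact structure your version produces.
+
+ ```python
+ # Sketch: quick sanity check of the generated LLM tag description file.
+ # Only assumes the file is valid JSON; the inner structure may vary by version.
+ import json
+
+ path = "datasets/openimages_rare_200/openimages_rare_200_llm_tag_descriptions.json"
+ with open(path, "r", encoding="utf-8") as f:
+     tag_descriptions = json.load(f)
+
+ print(f"{len(tag_descriptions)} entries loaded")
+ # Peek at the first entry to see how the descriptions are stored for each tag.
+ first = tag_descriptions[0] if isinstance(tag_descriptions, list) \
+     else next(iter(tag_descriptions.items()))
+ print(first)
+ ```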
347
+
348
+ <details>
349
+ <summary><font size="4" style="font-weight:bold;">
350
+ RAM Inference
351
+ </font></summary>
352
+
353
+ Get the English and Chinese outputs of the images:
354
+
355
+ <pre>
356
+ python inference_ram.py --image images/demo/demo1.jpg \
357
+ --pretrained pretrained/ram_swin_large_14m.pth
358
+ </pre>
359
+
360
+ The output will look like the following:
361
+
362
+ ```
363
+ Image Tags: armchair | blanket | lamp | carpet | couch | dog | floor | furniture | gray | green | living room | picture frame | pillow | plant | room | sit | stool | wood floor
364
+ 图像标签: 扶手椅 | 毯子/覆盖层 | 灯 | 地毯 | 沙发 | 狗 | 地板/地面 | 家具 | 灰色 | 绿色 | 客厅 | 相框 | 枕头 | 植物 | 房间 | 坐/放置/坐落 | 凳子 | 木地板
365
+ ```
366
+
367
+ </details>
368
+
369
+
370
+ <details>
371
+ <summary><font size="4" style="font-weight:bold;">
372
+ RAM Inference on Unseen Categories (Open-Set)
373
+ </font></summary>
374
+
375
+
376
+ First, customize the recognition categories in [build_openset_label_embedding](./ram/utils/openset_utils.py), then get the tags of the images:
377
+
378
+ <pre>
379
+ python inference_ram_openset.py --image images/openset_example.jpg \
380
+ --pretrained pretrained/ram_swin_large_14m.pth
381
+ </pre>
382
+
383
+ The output will look like the following:
384
+ ```
385
+ Image Tags: Black-and-white | Go-kart
386
+ ```
387
+
388
+
389
+ </details>
390
+
391
+ <details>
392
+ <summary><font size="4" style="font-weight:bold;">
393
+ Tag2Text Inference
394
+ </font></summary>
395
+
396
+
397
+ Get the tagging and captioning results:
398
+ <pre>
399
+ python inference_tag2text.py --image images/demo/demo1.jpg \
400
+ --pretrained pretrained/tag2text_swin_14m.pth
401
+ </pre>
402
+ Or get the tagging and specified captioning results (optional):
403
+ <pre>python inference_tag2text.py --image images/demo/demo1.jpg \
404
+ --pretrained pretrained/tag2text_swin_14m.pth \
405
+ --specified-tags "cloud,sky"</pre>
406
+
407
+ </details>
408
+
409
+ ### **Batch Inference and Evaluation** ###
410
+ We release two datasets `OpenImages-common` (214 common tag classes) and `OpenImages-rare` (200 uncommon tag classes). Copy or sym-link test images of [OpenImages v6](https://storage.googleapis.com/openimages/web/download_v6.html) to `datasets/openimages_common_214/imgs/` and `datasets/openimages_rare_200/imgs`.
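+
+ If your OpenImages v6 test images live elsewhere on disk, a small helper like the sketch below can sym-link them into the expected folder. The source directory is a placeholder you would replace with your own path; copy the files instead if symlinks are not available on your system.
+
+ ```python
+ # Sketch: sym-link locally stored OpenImages v6 test images into the layout
+ # expected by batch_inference.py. SOURCE_DIR is a placeholder path.
+ from pathlib import Path
+
+ SOURCE_DIR = Path("/data/openimages_v6/test")  # placeholder: your local copy
+ TARGET_DIR = Path("datasets/openimages_common_214/imgs")
+ TARGET_DIR.mkdir(parents=True, exist_ok=True)
+
+ for img in SOURCE_DIR.glob("*.jpg"):
+     link = TARGET_DIR / img.name
+     if not link.exists():
+         link.symlink_to(img.resolve())
+ ```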
411
+
412
+ To evaluate RAM++ on `OpenImages-common`:
413
+
414
+ ```bash
415
+ python batch_inference.py \
416
+ --model-type ram_plus \
417
+ --checkpoint pretrained/ram_plus_swin_large_14m.pth \
418
+ --dataset openimages_common_214 \
419
+ --output-dir outputs/ram_plus
420
+ ```
421
+
422
+ To evaluate RAM++ open-set capability on `OpenImages-rare`:
423
+
424
+ ```bash
425
+ python batch_inference.py \
426
+ --model-type ram_plus \
427
+ --checkpoint pretrained/ram_plus_swin_large_14m.pth \
428
+ --open-set \
429
+ --dataset openimages_rare_200 \
430
+ --output-dir outputs/ram_plus_openset
431
+ ```
432
+
433
+ To evaluate RAM on `OpenImages-common`:
434
+
435
+ ```bash
436
+ python batch_inference.py \
437
+ --model-type ram \
438
+ --checkpoint pretrained/ram_swin_large_14m.pth \
439
+ --dataset openimages_common_214 \
440
+ --output-dir outputs/ram
441
+ ```
442
+
443
+ To evaluate RAM open-set capability on `OpenImages-rare`:
444
+
445
+ ```bash
446
+ python batch_inference.py \
447
+ --model-type ram \
448
+ --checkpoint pretrained/ram_swin_large_14m.pth \
449
+ --open-set \
450
+ --dataset openimages_rare_200 \
451
+ --output-dir outputs/ram_openset
452
+ ```
453
+
454
+ To evaluate Tag2Text on `OpenImages-common`:
455
+
456
+ ```bash
457
+ python batch_inference.py \
458
+ --model-type tag2text \
459
+ --checkpoint pretrained/tag2text_swin_14m.pth \
460
+ --dataset openimages_common_214 \
461
+ --output-dir outputs/tag2text
462
+ ```
463
+
464
+ Please refer to `batch_inference.py` for more options. To get the P/R values reported in Table 3 of the RAM paper, pass `--threshold=0.86` for RAM and `--threshold=0.68` for Tag2Text.
465
+
466
+ To run batch inference on custom images, you can set up your own datasets following the two given datasets.
467
+
468
+
469
+ ## :golfing: Model Training/Finetuning
470
+
471
+ ### **RAM++** ###
472
+
473
+ 1. Download the [RAM training datasets](#open_book-training-datasets), where each json file contains a list. Each item in the list is a dictionary with three key-value pairs: {'image_path': path_of_image, 'caption': text_of_image, 'union_label_id': image tags for tagging, including parsed tags and pseudo tags}. A small sketch of this annotation format is shown after the training commands below.
474
+
475
+ 2. In ram/configs/pretrain.yaml, set 'train_file' to the paths of the json files.
476
+
477
+ 3. Prepare the [pretrained Swin-Transformer](https://github.com/microsoft/Swin-Transformer), and set 'ckpt' in ram/configs/swin.
478
+
479
+ 4. Download the RAM++ frozen tag embedding file "[ram_plus_tag_embedding_class_4585_des_51.pth](https://huggingface.co/xinyu1205/recognize-anything-plus-model/blob/main/ram_plus_tag_embedding_class_4585_des_51.pth)", and place it at "ram/data/frozen_tag_embedding/ram_plus_tag_embedding_class_4585_des_51.pth".
480
+
481
+ 5. Pre-train the model using 8 A100 GPUs:
482
+
483
+ ```bash
484
+ python -m torch.distributed.run --nproc_per_node=8 pretrain.py \
485
+ --model-type ram_plus \
486
+ --config ram/configs/pretrain.yaml \
487
+ --output-dir outputs/ram_plus
488
+ ```
489
+
490
+ 6. Fine-tune the pre-trained checkpoint using 8 A100 GPUs:
491
+
492
+ ```bash
493
+ python -m torch.distributed.run --nproc_per_node=8 finetune.py \
494
+ --model-type ram_plus \
495
+ --config ram/configs/finetune.yaml \
496
+ --checkpoint outputs/ram_plus/checkpoint_04.pth \
497
+ --output-dir outputs/ram_plus_ft
498
+ ```
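+
+ For reference, the sketch below shows what one entry of the annotation json described in step 1 could look like if you assemble your own training file. The field values are purely illustrative; the actual 'union_label_id' entries should be ids from the tag vocabulary used by the model.
+
+ ```python
+ # Sketch: assembling a RAM++-style training annotation file (see step 1 above).
+ # Each item carries 'image_path', 'caption', and 'union_label_id'
+ # (parsed tags plus pseudo tags). All values below are illustrative only.
+ import json
+
+ annotations = [
+     {
+         "image_path": "images/demo/demo1.jpg",       # illustrative path
+         "caption": "a dog lying on a couch in a living room",
+         "union_label_id": [102, 345, 1877],          # illustrative tag ids
+     },
+ ]
+
+ with open("my_train_ram_plus.json", "w", encoding="utf-8") as f:
+     json.dump(annotations, f)
+ ```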
499
+
500
+
501
+ <details>
502
+ <summary><font size="4" style="font-weight:bold;">
503
+ RAM
504
+ </font></summary>
505
+
506
+ 1. Download the [RAM training datasets](#open_book-training-datasets), where each json file contains a list. Each item in the list is a dictionary with four key-value pairs: {'image_path': path_of_image, 'caption': text_of_image, 'union_label_id': image tags for tagging, including parsed tags and pseudo tags, 'parse_label_id': image tags parsed from the caption}.
507
+
508
+ 2. In ram/configs/pretrain.yaml, set 'train_file' to the paths of the json files.
509
+
510
+ 3. Prepare the [pretrained Swin-Transformer](https://github.com/microsoft/Swin-Transformer), and set 'ckpt' in ram/configs/swin.
511
+
512
+ 4. Download the RAM frozen tag embedding file "[ram_tag_embedding_class_4585.pth](https://huggingface.co/xinyu1205/recognize_anything_model/blob/main/ram_tag_embedding_class_4585.pth)", and place it at "ram/data/frozen_tag_embedding/ram_tag_embedding_class_4585.pth".
513
+
514
+ 5. Pre-train the model using 8 A100 GPUs:
515
+
516
+ ```bash
517
+ python -m torch.distributed.run --nproc_per_node=8 pretrain.py \
518
+ --model-type ram \
519
+ --config ram/configs/pretrain.yaml \
520
+ --output-dir outputs/ram
521
+ ```
522
+
523
+ 6. Fine-tune the pre-trained checkpoint using 8 A100 GPUs:
524
+
525
+ ```bash
526
+ python -m torch.distributed.run --nproc_per_node=8 finetune.py \
527
+ --model-type ram \
528
+ --config ram/configs/finetune.yaml \
529
+ --checkpoint outputs/ram/checkpoint_04.pth \
530
+ --output-dir outputs/ram_ft
531
+ ```
532
+
533
+ </details>
534
+
535
+
536
+ <details>
537
+ <summary><font size="4" style="font-weight:bold;">
538
+ Tag2Text
539
+ </font></summary>
540
+
541
+ 1. Download the [RAM training datasets](#open_book-training-datasets), where each json file contains a list. Each item in the list is a dictionary with three key-value pairs: {'image_path': path_of_image, 'caption': text_of_image, 'parse_label_id': image tags parsed from the caption}.
542
+
543
+ 2. In ram/configs/pretrain_tag2text.yaml, set 'train_file' to the paths of the json files.
544
+
545
+ 3. Prepare the [pretrained Swin-Transformer](https://github.com/microsoft/Swin-Transformer), and set 'ckpt' in ram/configs/swin.
546
+
547
+ 4. Pre-train the model using 8 A100 GPUs:
548
+
549
+ ```bash
550
+ python -m torch.distributed.run --nproc_per_node=8 pretrain.py \
551
+ --model-type tag2text \
552
+ --config ram/configs/pretrain_tag2text.yaml \
553
+ --output-dir outputs/tag2text
554
+ ```
555
+
556
+ 5. Fine-tune the pre-trained checkpoint using 8 A100 GPUs:
557
+
558
+ ```bash
559
+ python -m torch.distributed.run --nproc_per_node=8 finetune.py \
560
+ --model-type tag2text \
561
+ --config ram/configs/finetune_tag2text.yaml \
562
+ --checkpoint outputs/tag2text/checkpoint_04.pth \
563
+ --output-dir outputs/tag2text_ft
564
+ ```
565
+
566
+ </details>
567
+
568
+
569
+ ## :black_nib: Citation
570
+ If you find our work to be useful for your research, please consider citing.
571
+
572
+ ```
573
+ @article{huang2023open,
574
+ title={Open-Set Image Tagging with Multi-Grained Text Supervision},
575
+ author={Huang, Xinyu and Huang, Yi-Jie and Zhang, Youcai and Tian, Weiwei and Feng, Rui and Zhang, Yuejie and Xie, Yanchun and Li, Yaqian and Zhang, Lei},
576
+ journal={arXiv e-prints},
577
+ pages={arXiv--2310},
578
+ year={2023}
579
+ }
580
+
581
+ @article{zhang2023recognize,
582
+ title={Recognize Anything: A Strong Image Tagging Model},
583
+ author={Zhang, Youcai and Huang, Xinyu and Ma, Jinyu and Li, Zhaoyang and Luo, Zhaochuan and Xie, Yanchun and Qin, Yuzhuo and Luo, Tong and Li, Yaqian and Liu, Shilong and others},
584
+ journal={arXiv preprint arXiv:2306.03514},
585
+ year={2023}
586
+ }
587
+
588
+ @article{huang2023tag2text,
589
+ title={Tag2Text: Guiding Vision-Language Model via Image Tagging},
590
+ author={Huang, Xinyu and Zhang, Youcai and Ma, Jinyu and Tian, Weiwei and Feng, Rui and Zhang, Yuejie and Li, Yaqian and Guo, Yandong and Zhang, Lei},
591
+ journal={arXiv preprint arXiv:2303.05657},
592
+ year={2023}
593
+ }
594
+ ```
595
+
596
+ ## :hearts: Acknowledgements
597
+ This work is done with the help of the amazing code base of [BLIP](https://github.com/salesforce/BLIP), thanks very much!
598
+
599
+ We want to thank @Cheng Rui @Shilong Liu @Ren Tianhe for their help in [marrying RAM/Tag2Text with Grounded-SAM](https://github.com/IDEA-Research/Grounded-Segment-Anything).
600
+
601
+ We also want to thank [Ask-Anything](https://github.com/OpenGVLab/Ask-Anything), [Prompt-can-anything](https://github.com/positive666/Prompt-Can-Anything) for combining RAM/Tag2Text, which greatly expands the application boundaries of RAM/Tag2Text.
recognize-anything/.ipynb_checkpoints/recognize_anything_demo-checkpoint.ipynb ADDED
The diff for this file is too large to render. See raw diff
 
recognize-anything/datasets/hico/hico_600_annots.txt ADDED
The diff for this file is too large to render. See raw diff
 
recognize-anything/datasets/hico/hico_600_taglist.txt ADDED
@@ -0,0 +1,600 @@
1
+ person board airplane
2
+ person direct airplane
3
+ person exit airplane
4
+ person fly airplane
5
+ person inspect airplane
6
+ person load airplane
7
+ person ride airplane
8
+ person sit_on airplane
9
+ person wash airplane
10
+ person no_interaction airplane
11
+ person carry bicycle
12
+ person hold bicycle
13
+ person inspect bicycle
14
+ person jump bicycle
15
+ person hop_on bicycle
16
+ person park bicycle
17
+ person push bicycle
18
+ person repair bicycle
19
+ person ride bicycle
20
+ person sit_on bicycle
21
+ person straddle bicycle
22
+ person walk bicycle
23
+ person wash bicycle
24
+ person no_interaction bicycle
25
+ person chase bird
26
+ person feed bird
27
+ person hold bird
28
+ person pet bird
29
+ person release bird
30
+ person watch bird
31
+ person no_interaction bird
32
+ person board boat
33
+ person drive boat
34
+ person exit boat
35
+ person inspect boat
36
+ person jump boat
37
+ person launch boat
38
+ person repair boat
39
+ person ride boat
40
+ person row boat
41
+ person sail boat
42
+ person sit_on boat
43
+ person stand_on boat
44
+ person tie boat
45
+ person wash boat
46
+ person no_interaction boat
47
+ person carry bottle
48
+ person drink_with bottle
49
+ person hold bottle
50
+ person inspect bottle
51
+ person lick bottle
52
+ person open bottle
53
+ person pour bottle
54
+ person no_interaction bottle
55
+ person board bus
56
+ person direct bus
57
+ person drive bus
58
+ person exit bus
59
+ person inspect bus
60
+ person load bus
61
+ person ride bus
62
+ person sit_on bus
63
+ person wash bus
64
+ person wave bus
65
+ person no_interaction bus
66
+ person board car
67
+ person direct car
68
+ person drive car
69
+ person hose car
70
+ person inspect car
71
+ person jump car
72
+ person load car
73
+ person park car
74
+ person ride car
75
+ person wash car
76
+ person no_interaction car
77
+ person dry cat
78
+ person feed cat
79
+ person hold cat
80
+ person hug cat
81
+ person kiss cat
82
+ person pet cat
83
+ person scratch cat
84
+ person wash cat
85
+ person chase cat
86
+ person no_interaction cat
87
+ person carry chair
88
+ person hold chair
89
+ person lie_on chair
90
+ person sit_on chair
91
+ person stand_on chair
92
+ person no_interaction chair
93
+ person carry couch
94
+ person lie_on couch
95
+ person sit_on couch
96
+ person no_interaction couch
97
+ person feed cow
98
+ person herd cow
99
+ person hold cow
100
+ person hug cow
101
+ person kiss cow
102
+ person lasso cow
103
+ person milk cow
104
+ person pet cow
105
+ person ride cow
106
+ person walk cow
107
+ person no_interaction cow
108
+ person clean dining_table
109
+ person eat_at dining_table
110
+ person sit_at dining_table
111
+ person no_interaction dining_table
112
+ person carry dog
113
+ person dry dog
114
+ person feed dog
115
+ person groom dog
116
+ person hold dog
117
+ person hose dog
118
+ person hug dog
119
+ person inspect dog
120
+ person kiss dog
121
+ person pet dog
122
+ person run dog
123
+ person scratch dog
124
+ person straddle dog
125
+ person train dog
126
+ person walk dog
127
+ person wash dog
128
+ person chase dog
129
+ person no_interaction dog
130
+ person feed horse
131
+ person groom horse
132
+ person hold horse
133
+ person hug horse
134
+ person jump horse
135
+ person kiss horse
136
+ person load horse
137
+ person hop_on horse
138
+ person pet horse
139
+ person race horse
140
+ person ride horse
141
+ person run horse
142
+ person straddle horse
143
+ person train horse
144
+ person walk horse
145
+ person wash horse
146
+ person no_interaction horse
147
+ person hold motorcycle
148
+ person inspect motorcycle
149
+ person jump motorcycle
150
+ person hop_on motorcycle
151
+ person park motorcycle
152
+ person push motorcycle
153
+ person race motorcycle
154
+ person ride motorcycle
155
+ person sit_on motorcycle
156
+ person straddle motorcycle
157
+ person turn motorcycle
158
+ person walk motorcycle
159
+ person wash motorcycle
160
+ person no_interaction motorcycle
161
+ person carry person
162
+ person greet person
163
+ person hold person
164
+ person hug person
165
+ person kiss person
166
+ person stab person
167
+ person tag person
168
+ person teach person
169
+ person lick person
170
+ person no_interaction person
171
+ person carry potted_plant
172
+ person hold potted_plant
173
+ person hose potted_plant
174
+ person no_interaction potted_plant
175
+ person carry sheep
176
+ person feed sheep
177
+ person herd sheep
178
+ person hold sheep
179
+ person hug sheep
180
+ person kiss sheep
181
+ person pet sheep
182
+ person ride sheep
183
+ person shear sheep
184
+ person walk sheep
185
+ person wash sheep
186
+ person no_interaction sheep
187
+ person board train
188
+ person drive train
189
+ person exit train
190
+ person load train
191
+ person ride train
192
+ person sit_on train
193
+ person wash train
194
+ person no_interaction train
195
+ person control tv
196
+ person repair tv
197
+ person watch tv
198
+ person no_interaction tv
199
+ person buy apple
200
+ person cut apple
201
+ person eat apple
202
+ person hold apple
203
+ person inspect apple
204
+ person peel apple
205
+ person pick apple
206
+ person smell apple
207
+ person wash apple
208
+ person no_interaction apple
209
+ person carry backpack
210
+ person hold backpack
211
+ person inspect backpack
212
+ person open backpack
213
+ person wear backpack
214
+ person no_interaction backpack
215
+ person buy banana
216
+ person carry banana
217
+ person cut banana
218
+ person eat banana
219
+ person hold banana
220
+ person inspect banana
221
+ person peel banana
222
+ person pick banana
223
+ person smell banana
224
+ person no_interaction banana
225
+ person break baseball_bat
226
+ person carry baseball_bat
227
+ person hold baseball_bat
228
+ person sign baseball_bat
229
+ person swing baseball_bat
230
+ person throw baseball_bat
231
+ person wield baseball_bat
232
+ person no_interaction baseball_bat
233
+ person hold baseball_glove
234
+ person wear baseball_glove
235
+ person no_interaction baseball_glove
236
+ person feed bear
237
+ person hunt bear
238
+ person watch bear
239
+ person no_interaction bear
240
+ person clean bed
241
+ person lie_on bed
242
+ person sit_on bed
243
+ person no_interaction bed
244
+ person inspect bench
245
+ person lie_on bench
246
+ person sit_on bench
247
+ person no_interaction bench
248
+ person carry book
249
+ person hold book
250
+ person open book
251
+ person read book
252
+ person no_interaction book
253
+ person hold bowl
254
+ person stir bowl
255
+ person wash bowl
256
+ person lick bowl
257
+ person no_interaction bowl
258
+ person cut broccoli
259
+ person eat broccoli
260
+ person hold broccoli
261
+ person smell broccoli
262
+ person stir broccoli
263
+ person wash broccoli
264
+ person no_interaction broccoli
265
+ person blow cake
266
+ person carry cake
267
+ person cut cake
268
+ person eat cake
269
+ person hold cake
270
+ person light cake
271
+ person make cake
272
+ person pick_up cake
273
+ person no_interaction cake
274
+ person carry carrot
275
+ person cook carrot
276
+ person cut carrot
277
+ person eat carrot
278
+ person hold carrot
279
+ person peel carrot
280
+ person smell carrot
281
+ person stir carrot
282
+ person wash carrot
283
+ person no_interaction carrot
284
+ person carry cell_phone
285
+ person hold cell_phone
286
+ person read cell_phone
287
+ person repair cell_phone
288
+ person talk_on cell_phone
289
+ person text_on cell_phone
290
+ person no_interaction cell_phone
291
+ person check clock
292
+ person hold clock
293
+ person repair clock
294
+ person set clock
295
+ person no_interaction clock
296
+ person carry cup
297
+ person drink_with cup
298
+ person hold cup
299
+ person inspect cup
300
+ person pour cup
301
+ person sip cup
302
+ person smell cup
303
+ person fill cup
304
+ person wash cup
305
+ person no_interaction cup
306
+ person buy donut
307
+ person carry donut
308
+ person eat donut
309
+ person hold donut
310
+ person make donut
311
+ person pick_up donut
312
+ person smell donut
313
+ person no_interaction donut
314
+ person feed elephant
315
+ person hold elephant
316
+ person hose elephant
317
+ person hug elephant
318
+ person kiss elephant
319
+ person hop_on elephant
320
+ person pet elephant
321
+ person ride elephant
322
+ person walk elephant
323
+ person wash elephant
324
+ person watch elephant
325
+ person no_interaction elephant
326
+ person hug fire_hydrant
327
+ person inspect fire_hydrant
328
+ person open fire_hydrant
329
+ person paint fire_hydrant
330
+ person no_interaction fire_hydrant
331
+ person hold fork
332
+ person lift fork
333
+ person stick fork
334
+ person lick fork
335
+ person wash fork
336
+ person no_interaction fork
337
+ person block frisbee
338
+ person catch frisbee
339
+ person hold frisbee
340
+ person spin frisbee
341
+ person throw frisbee
342
+ person no_interaction frisbee
343
+ person feed giraffe
344
+ person kiss giraffe
345
+ person pet giraffe
346
+ person ride giraffe
347
+ person watch giraffe
348
+ person no_interaction giraffe
349
+ person hold hair_drier
350
+ person operate hair_drier
351
+ person repair hair_drier
352
+ person no_interaction hair_drier
353
+ person carry handbag
354
+ person hold handbag
355
+ person inspect handbag
356
+ person no_interaction handbag
357
+ person carry hot_dog
358
+ person cook hot_dog
359
+ person cut hot_dog
360
+ person eat hot_dog
361
+ person hold hot_dog
362
+ person make hot_dog
363
+ person no_interaction hot_dog
364
+ person carry keyboard
365
+ person clean keyboard
366
+ person hold keyboard
367
+ person type_on keyboard
368
+ person no_interaction keyboard
369
+ person assemble kite
370
+ person carry kite
371
+ person fly kite
372
+ person hold kite
373
+ person inspect kite
374
+ person launch kite
375
+ person pull kite
376
+ person no_interaction kite
377
+ person cut_with knife
378
+ person hold knife
379
+ person stick knife
380
+ person wash knife
381
+ person wield knife
382
+ person lick knife
383
+ person no_interaction knife
384
+ person hold laptop
385
+ person open laptop
386
+ person read laptop
387
+ person repair laptop
388
+ person type_on laptop
389
+ person no_interaction laptop
390
+ person clean microwave
391
+ person open microwave
392
+ person operate microwave
393
+ person no_interaction microwave
394
+ person control mouse
395
+ person hold mouse
396
+ person repair mouse
397
+ person no_interaction mouse
398
+ person buy orange
399
+ person cut orange
400
+ person eat orange
401
+ person hold orange
402
+ person inspect orange
403
+ person peel orange
404
+ person pick orange
405
+ person squeeze orange
406
+ person wash orange
407
+ person no_interaction orange
408
+ person clean oven
409
+ person hold oven
410
+ person inspect oven
411
+ person open oven
412
+ person repair oven
413
+ person operate oven
414
+ person no_interaction oven
415
+ person check parking_meter
416
+ person pay parking_meter
417
+ person repair parking_meter
418
+ person no_interaction parking_meter
419
+ person buy pizza
420
+ person carry pizza
421
+ person cook pizza
422
+ person cut pizza
423
+ person eat pizza
424
+ person hold pizza
425
+ person make pizza
426
+ person pick_up pizza
427
+ person slide pizza
428
+ person smell pizza
429
+ person no_interaction pizza
430
+ person clean refrigerator
431
+ person hold refrigerator
432
+ person move refrigerator
433
+ person open refrigerator
434
+ person no_interaction refrigerator
435
+ person hold remote
436
+ person point remote
437
+ person swing remote
438
+ person no_interaction remote
439
+ person carry sandwich
440
+ person cook sandwich
441
+ person cut sandwich
442
+ person eat sandwich
443
+ person hold sandwich
444
+ person make sandwich
445
+ person no_interaction sandwich
446
+ person cut_with scissors
447
+ person hold scissors
448
+ person open scissors
449
+ person no_interaction scissors
450
+ person clean sink
451
+ person repair sink
452
+ person wash sink
453
+ person no_interaction sink
454
+ person carry skateboard
455
+ person flip skateboard
456
+ person grind skateboard
457
+ person hold skateboard
458
+ person jump skateboard
459
+ person pick_up skateboard
460
+ person ride skateboard
461
+ person sit_on skateboard
462
+ person stand_on skateboard
463
+ person no_interaction skateboard
464
+ person adjust skis
465
+ person carry skis
466
+ person hold skis
467
+ person inspect skis
468
+ person jump skis
469
+ person pick_up skis
470
+ person repair skis
471
+ person ride skis
472
+ person stand_on skis
473
+ person wear skis
474
+ person no_interaction skis
475
+ person adjust snowboard
476
+ person carry snowboard
477
+ person grind snowboard
478
+ person hold snowboard
479
+ person jump snowboard
480
+ person ride snowboard
481
+ person stand_on snowboard
482
+ person wear snowboard
483
+ person no_interaction snowboard
484
+ person hold spoon
485
+ person lick spoon
486
+ person wash spoon
487
+ person sip spoon
488
+ person no_interaction spoon
489
+ person block sports_ball
490
+ person carry sports_ball
491
+ person catch sports_ball
492
+ person dribble sports_ball
493
+ person hit sports_ball
494
+ person hold sports_ball
495
+ person inspect sports_ball
496
+ person kick sports_ball
497
+ person pick_up sports_ball
498
+ person serve sports_ball
499
+ person sign sports_ball
500
+ person spin sports_ball
501
+ person throw sports_ball
502
+ person no_interaction sports_ball
503
+ person hold stop_sign
504
+ person stand_under stop_sign
505
+ person stop_at stop_sign
506
+ person no_interaction stop_sign
507
+ person carry suitcase
508
+ person drag suitcase
509
+ person hold suitcase
510
+ person hug suitcase
511
+ person load suitcase
512
+ person open suitcase
513
+ person pack suitcase
514
+ person pick_up suitcase
515
+ person zip suitcase
516
+ person no_interaction suitcase
517
+ person carry surfboard
518
+ person drag surfboard
519
+ person hold surfboard
520
+ person inspect surfboard
521
+ person jump surfboard
522
+ person lie_on surfboard
523
+ person load surfboard
524
+ person ride surfboard
525
+ person stand_on surfboard
526
+ person sit_on surfboard
527
+ person wash surfboard
528
+ person no_interaction surfboard
529
+ person carry teddy_bear
530
+ person hold teddy_bear
531
+ person hug teddy_bear
532
+ person kiss teddy_bear
533
+ person no_interaction teddy_bear
534
+ person carry tennis_racket
535
+ person hold tennis_racket
536
+ person inspect tennis_racket
537
+ person swing tennis_racket
538
+ person no_interaction tennis_racket
539
+ person adjust tie
540
+ person cut tie
541
+ person hold tie
542
+ person inspect tie
543
+ person pull tie
544
+ person tie tie
545
+ person wear tie
546
+ person no_interaction tie
547
+ person hold toaster
548
+ person operate toaster
549
+ person repair toaster
550
+ person no_interaction toaster
551
+ person clean toilet
552
+ person flush toilet
553
+ person open toilet
554
+ person repair toilet
555
+ person sit_on toilet
556
+ person stand_on toilet
557
+ person wash toilet
558
+ person no_interaction toilet
559
+ person brush_with toothbrush
560
+ person hold toothbrush
561
+ person wash toothbrush
562
+ person no_interaction toothbrush
563
+ person install traffic_light
564
+ person repair traffic_light
565
+ person stand_under traffic_light
566
+ person stop_at traffic_light
567
+ person no_interaction traffic_light
568
+ person direct truck
569
+ person drive truck
570
+ person inspect truck
571
+ person load truck
572
+ person repair truck
573
+ person ride truck
574
+ person sit_on truck
575
+ person wash truck
576
+ person no_interaction truck
577
+ person carry umbrella
578
+ person hold umbrella
579
+ person lose umbrella
580
+ person open umbrella
581
+ person repair umbrella
582
+ person set umbrella
583
+ person stand_under umbrella
584
+ person no_interaction umbrella
585
+ person hold vase
586
+ person make vase
587
+ person paint vase
588
+ person no_interaction vase
589
+ person fill wine_glass
590
+ person hold wine_glass
591
+ person sip wine_glass
592
+ person toast wine_glass
593
+ person lick wine_glass
594
+ person wash wine_glass
595
+ person no_interaction wine_glass
596
+ person feed zebra
597
+ person hold zebra
598
+ person pet zebra
599
+ person watch zebra
600
+ person no_interaction zebra
recognize-anything/datasets/imagenet_multi/imagenet_multi_1000_annots.txt ADDED
The diff for this file is too large to render. See raw diff
 
recognize-anything/datasets/imagenet_multi/imagenet_multi_1000_taglist.txt ADDED
@@ -0,0 +1,1000 @@
1
+ tench
2
+ goldfish
3
+ great white shark
4
+ tiger shark
5
+ hammerhead shark
6
+ electric ray
7
+ stingray
8
+ rooster
9
+ hen
10
+ ostrich
11
+ brambling
12
+ goldfinch
13
+ house finch
14
+ junco
15
+ indigo bunting
16
+ American robin
17
+ bulbul
18
+ jay
19
+ magpie
20
+ chickadee
21
+ American dipper
22
+ kite (bird of prey)
23
+ bald eagle
24
+ vulture
25
+ great grey owl
26
+ fire salamander
27
+ smooth newt
28
+ newt
29
+ spotted salamander
30
+ axolotl
31
+ American bullfrog
32
+ tree frog
33
+ tailed frog
34
+ loggerhead sea turtle
35
+ leatherback sea turtle
36
+ mud turtle
37
+ terrapin
38
+ box turtle
39
+ banded gecko
40
+ green iguana
41
+ Carolina anole
42
+ desert grassland whiptail lizard
43
+ agama
44
+ frilled-necked lizard
45
+ alligator lizard
46
+ Gila monster
47
+ European green lizard
48
+ chameleon
49
+ Komodo dragon
50
+ Nile crocodile
51
+ American alligator
52
+ triceratops
53
+ worm snake
54
+ ring-necked snake
55
+ eastern hog-nosed snake
56
+ smooth green snake
57
+ kingsnake
58
+ garter snake
59
+ water snake
60
+ vine snake
61
+ night snake
62
+ boa constrictor
63
+ African rock python
64
+ Indian cobra
65
+ green mamba
66
+ sea snake
67
+ Saharan horned viper
68
+ eastern diamondback rattlesnake
69
+ sidewinder rattlesnake
70
+ trilobite
71
+ harvestman
72
+ scorpion
73
+ yellow garden spider
74
+ barn spider
75
+ European garden spider
76
+ southern black widow
77
+ tarantula
78
+ wolf spider
79
+ tick
80
+ centipede
81
+ black grouse
82
+ ptarmigan
83
+ ruffed grouse
84
+ prairie grouse
85
+ peafowl
86
+ quail
87
+ partridge
88
+ african grey parrot
89
+ macaw
90
+ sulphur-crested cockatoo
91
+ lorikeet
92
+ coucal
93
+ bee eater
94
+ hornbill
95
+ hummingbird
96
+ jacamar
97
+ toucan
98
+ duck
99
+ red-breasted merganser
100
+ goose
101
+ black swan
102
+ tusker
103
+ echidna
104
+ platypus
105
+ wallaby
106
+ koala
107
+ wombat
108
+ jellyfish
109
+ sea anemone
110
+ brain coral
111
+ flatworm
112
+ nematode
113
+ conch
114
+ snail
115
+ slug
116
+ sea slug
117
+ chiton
118
+ chambered nautilus
119
+ Dungeness crab
120
+ rock crab
121
+ fiddler crab
122
+ red king crab
123
+ American lobster
124
+ spiny lobster
125
+ crayfish
126
+ hermit crab
127
+ isopod
128
+ white stork
129
+ black stork
130
+ spoonbill
131
+ flamingo
132
+ little blue heron
133
+ great egret
134
+ bittern bird
135
+ crane bird
136
+ limpkin
137
+ common gallinule
138
+ American coot
139
+ bustard
140
+ ruddy turnstone
141
+ dunlin
142
+ common redshank
143
+ dowitcher
144
+ oystercatcher
145
+ pelican
146
+ king penguin
147
+ albatross
148
+ grey whale
149
+ killer whale
150
+ dugong
151
+ sea lion
152
+ Chihuahua
153
+ Japanese Chin
154
+ Maltese
155
+ Pekingese
156
+ Shih Tzu
157
+ King Charles Spaniel
158
+ Papillon
159
+ toy terrier
160
+ Rhodesian Ridgeback
161
+ Afghan Hound
162
+ Basset Hound
163
+ Beagle
164
+ Bloodhound
165
+ Bluetick Coonhound
166
+ Black and Tan Coonhound
167
+ Treeing Walker Coonhound
168
+ English foxhound
169
+ Redbone Coonhound
170
+ borzoi
171
+ Irish Wolfhound
172
+ Italian Greyhound
173
+ Whippet
174
+ Ibizan Hound
175
+ Norwegian Elkhound
176
+ Otterhound
177
+ Saluki
178
+ Scottish Deerhound
179
+ Weimaraner
180
+ Staffordshire Bull Terrier
181
+ American Staffordshire Terrier
182
+ Bedlington Terrier
183
+ Border Terrier
184
+ Kerry Blue Terrier
185
+ Irish Terrier
186
+ Norfolk Terrier
187
+ Norwich Terrier
188
+ Yorkshire Terrier
189
+ Wire Fox Terrier
190
+ Lakeland Terrier
191
+ Sealyham Terrier
192
+ Airedale Terrier
193
+ Cairn Terrier
194
+ Australian Terrier
195
+ Dandie Dinmont Terrier
196
+ Boston Terrier
197
+ Miniature Schnauzer
198
+ Giant Schnauzer
199
+ Standard Schnauzer
200
+ Scottish Terrier
201
+ Tibetan Terrier
202
+ Australian Silky Terrier
203
+ Soft-coated Wheaten Terrier
204
+ West Highland White Terrier
205
+ Lhasa Apso
206
+ Flat-Coated Retriever
207
+ Curly-coated Retriever
208
+ Golden Retriever
209
+ Labrador Retriever
210
+ Chesapeake Bay Retriever
211
+ German Shorthaired Pointer
212
+ Vizsla
213
+ English Setter
214
+ Irish Setter
215
+ Gordon Setter
216
+ Brittany dog
217
+ Clumber Spaniel
218
+ English Springer Spaniel
219
+ Welsh Springer Spaniel
220
+ Cocker Spaniel
221
+ Sussex Spaniel
222
+ Irish Water Spaniel
223
+ Kuvasz
224
+ Schipperke
225
+ Groenendael dog
226
+ Malinois
227
+ Briard
228
+ Australian Kelpie
229
+ Komondor
230
+ Old English Sheepdog
231
+ Shetland Sheepdog
232
+ collie
233
+ Border Collie
234
+ Bouvier des Flandres dog
235
+ Rottweiler
236
+ German Shepherd Dog
237
+ Dobermann
238
+ Miniature Pinscher
239
+ Greater Swiss Mountain Dog
240
+ Bernese Mountain Dog
241
+ Appenzeller Sennenhund
242
+ Entlebucher Sennenhund
243
+ Boxer
244
+ Bullmastiff
245
+ Tibetan Mastiff
246
+ French Bulldog
247
+ Great Dane
248
+ St. Bernard
249
+ husky
250
+ Alaskan Malamute
251
+ Siberian Husky
252
+ Dalmatian
253
+ Affenpinscher
254
+ Basenji
255
+ pug
256
+ Leonberger
257
+ Newfoundland dog
258
+ Great Pyrenees dog
259
+ Samoyed
260
+ Pomeranian
261
+ Chow Chow
262
+ Keeshond
263
+ brussels griffon
264
+ Pembroke Welsh Corgi
265
+ Cardigan Welsh Corgi
266
+ Toy Poodle
267
+ Miniature Poodle
268
+ Standard Poodle
269
+ Mexican hairless dog (xoloitzcuintli)
270
+ grey wolf
271
+ Alaskan tundra wolf
272
+ red wolf or maned wolf
273
+ coyote
274
+ dingo
275
+ dhole
276
+ African wild dog
277
+ hyena
278
+ red fox
279
+ kit fox
280
+ Arctic fox
281
+ grey fox
282
+ tabby cat
283
+ tiger cat
284
+ Persian cat
285
+ Siamese cat
286
+ Egyptian Mau
287
+ cougar
288
+ lynx
289
+ leopard
290
+ snow leopard
291
+ jaguar
292
+ lion
293
+ tiger
294
+ cheetah
295
+ brown bear
296
+ American black bear
297
+ polar bear
298
+ sloth bear
299
+ mongoose
300
+ meerkat
301
+ tiger beetle
302
+ ladybug
303
+ ground beetle
304
+ longhorn beetle
305
+ leaf beetle
306
+ dung beetle
307
+ rhinoceros beetle
308
+ weevil
309
+ fly
310
+ bee
311
+ ant
312
+ grasshopper
313
+ cricket insect
314
+ stick insect
315
+ cockroach
316
+ praying mantis
317
+ cicada
318
+ leafhopper
319
+ lacewing
320
+ dragonfly
321
+ damselfly
322
+ red admiral butterfly
323
+ ringlet butterfly
324
+ monarch butterfly
325
+ small white butterfly
326
+ sulphur butterfly
327
+ gossamer-winged butterfly
328
+ starfish
329
+ sea urchin
330
+ sea cucumber
331
+ cottontail rabbit
332
+ hare
333
+ Angora rabbit
334
+ hamster
335
+ porcupine
336
+ fox squirrel
337
+ marmot
338
+ beaver
339
+ guinea pig
340
+ common sorrel horse
341
+ zebra
342
+ pig
343
+ wild boar
344
+ warthog
345
+ hippopotamus
346
+ ox
347
+ water buffalo
348
+ bison
349
+ ram (adult male sheep)
350
+ bighorn sheep
351
+ Alpine ibex
352
+ hartebeest
353
+ impala (antelope)
354
+ gazelle
355
+ arabian camel
356
+ llama
357
+ weasel
358
+ mink
359
+ European polecat
360
+ black-footed ferret
361
+ otter
362
+ skunk
363
+ badger
364
+ armadillo
365
+ three-toed sloth
366
+ orangutan
367
+ gorilla
368
+ chimpanzee
369
+ gibbon
370
+ siamang
371
+ guenon
372
+ patas monkey
373
+ baboon
374
+ macaque
375
+ langur
376
+ black-and-white colobus
377
+ proboscis monkey
378
+ marmoset
379
+ white-headed capuchin
380
+ howler monkey
381
+ titi monkey
382
+ Geoffroy's spider monkey
383
+ common squirrel monkey
384
+ ring-tailed lemur
385
+ indri
386
+ Asian elephant
387
+ African bush elephant
388
+ red panda
389
+ giant panda
390
+ snoek fish
391
+ eel
392
+ silver salmon
393
+ rock beauty fish
394
+ clownfish
395
+ sturgeon
396
+ gar fish
397
+ lionfish
398
+ pufferfish
399
+ abacus
400
+ abaya
401
+ academic gown
402
+ accordion
403
+ acoustic guitar
404
+ aircraft carrier
405
+ airliner
406
+ airship
407
+ altar
408
+ ambulance
409
+ amphibious vehicle
410
+ analog clock
411
+ apiary
412
+ apron
413
+ trash can
414
+ assault rifle
415
+ backpack
416
+ bakery
417
+ balance beam
418
+ balloon
419
+ ballpoint pen
420
+ Band-Aid
421
+ banjo
422
+ baluster / handrail
423
+ barbell
424
+ barber chair
425
+ barbershop
426
+ barn
427
+ barometer
428
+ barrel
429
+ wheelbarrow
430
+ baseball
431
+ basketball
432
+ bassinet
433
+ bassoon
434
+ swimming cap
435
+ bath towel
436
+ bathtub
437
+ station wagon
438
+ lighthouse
439
+ beaker
440
+ military hat (bearskin or shako)
441
+ beer bottle
442
+ beer glass
443
+ bell tower
444
+ baby bib
445
+ tandem bicycle
446
+ bikini
447
+ ring binder
448
+ binoculars
449
+ birdhouse
450
+ boathouse
451
+ bobsleigh
452
+ bolo tie
453
+ poke bonnet
454
+ bookcase
455
+ bookstore
456
+ bottle cap
457
+ hunting bow
458
+ bow tie
459
+ brass memorial plaque
460
+ bra
461
+ breakwater
462
+ breastplate
463
+ broom
464
+ bucket
465
+ buckle
466
+ bulletproof vest
467
+ high-speed train
468
+ butcher shop
469
+ taxicab
470
+ cauldron
471
+ candle
472
+ cannon
473
+ canoe
474
+ can opener
475
+ cardigan
476
+ car mirror
477
+ carousel
478
+ tool kit
479
+ cardboard box / carton
480
+ car wheel
481
+ automated teller machine
482
+ cassette
483
+ cassette player
484
+ castle
485
+ catamaran
486
+ CD player
487
+ cello
488
+ mobile phone
489
+ chain
490
+ chain-link fence
491
+ chain mail
492
+ chainsaw
493
+ storage chest
494
+ chiffonier
495
+ bell or wind chime
496
+ china cabinet
497
+ Christmas stocking
498
+ church
499
+ movie theater
500
+ cleaver
501
+ cliff dwelling
502
+ cloak
503
+ clogs
504
+ cocktail shaker
505
+ coffee mug
506
+ coffeemaker
507
+ spiral or coil
508
+ combination lock
509
+ computer keyboard
510
+ candy store
511
+ container ship
512
+ convertible
513
+ corkscrew
514
+ cornet
515
+ cowboy boot
516
+ cowboy hat
517
+ cradle
518
+ construction crane
519
+ crash helmet
520
+ crate
521
+ infant bed
522
+ Crock Pot
523
+ croquet ball
524
+ crutch
525
+ cuirass
526
+ dam
527
+ desk
528
+ desktop computer
529
+ rotary dial telephone
530
+ diaper
531
+ digital clock
532
+ digital watch
533
+ dining table
534
+ dishcloth
535
+ dishwasher
536
+ disc brake
537
+ dock
538
+ dog sled
539
+ dome
540
+ doormat
541
+ drilling rig
542
+ drum
543
+ drumstick
544
+ dumbbell
545
+ Dutch oven
546
+ electric fan
547
+ electric guitar
548
+ electric locomotive
549
+ entertainment center
550
+ envelope
551
+ espresso machine
552
+ face powder
553
+ feather boa
554
+ filing cabinet
555
+ fireboat
556
+ fire truck
557
+ fire screen
558
+ flagpole
559
+ flute
560
+ folding chair
561
+ football helmet
562
+ forklift
563
+ fountain
564
+ fountain pen
565
+ four-poster bed
566
+ freight car
567
+ French horn
568
+ frying pan
569
+ fur coat
570
+ garbage truck
571
+ gas mask or respirator
572
+ gas pump
573
+ goblet
574
+ go-kart
575
+ golf ball
576
+ golf cart
577
+ gondola
578
+ gong
579
+ gown
580
+ grand piano
581
+ greenhouse
582
+ radiator grille
583
+ grocery store
584
+ guillotine
585
+ hair clip
586
+ hair spray
587
+ half-track
588
+ hammer
589
+ hamper
590
+ hair dryer
591
+ hand-held computer
592
+ handkerchief
593
+ hard disk drive
594
+ harmonica
595
+ harp
596
+ combine harvester
597
+ hatchet
598
+ holster
599
+ home theater
600
+ honeycomb
601
+ hook
602
+ hoop skirt
603
+ gymnastic horizontal bar
604
+ horse-drawn vehicle
605
+ hourglass
606
+ iPod
607
+ clothes iron
608
+ carved pumpkin
609
+ jeans
610
+ jeep
611
+ T-shirt
612
+ jigsaw puzzle
613
+ rickshaw
614
+ joystick
615
+ kimono
616
+ knee pad
617
+ knot
618
+ lab coat
619
+ ladle
620
+ lampshade
621
+ laptop computer
622
+ lawn mower
623
+ lens cap
624
+ letter opener
625
+ library
626
+ lifeboat
627
+ lighter
628
+ limousine
629
+ ocean liner
630
+ lipstick
631
+ slip-on shoe
632
+ lotion
633
+ music speaker
634
+ loupe magnifying glass
635
+ sawmill
636
+ magnetic compass
637
+ messenger bag
638
+ mailbox
639
+ tights
640
+ one-piece bathing suit
641
+ manhole cover
642
+ maraca
643
+ marimba
644
+ mask
645
+ matchstick
646
+ maypole
647
+ maze
648
+ measuring cup
649
+ medicine cabinet
650
+ megalith
651
+ microphone
652
+ microwave oven
653
+ military uniform
654
+ milk can
655
+ minibus
656
+ miniskirt
657
+ minivan
658
+ missile
659
+ mitten
660
+ mixing bowl
661
+ mobile home
662
+ ford model t
663
+ modem
664
+ monastery
665
+ monitor
666
+ moped
667
+ mortar and pestle
668
+ graduation cap
669
+ mosque
670
+ mosquito net
671
+ vespa
672
+ mountain bike
673
+ tent
674
+ computer mouse
675
+ mousetrap
676
+ moving van
677
+ muzzle
678
+ metal nail
679
+ neck brace
680
+ necklace
681
+ baby pacifier
682
+ notebook computer
683
+ obelisk
684
+ oboe
685
+ ocarina
686
+ odometer
687
+ oil filter
688
+ pipe organ
689
+ oscilloscope
690
+ overskirt
691
+ bullock cart
692
+ oxygen mask
693
+ product packet / packaging
694
+ paddle
695
+ paddle wheel
696
+ padlock
697
+ paintbrush
698
+ pajamas
699
+ palace
700
+ pan flute
701
+ paper towel
702
+ parachute
703
+ parallel bars
704
+ park bench
705
+ parking meter
706
+ railroad car
707
+ patio
708
+ payphone
709
+ pedestal
710
+ pencil case
711
+ pencil sharpener
712
+ perfume
713
+ Petri dish
714
+ photocopier
715
+ plectrum
716
+ Pickelhaube
717
+ picket fence
718
+ pickup truck
719
+ pier
720
+ piggy bank
721
+ pill bottle
722
+ pillow
723
+ ping-pong ball
724
+ pinwheel
725
+ pirate ship
726
+ drink pitcher
727
+ block plane
728
+ planetarium
729
+ plastic bag
730
+ plate rack
731
+ farm plow
732
+ plunger
733
+ Polaroid camera
734
+ pole
735
+ police van
736
+ poncho
737
+ pool table
738
+ soda bottle
739
+ plant pot
740
+ potter's wheel
741
+ power drill
742
+ prayer rug
743
+ printer
744
+ prison
745
+ missile
746
+ projector
747
+ hockey puck
748
+ punching bag
749
+ purse
750
+ quill
751
+ quilt
752
+ race car
753
+ racket
754
+ radiator
755
+ radio
756
+ radio telescope
757
+ rain barrel
758
+ recreational vehicle
759
+ fishing casting reel
760
+ reflex camera
761
+ refrigerator
762
+ remote control
763
+ restaurant
764
+ revolver
765
+ rifle
766
+ rocking chair
767
+ rotisserie
768
+ eraser
769
+ rugby ball
770
+ ruler measuring stick
771
+ sneaker
772
+ safe
773
+ safety pin
774
+ salt shaker
775
+ sandal
776
+ sarong
777
+ saxophone
778
+ scabbard
779
+ weighing scale
780
+ school bus
781
+ schooner
782
+ scoreboard
783
+ CRT monitor
784
+ screw
785
+ screwdriver
786
+ seat belt
787
+ sewing machine
788
+ shield
789
+ shoe store
790
+ shoji screen / room divider
791
+ shopping basket
792
+ shopping cart
793
+ shovel
794
+ shower cap
795
+ shower curtain
796
+ ski
797
+ balaclava ski mask
798
+ sleeping bag
799
+ slide rule
800
+ sliding door
801
+ slot machine
802
+ snorkel
803
+ snowmobile
804
+ snowplow
805
+ soap dispenser
806
+ soccer ball
807
+ sock
808
+ solar thermal collector
809
+ sombrero
810
+ soup bowl
811
+ keyboard space bar
812
+ space heater
813
+ space shuttle
814
+ spatula
815
+ motorboat
816
+ spider web
817
+ spindle
818
+ sports car
819
+ spotlight
820
+ stage
821
+ steam locomotive
822
+ through arch bridge
823
+ steel drum
824
+ stethoscope
825
+ scarf
826
+ stone wall
827
+ stopwatch
828
+ stove
829
+ strainer
830
+ tram
831
+ stretcher
832
+ couch
833
+ stupa
834
+ submarine
835
+ suit
836
+ sundial
837
+ sunglasses
838
+ sunglasses
839
+ sunscreen
840
+ suspension bridge
841
+ mop
842
+ sweatshirt
843
+ swim trunks / shorts
844
+ swing
845
+ electrical switch
846
+ syringe
847
+ table lamp
848
+ tank
849
+ tape player
850
+ teapot
851
+ teddy bear
852
+ television
853
+ tennis ball
854
+ thatched roof
855
+ front curtain
856
+ thimble
857
+ threshing machine
858
+ throne
859
+ tile roof
860
+ toaster
861
+ tobacco shop
862
+ toilet seat
863
+ torch
864
+ totem pole
865
+ tow truck
866
+ toy store
867
+ tractor
868
+ semi-trailer truck
869
+ tray
870
+ trench coat
871
+ tricycle
872
+ trimaran
873
+ tripod
874
+ triumphal arch
875
+ trolleybus
876
+ trombone
877
+ hot tub
878
+ turnstile
879
+ typewriter keyboard
880
+ umbrella
881
+ unicycle
882
+ upright piano
883
+ vacuum cleaner
884
+ vase
885
+ vaulted or arched ceiling
886
+ velvet fabric
887
+ vending machine
888
+ vestment
889
+ viaduct
890
+ violin
891
+ volleyball
892
+ waffle iron
893
+ wall clock
894
+ wallet
895
+ wardrobe
896
+ military aircraft
897
+ sink
898
+ washing machine
899
+ water bottle
900
+ water jug
901
+ water tower
902
+ whiskey jug
903
+ whistle
904
+ hair wig
905
+ window screen
906
+ window shade
907
+ Windsor tie
908
+ wine bottle
909
+ airplane wing
910
+ wok
911
+ wooden spoon
912
+ wool
913
+ split-rail fence
914
+ shipwreck
915
+ sailboat
916
+ yurt
917
+ website
918
+ comic book
919
+ crossword
920
+ traffic or street sign
921
+ traffic light
922
+ dust jacket
923
+ menu
924
+ plate
925
+ guacamole
926
+ consomme
927
+ hot pot
928
+ trifle
929
+ ice cream
930
+ popsicle
931
+ baguette
932
+ bagel
933
+ pretzel
934
+ cheeseburger
935
+ hot dog
936
+ mashed potatoes
937
+ cabbage
938
+ broccoli
939
+ cauliflower
940
+ zucchini
941
+ spaghetti squash
942
+ acorn squash
943
+ butternut squash
944
+ cucumber
945
+ artichoke
946
+ bell pepper
947
+ cardoon
948
+ mushroom
949
+ Granny Smith apple
950
+ strawberry
951
+ orange
952
+ lemon
953
+ fig
954
+ pineapple
955
+ banana
956
+ jackfruit
957
+ cherimoya (custard apple)
958
+ pomegranate
959
+ hay
960
+ carbonara
961
+ chocolate syrup
962
+ dough
963
+ meatloaf
964
+ pizza
965
+ pot pie
966
+ burrito
967
+ red wine
968
+ espresso
969
+ tea cup
970
+ eggnog
971
+ mountain
972
+ bubble
973
+ cliff
974
+ coral reef
975
+ geyser
976
+ lakeshore
977
+ promontory
978
+ sandbar
979
+ beach
980
+ valley
981
+ volcano
982
+ baseball player
983
+ bridegroom
984
+ scuba diver
985
+ rapeseed
986
+ daisy
987
+ yellow lady's slipper
988
+ corn
989
+ acorn
990
+ rose hip
991
+ horse chestnut seed
992
+ coral fungus
993
+ agaric
994
+ gyromitra
995
+ stinkhorn mushroom
996
+ earth star fungus
997
+ hen of the woods mushroom
998
+ bolete
999
+ corn cob
1000
+ toilet paper
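The taglist files added in this commit are plain newline-delimited text: one tag per line, with the 1-based line number serving as the tag's class index. As a minimal sketch (not part of the repo's code), such a file can be loaded as shown below; the path is one of the files added in this commit, and the helper name is illustrative.

```python
# Minimal sketch: read a newline-delimited taglist into a Python list.
# A tag's 0-based position in the list corresponds to (line number - 1).
from pathlib import Path


def load_taglist(path: str) -> list[str]:
    """Return the tags in file order, skipping empty lines."""
    text = Path(path).read_text(encoding="utf-8")
    return [line.strip() for line in text.splitlines() if line.strip()]


tags = load_taglist(
    "recognize-anything/datasets/imagenet_multi/imagenet_multi_1000_taglist.txt"
)
print(len(tags), tags[:3])  # expected: 1000 tags for this file
```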
recognize-anything/datasets/openimages_common_214/imgs/.gitkeep ADDED
File without changes
recognize-anything/datasets/openimages_common_214/openimages_common_214_ram_annots.txt ADDED
The diff for this file is too large to render. See raw diff
 
recognize-anything/datasets/openimages_common_214/openimages_common_214_ram_taglist.txt ADDED
@@ -0,0 +1,214 @@
1
+ accident
2
+ accordion
3
+ plane
4
+ airport
5
+ antelope
6
+ apple
7
+ art gallery
8
+ eggplant
9
+ auditorium
10
+ autumn
11
+ baboon
12
+ backpack
13
+ bakery
14
+ bamboo
15
+ banana
16
+ barbecue
17
+ bed
18
+ bedroom
19
+ clock
20
+ bicycle
21
+ bikini
22
+ birthday cake
23
+ blackberry
24
+ blueberry
25
+ pig
26
+ bookcase
27
+ bridge
28
+ broccoli
29
+ bus
30
+ butterfly
31
+ calculator
32
+ calendar
33
+ camping
34
+ candle
35
+ candy
36
+ cannon
37
+ canyon
38
+ car
39
+ carousel
40
+ cat
41
+ cave
42
+ ceiling
43
+ cheese
44
+ cheetah
45
+ chef
46
+ chicken
47
+ christmas
48
+ christmas tree
49
+ clover
50
+ coral
51
+ corn
52
+ courtyard
53
+ crab
54
+ lobster
55
+ crocodile
56
+ crosswalk
57
+ crow
58
+ cucumber
59
+ cup
60
+ currency
61
+ dachshund
62
+ deer
63
+ desert
64
+ die
65
+ dinosaur
66
+ dog
67
+ dolphin
68
+ doodle
69
+ dragonfly
70
+ drum
71
+ duck
72
+ dumbbell
73
+ easter egg
74
+ egg
75
+ elephant
76
+ faucet
77
+ ferris wheel
78
+ fire
79
+ fireman
80
+ firework
81
+ flamingo
82
+ flower
83
+ football
84
+ fountain
85
+ fox
86
+ fridge
87
+ frog
88
+ ham
89
+ gas stove
90
+ giraffe
91
+ glacier
92
+ glove
93
+ goat
94
+ goose
95
+ gorilla
96
+ grape
97
+ guitar
98
+ gull
99
+ gym
100
+ halloween
101
+ hamburger
102
+ hamster
103
+ handbag
104
+ hedgehog
105
+ helicopter
106
+ horse
107
+ hummingbird
108
+ jellyfish
109
+ kangaroo
110
+ kimono
111
+ kite
112
+ ladybird
113
+ laptop
114
+ leg
115
+ mailbox
116
+ library
117
+ lightning
118
+ lily
119
+ lion
120
+ lizard
121
+ luggage
122
+ mannequin
123
+ map
124
+ mask
125
+ mattress
126
+ microphone
127
+ microwave
128
+ monkey
129
+ moon
130
+ mosque
131
+ mouse
132
+ mushroom
133
+ nebula
134
+ sea
135
+ ostrich
136
+ palm tree
137
+ paper
138
+ pasta
139
+ patient
140
+ pavilion
141
+ pear
142
+ pebble
143
+ penguin
144
+ pet
145
+ piano
146
+ picture frame
147
+ pine
148
+ pineapple
149
+ pizza
150
+ police car
151
+ pomegranate
152
+ poodle
153
+ popcorn
154
+ stamp
155
+ power station
156
+ printer
157
+ pumpkin
158
+ raccoon
159
+ rainbow
160
+ rat
161
+ restroom
162
+ ring
163
+ run
164
+ salad
165
+ sandwich
166
+ sausage
167
+ shark
168
+ sheet music
169
+ shrine
170
+ snowboard
171
+ snake
172
+ sparrow
173
+ squirrel
174
+ stage
175
+ starfish
176
+ statue
177
+ steering wheel
178
+ stream
179
+ street art
180
+ street light
181
+ submarine
182
+ suite
183
+ surfboard
184
+ sushi
185
+ swan
186
+ tattoo
187
+ teddy
188
+ tennis court
189
+ tennis racket
190
+ tiger
191
+ toast
192
+ toilet bowl
193
+ toy
194
+ tractor
195
+ train
196
+ trampoline
197
+ treadmill
198
+ truck
199
+ tunnel
200
+ turkey
201
+ vending machine
202
+ waffle
203
+ walnut
204
+ washing machine
205
+ water buffalo
206
+ waterfall
207
+ watermelon
208
+ wheat
209
+ wheelchair
210
+ windmill
211
+ winter
212
+ wolf
213
+ woodpecker
214
+ zebra
recognize-anything/datasets/openimages_common_214/openimages_common_214_tag2text_idannots.txt ADDED
The diff for this file is too large to render. See raw diff
 
recognize-anything/datasets/openimages_common_214/openimages_common_214_tag2text_tagidlist.txt ADDED
@@ -0,0 +1,214 @@
1
+ 3
2
+ 8
3
+ 16
4
+ 19
5
+ 21
6
+ 33
7
+ 44
8
+ 50
9
+ 58
10
+ 61
11
+ 71
12
+ 77
13
+ 84
14
+ 96
15
+ 117
16
+ 139
17
+ 142
18
+ 147
19
+ 180
20
+ 200
21
+ 202
22
+ 206
23
+ 244
24
+ 267
25
+ 317
26
+ 321
27
+ 347
28
+ 361
29
+ 380
30
+ 387
31
+ 398
32
+ 407
33
+ 471
34
+ 486
35
+ 489
36
+ 509
37
+ 514
38
+ 530
39
+ 568
40
+ 590
41
+ 595
42
+ 612
43
+ 622
44
+ 626
45
+ 654
46
+ 658
47
+ 664
48
+ 684
49
+ 699
50
+ 704
51
+ 717
52
+ 720
53
+ 727
54
+ 760
55
+ 773
56
+ 786
57
+ 787
58
+ 812
59
+ 814
60
+ 817
61
+ 843
62
+ 855
63
+ 856
64
+ 907
65
+ 950
66
+ 955
67
+ 957
68
+ 1023
69
+ 1042
70
+ 1056
71
+ 1066
72
+ 1091
73
+ 1094
74
+ 1108
75
+ 1141
76
+ 1148
77
+ 1152
78
+ 1168
79
+ 1174
80
+ 1187
81
+ 1231
82
+ 1235
83
+ 1246
84
+ 1276
85
+ 1277
86
+ 1305
87
+ 1308
88
+ 1344
89
+ 1359
90
+ 1362
91
+ 1393
92
+ 1394
93
+ 1410
94
+ 1411
95
+ 1468
96
+ 1504
97
+ 1524
98
+ 1536
99
+ 1540
100
+ 1542
101
+ 1546
102
+ 1553
103
+ 1572
104
+ 1574
105
+ 1606
106
+ 1610
107
+ 1615
108
+ 1655
109
+ 1672
110
+ 1680
111
+ 1682
112
+ 1687
113
+ 1691
114
+ 1692
115
+ 1711
116
+ 1712
117
+ 1713
118
+ 1719
119
+ 1727
120
+ 1733
121
+ 1761
122
+ 1770
123
+ 1782
124
+ 1784
125
+ 1786
126
+ 1803
127
+ 1812
128
+ 1816
129
+ 1820
130
+ 1829
131
+ 1831
132
+ 1841
133
+ 1845
134
+ 1878
135
+ 1882
136
+ 1931
137
+ 1940
138
+ 1944
139
+ 1947
140
+ 1974
141
+ 1975
142
+ 1977
143
+ 2009
144
+ 2031
145
+ 2035
146
+ 2052
147
+ 2065
148
+ 2110
149
+ 2113
150
+ 2138
151
+ 2149
152
+ 2154
153
+ 2157
154
+ 2174
155
+ 2178
156
+ 2184
157
+ 2185
158
+ 2202
159
+ 2222
160
+ 2233
161
+ 2291
162
+ 2301
163
+ 2302
164
+ 2317
165
+ 2320
166
+ 2351
167
+ 2354
168
+ 2373
169
+ 2383
170
+ 2393
171
+ 2403
172
+ 2413
173
+ 2415
174
+ 2417
175
+ 2423
176
+ 2449
177
+ 2454
178
+ 2455
179
+ 2472
180
+ 2494
181
+ 2495
182
+ 2528
183
+ 2541
184
+ 2543
185
+ 2553
186
+ 2563
187
+ 2589
188
+ 2603
189
+ 2654
190
+ 2656
191
+ 2658
192
+ 2676
193
+ 2690
194
+ 2693
195
+ 2700
196
+ 2708
197
+ 2720
198
+ 2721
199
+ 2729
200
+ 2732
201
+ 2734
202
+ 2756
203
+ 2786
204
+ 2792
205
+ 2801
206
+ 2821
207
+ 2851
208
+ 2887
209
+ 2906
210
+ 2909
211
+ 2924
212
+ 2929
213
+ 2966
214
+ 2980
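The tag2text_tagidlist file above holds one integer per line; these appear to be indices selecting the 214 evaluation categories out of Tag2Text's larger tag vocabulary (the IDs range up to 2980). A hedged sketch of reading it follows, assuming a full vocabulary list `full_tags` has been loaded separately (that variable is hypothetical and not defined in this commit).

```python
# Minimal sketch: load the 214 integer tag IDs added above.
from pathlib import Path


def load_tag_ids(path: str) -> list[int]:
    """Return the integer IDs in file order."""
    return [int(tok) for tok in Path(path).read_text().split()]


ids = load_tag_ids(
    "recognize-anything/datasets/openimages_common_214/"
    "openimages_common_214_tag2text_tagidlist.txt"
)
print(len(ids), ids[:5])  # expected: 214 IDs, starting 3, 8, 16, 19, 21

# Hypothetical use: pick the evaluation subset from a full Tag2Text vocabulary.
# subset = [full_tags[i] for i in ids]
```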
recognize-anything/datasets/openimages_rare_200/imgs/.gitkeep ADDED
File without changes
recognize-anything/datasets/openimages_rare_200/openimages_rare_200_llm_tag_descriptions.json ADDED
The diff for this file is too large to render. See raw diff
 
recognize-anything/datasets/openimages_rare_200/openimages_rare_200_ram_annots.txt ADDED
The diff for this file is too large to render. See raw diff
 
recognize-anything/datasets/openimages_rare_200/openimages_rare_200_ram_taglist.txt ADDED
@@ -0,0 +1,200 @@
1
+ Aerial photography
2
+ Aircraft engine
3
+ Ale
4
+ Aloe
5
+ Amphibian
6
+ Angling
7
+ Anole
8
+ Antique car
9
+ Arcade game
10
+ Arthropod
11
+ Assault rifle
12
+ Athletic shoe
13
+ Auto racing
14
+ Backlighting
15
+ Bagpipes
16
+ Ball game
17
+ Barbecue chicken
18
+ Barechested
19
+ Barquentine
20
+ Beef tenderloin
21
+ Billiard room
22
+ Billiards
23
+ Bird of prey
24
+ Black swan
25
+ Black-and-white
26
+ Blond
27
+ Boating
28
+ Bonbon
29
+ Bottled water
30
+ Bouldering
31
+ Bovine
32
+ Bratwurst
33
+ Breadboard
34
+ Briefs
35
+ Brisket
36
+ Brochette
37
+ Calabaza
38
+ Camera operator
39
+ Canola
40
+ Childbirth
41
+ Chordophone
42
+ Church bell
43
+ Classical sculpture
44
+ Close-up
45
+ Cobblestone
46
+ Coca-cola
47
+ Combat sport
48
+ Comics
49
+ Compact car
50
+ Computer speaker
51
+ Cookies and crackers
52
+ Coral reef fish
53
+ Corn on the cob
54
+ Cosmetics
55
+ Crocodilia
56
+ Digital camera
57
+ Dishware
58
+ Divemaster
59
+ Dobermann
60
+ Dog walking
61
+ Domestic rabbit
62
+ Domestic short-haired cat
63
+ Double-decker bus
64
+ Drums
65
+ Electric guitar
66
+ Electric piano
67
+ Electronic instrument
68
+ Equestrianism
69
+ Equitation
70
+ Erinaceidae
71
+ Extreme sport
72
+ Falafel
73
+ Figure skating
74
+ Filling station
75
+ Fire apparatus
76
+ Firearm
77
+ Flatbread
78
+ Floristry
79
+ Forklift truck
80
+ Freight transport
81
+ Fried food
82
+ Fried noodles
83
+ Frigate
84
+ Frozen yogurt
85
+ Frying
86
+ Full moon
87
+ Galleon
88
+ Glacial landform
89
+ Gliding
90
+ Go-kart
91
+ Goats
92
+ Grappling
93
+ Great white shark
94
+ Gumbo
95
+ Gun turret
96
+ Hair coloring
97
+ Halter
98
+ Headphones
99
+ Heavy cruiser
100
+ Herding
101
+ High-speed rail
102
+ Holding hands
103
+ Horse and buggy
104
+ Horse racing
105
+ Hound
106
+ Hunting knife
107
+ Hurdling
108
+ Inflatable
109
+ Jackfruit
110
+ Jeans
111
+ Jiaozi
112
+ Junk food
113
+ Khinkali
114
+ Kitesurfing
115
+ Lawn game
116
+ Leaf vegetable
117
+ Lechon
118
+ Lifebuoy
119
+ Locust
120
+ Lumpia
121
+ Luxury vehicle
122
+ Machine tool
123
+ Medical imaging
124
+ Melee weapon
125
+ Microcontroller
126
+ Middle ages
127
+ Military person
128
+ Military vehicle
129
+ Milky way
130
+ Miniature Poodle
131
+ Modern dance
132
+ Molluscs
133
+ Monoplane
134
+ Motorcycling
135
+ Musical theatre
136
+ Narcissus
137
+ Nest box
138
+ Newsagent's shop
139
+ Nile crocodile
140
+ Nordic skiing
141
+ Nuclear power plant
142
+ Orator
143
+ Outdoor shoe
144
+ Parachuting
145
+ Pasta salad
146
+ Peafowl
147
+ Pelmeni
148
+ Perching bird
149
+ Performance car
150
+ Personal water craft
151
+ Pit bull
152
+ Plant stem
153
+ Pork chop
154
+ Portrait photography
155
+ Primate
156
+ Procyonidae
157
+ Prosciutto
158
+ Public speaking
159
+ Racewalking
160
+ Ramen
161
+ Rear-view mirror
162
+ Residential area
163
+ Ribs
164
+ Rice ball
165
+ Road cycling
166
+ Roller skating
167
+ Roman temple
168
+ Rowing
169
+ Rural area
170
+ Sailboat racing
171
+ Scaled reptile
172
+ Scuba diving
173
+ Senior citizen
174
+ Shallot
175
+ Shinto shrine
176
+ Shooting range
177
+ Siberian husky
178
+ Sledding
179
+ Soba
180
+ Solar energy
181
+ Sport climbing
182
+ Sport utility vehicle
183
+ Steamed rice
184
+ Stemware
185
+ Sumo
186
+ Surfing Equipment
187
+ Team sport
188
+ Touring car
189
+ Toy block
190
+ Trampolining
191
+ Underwater diving
192
+ Vegetarian food
193
+ Wallaby
194
+ Water polo
195
+ Watercolor paint
196
+ Whiskers
197
+ Wind wave
198
+ Woodwind instrument
199
+ Yakitori
200
+ Zeppelin
recognize-anything/images/.ipynb_checkpoints/ram_plus_framework-checkpoint.jpg ADDED

Git LFS Details

  • SHA256: 5bf4f19389cda78a019156abddf945e192cc95cc364c74f85c8dee5264763125
  • Pointer size: 131 Bytes
  • Size of remote file: 199 kB
recognize-anything/images/.ipynb_checkpoints/ram_plus_visualization-checkpoint.jpg ADDED

Git LFS Details

  • SHA256: 24e13c7472aada4ef244f41e894d07a4d0c0b88e22ceb54b353eb34af217dece
  • Pointer size: 131 Bytes
  • Size of remote file: 404 kB
recognize-anything/images/.ipynb_checkpoints/tag2text_retrieval_visualization-checkpoint.png ADDED

Git LFS Details

  • SHA256: b9f87eed6197420f1ac525300afc75d257bccbd4825fd053e328255e82a46968
  • Pointer size: 132 Bytes
  • Size of remote file: 1.05 MB
recognize-anything/images/1641173_2291260800.jpg ADDED

Git LFS Details

  • SHA256: c562fea3659c4b112f71cfecb4a57143124b8b734e1ca96144bbdda734e494d4
  • Pointer size: 132 Bytes
  • Size of remote file: 1.81 MB
recognize-anything/images/demo/.ipynb_checkpoints/demo2-checkpoint.jpg ADDED

Git LFS Details

  • SHA256: 5c5159bf7114d08967f95475176670043115b157bf700efa34190260cd917662
  • Pointer size: 132 Bytes
  • Size of remote file: 1.03 MB
recognize-anything/images/demo/.ipynb_checkpoints/demo4-checkpoint.jpg ADDED

Git LFS Details

  • SHA256: 5c71251326fb9ece01b5ce6334869861b3fce82eeb5cae45977e78e6332f4170
  • Pointer size: 131 Bytes
  • Size of remote file: 165 kB
recognize-anything/images/demo/demo1.jpg ADDED

Git LFS Details

  • SHA256: 1b2906f4058a69936df49cb6156ec4cd117a286b420e1eb14764033bf8f3c05f
  • Pointer size: 132 Bytes
  • Size of remote file: 5.7 MB
recognize-anything/images/demo/demo2.jpg ADDED

Git LFS Details

  • SHA256: 5c5159bf7114d08967f95475176670043115b157bf700efa34190260cd917662
  • Pointer size: 132 Bytes
  • Size of remote file: 1.03 MB
recognize-anything/images/demo/demo3.jpg ADDED

Git LFS Details

  • SHA256: c562fea3659c4b112f71cfecb4a57143124b8b734e1ca96144bbdda734e494d4
  • Pointer size: 132 Bytes
  • Size of remote file: 1.81 MB
recognize-anything/images/demo/demo4.jpg ADDED

Git LFS Details

  • SHA256: 5c71251326fb9ece01b5ce6334869861b3fce82eeb5cae45977e78e6332f4170
  • Pointer size: 131 Bytes
  • Size of remote file: 165 kB
recognize-anything/images/experiment_comparison.png ADDED

Git LFS Details

  • SHA256: 4146dfb4a00c973d012436073fa166af50a5c8668312246f2648447914e27506
  • Pointer size: 131 Bytes
  • Size of remote file: 158 kB
recognize-anything/images/localization_and_recognition.jpg ADDED

Git LFS Details

  • SHA256: 80dea86f6cc3773300070a9b817042985651273228008b12a3f1cf285426d1b6
  • Pointer size: 131 Bytes
  • Size of remote file: 222 kB
recognize-anything/images/openset_example.jpg ADDED

Git LFS Details

  • SHA256: 257c8d969c625f1f6727ffc8a5d5e70ad8f00f679c1deedabe0830edd2c24d01
  • Pointer size: 131 Bytes
  • Size of remote file: 215 kB
recognize-anything/images/ram_grounded_sam.jpg ADDED

Git LFS Details

  • SHA256: 9b288d96d34420df062bdc07393947d47a7d8ee163d07decde646f7898be8d61
  • Pointer size: 131 Bytes
  • Size of remote file: 544 kB
recognize-anything/images/ram_plus_compare.jpg ADDED

Git LFS Details

  • SHA256: 3114244c0698559c5df8ff51fb00c754ae13a28a7c04d610edbd87bc11f88b88
  • Pointer size: 131 Bytes
  • Size of remote file: 125 kB