tuandunghcmut committed
Commit d756736 · verified · 1 parent: ff5d469

Add files using upload-large-folder tool

This view is limited to 50 files because the commit contains too many changes. See the raw diff for the full change set.
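The commit message refers to the Hugging Face upload-large-folder tool, which pushes a big local folder in resumable chunks and creates batched commits like this one. As a minimal sketch (the repo id and folder path below are illustrative placeholders, not values taken from this commit), the equivalent call through the Python API is:

# Sketch only: push a large local folder to an existing Hub repo.
# "your-username/your-repo" and "./local_folder" are placeholders.
from huggingface_hub import HfApi

api = HfApi()
api.upload_large_folder(
    repo_id="your-username/your-repo",
    repo_type="model",
    folder_path="./local_folder",
)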
Files changed (50)
  1. .gitattributes +25 -0
  2. a_mllm_notebooks/openai/image.jpg +3 -0
  3. a_mllm_notebooks/openai/infer.py +167 -0
  4. a_mllm_notebooks/openai/infer.sh +4 -0
  5. a_mllm_notebooks/openai/langchain_openai_api.ipynb +0 -0
  6. a_mllm_notebooks/openai/load_synth_pedes.ipynb +96 -0
  7. a_mllm_notebooks/openai/ping_server.ipynb +416 -0
  8. a_mllm_notebooks/openai/proxy.sh +10 -0
  9. a_mllm_notebooks/openai/serve.sh +60 -0
  10. a_mllm_notebooks/openai/temp.json +0 -0
  11. a_mllm_notebooks/openai/temp.sh +25 -0
  12. a_mllm_notebooks/tensorrt-llm/clone_folder.ipynb +78 -0
  13. a_mllm_notebooks/vllm/Untitled.ipynb +68 -0
  14. a_mllm_notebooks/vllm/cat.jpg +3 -0
  15. a_mllm_notebooks/vllm/cli.md +405 -0
  16. a_mllm_notebooks/vllm/download_md.ipynb +213 -0
  17. a_mllm_notebooks/vllm/florence_2.ipynb +355 -0
  18. a_mllm_notebooks/vllm/serve.sh +452 -0
  19. a_mllm_notebooks/vllm/start.ipynb +432 -0
  20. mlruns/0/meta.yaml +6 -0
  21. recognize-anything/.ipynb_checkpoints/README-checkpoint.md +601 -0
  22. recognize-anything/.ipynb_checkpoints/recognize_anything_demo-checkpoint.ipynb +0 -0
  23. recognize-anything/datasets/hico/hico_600_annots.txt +0 -0
  24. recognize-anything/datasets/hico/hico_600_taglist.txt +600 -0
  25. recognize-anything/datasets/imagenet_multi/imagenet_multi_1000_annots.txt +0 -0
  26. recognize-anything/datasets/imagenet_multi/imagenet_multi_1000_taglist.txt +1000 -0
  27. recognize-anything/datasets/openimages_common_214/imgs/.gitkeep +0 -0
  28. recognize-anything/datasets/openimages_common_214/openimages_common_214_ram_annots.txt +0 -0
  29. recognize-anything/datasets/openimages_common_214/openimages_common_214_ram_taglist.txt +214 -0
  30. recognize-anything/datasets/openimages_common_214/openimages_common_214_tag2text_idannots.txt +0 -0
  31. recognize-anything/datasets/openimages_common_214/openimages_common_214_tag2text_tagidlist.txt +214 -0
  32. recognize-anything/datasets/openimages_rare_200/imgs/.gitkeep +0 -0
  33. recognize-anything/datasets/openimages_rare_200/openimages_rare_200_llm_tag_descriptions.json +0 -0
  34. recognize-anything/datasets/openimages_rare_200/openimages_rare_200_ram_annots.txt +0 -0
  35. recognize-anything/datasets/openimages_rare_200/openimages_rare_200_ram_taglist.txt +200 -0
  36. recognize-anything/images/.ipynb_checkpoints/ram_plus_framework-checkpoint.jpg +3 -0
  37. recognize-anything/images/.ipynb_checkpoints/ram_plus_visualization-checkpoint.jpg +3 -0
  38. recognize-anything/images/.ipynb_checkpoints/tag2text_retrieval_visualization-checkpoint.png +3 -0
  39. recognize-anything/images/1641173_2291260800.jpg +3 -0
  40. recognize-anything/images/demo/.ipynb_checkpoints/demo2-checkpoint.jpg +3 -0
  41. recognize-anything/images/demo/.ipynb_checkpoints/demo4-checkpoint.jpg +3 -0
  42. recognize-anything/images/demo/demo1.jpg +3 -0
  43. recognize-anything/images/demo/demo2.jpg +3 -0
  44. recognize-anything/images/demo/demo3.jpg +3 -0
  45. recognize-anything/images/demo/demo4.jpg +3 -0
  46. recognize-anything/images/experiment_comparison.png +3 -0
  47. recognize-anything/images/localization_and_recognition.jpg +3 -0
  48. recognize-anything/images/openset_example.jpg +3 -0
  49. recognize-anything/images/ram_grounded_sam.jpg +3 -0
  50. recognize-anything/images/ram_plus_compare.jpg +3 -0
.gitattributes CHANGED
@@ -427,3 +427,28 @@ VILA/inference_test/test_data/painting_1.png filter=lfs diff=lfs merge=lfs -text
 VILA/inference_test/test_data/palm_e_3.png filter=lfs diff=lfs merge=lfs -text
 VILA/tests/sample_data/llava_arch_test_images/23/image.png filter=lfs diff=lfs merge=lfs -text
 VILA/tests/sample_data/llava_arch_test_images/5/image.png filter=lfs diff=lfs merge=lfs -text
+ recognize-anything/images/ram_grounded_sam.jpg filter=lfs diff=lfs merge=lfs -text
+ recognize-anything/images/ram_plus_experiment.png filter=lfs diff=lfs merge=lfs -text
+ recognize-anything/images/ram_plus_compare.jpg filter=lfs diff=lfs merge=lfs -text
+ recognize-anything/images/experiment_comparison.png filter=lfs diff=lfs merge=lfs -text
+ recognize-anything/images/1641173_2291260800.jpg filter=lfs diff=lfs merge=lfs -text
+ recognize-anything/images/tag2text_framework.png filter=lfs diff=lfs merge=lfs -text
+ recognize-anything/images/tag2text_retrieval_visualization.png filter=lfs diff=lfs merge=lfs -text
+ recognize-anything/images/tag2text_visualization.png filter=lfs diff=lfs merge=lfs -text
+ recognize-anything/images/tagging_results.jpg filter=lfs diff=lfs merge=lfs -text
+ recognize-anything/images/localization_and_recognition.jpg filter=lfs diff=lfs merge=lfs -text
+ recognize-anything/images/ram_plus_visualization.jpg filter=lfs diff=lfs merge=lfs -text
+ recognize-anything/images/openset_example.jpg filter=lfs diff=lfs merge=lfs -text
+ recognize-anything/images/ram_plus_framework.jpg filter=lfs diff=lfs merge=lfs -text
+ recognize-anything/images/tag2text_grounded_sam.jpg filter=lfs diff=lfs merge=lfs -text
+ recognize-anything/images/demo/demo3.jpg filter=lfs diff=lfs merge=lfs -text
+ recognize-anything/images/demo/demo1.jpg filter=lfs diff=lfs merge=lfs -text
+ recognize-anything/images/demo/demo2.jpg filter=lfs diff=lfs merge=lfs -text
+ recognize-anything/images/demo/demo4.jpg filter=lfs diff=lfs merge=lfs -text
+ recognize-anything/images/.ipynb_checkpoints/tag2text_retrieval_visualization-checkpoint.png filter=lfs diff=lfs merge=lfs -text
+ recognize-anything/images/.ipynb_checkpoints/ram_plus_visualization-checkpoint.jpg filter=lfs diff=lfs merge=lfs -text
+ recognize-anything/images/.ipynb_checkpoints/ram_plus_framework-checkpoint.jpg filter=lfs diff=lfs merge=lfs -text
+ recognize-anything/images/demo/.ipynb_checkpoints/demo4-checkpoint.jpg filter=lfs diff=lfs merge=lfs -text
+ recognize-anything/images/demo/.ipynb_checkpoints/demo2-checkpoint.jpg filter=lfs diff=lfs merge=lfs -text
+ a_mllm_notebooks/vllm/cat.jpg filter=lfs diff=lfs merge=lfs -text
+ a_mllm_notebooks/openai/image.jpg filter=lfs diff=lfs merge=lfs -text
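Every image under recognize-anything/images/ is tracked with its own per-file rule above. Since .gitattributes patterns follow the same globbing rules as .gitignore, a recursive wildcard rule would cover these files (and future ones) in two lines; this is an optional simplification, not something the commit itself does:

recognize-anything/images/**/*.jpg filter=lfs diff=lfs merge=lfs -text
recognize-anything/images/**/*.png filter=lfs diff=lfs merge=lfs -text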
a_mllm_notebooks/openai/image.jpg ADDED

Git LFS Details

  • SHA256: dea9e7ef97386345f7cff32f9055da4982da5471c48d575146c796ab4563b04e
  • Pointer size: 131 Bytes
  • Size of remote file: 173 kB
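For context, what Git itself stores for an LFS-tracked file is only a small text pointer. With the SHA256 listed above it looks roughly like the sketch below; the size field holds the exact byte count, which this page reports only rounded to 173 kB, so the value shown here is approximate:

version https://git-lfs.github.com/spec/v1
oid sha256:dea9e7ef97386345f7cff32f9055da4982da5471c48d575146c796ab4563b04e
size 173000

Those three lines are what the reported 131-byte pointer size refers to; the actual JPEG lives in LFS storage.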
a_mllm_notebooks/openai/infer.py ADDED
@@ -0,0 +1,167 @@
+ # !pip install openai
+ from openai import OpenAI
+ from tqdm import tqdm
+ client = OpenAI(api_key="YOUR_API_KEY", base_url="http://0.0.0.0:7089/v1")
+ model_name = client.models.list().data[0].id
+
+
+
+ NUM_MODEL = len(client.models.list().data)  # number of models served behind the endpoint
+ NUM_THREAD = min(int(NUM_MODEL * 1.5), 32)
+
+ import datasets, huggingface_hub
+ disk_path = '/dscilab_dungvo/workspace/BA-PRE_THESIS/dataset_pretraining/SYNTH-PEDES/annotation_english_vietnamese_processed'
+ dataset = datasets.load_from_disk(disk_path)
+
+ # Dataset({
+ #     features: ['image_name', 'person_id', 'caption_0', 'caption_1', 'attributes', 'prompt_caption', 'image', 'viet_captions', 'viet_prompt_caption'],
+ #     num_rows: 4791127
+ # })
+
+ # {'image_name': 'Part1/1/0.jpg',
+ # 'person_id': 1,
+ # 'caption_0': 'A woman with black hair and she is wearing a black jacket with blue jeans paired with black shoes.',
+ # 'caption_1': '',
+ # 'attributes': 'woman,short hair,black jacket,blue denim jeans,black sneakers,black backpack',
+ # 'prompt_caption': 'The woman has short hair. She is wearing a black jacket, blue denim jeans and black sneakers. She is carrying a black backpack. ',
+ # 'image': <PIL.JpegImagePlugin.JpegImageFile image mode=RGB size=59x129>,
+ # 'viet_captions': ['Một người phụ nữ với mái tóc đen và cô ấy đang mặc một chiếc áo khoác màu đen với quần jean màu xanh kết hợp với giày đen.'],
+ # 'viet_prompt_caption': ['Người phụ nữ có mái tóc ngắn. Cô đang mặc một chiếc áo khoác màu đen, quần jean denim màu xanh và giày thể thao màu đen. Cô đang mang theo một ba lô màu đen.']}
+
+
+
+ def get_output(english_text):
+     response = client.chat.completions.create(
+         model=model_name,
+         messages=[
+             {
+                 "role": "system",
+                 "content": "You are a helpful assistant who is proficient in translating English to Chinese.",
+             },
+             {
+                 "role": "user",
+                 "content": "Please translate and paraphrase the following sentence into natural, fluent Chinese: " + english_text,
+             },
+         ],
+         temperature=0.7,
+         top_p=0.9,
+     )
+     return response.choices[0].message.content
+
+
+ output_root_folder = './output_chinese'
+ import os
+ # Create the output directory if it does not exist yet.
+ os.makedirs(output_root_folder, exist_ok=True)
+
+ # Multithreaded inference with NUM_THREAD worker threads.
+
+ import threading
+ import time
+
+ # def get_list_partition_index(n, num_partition):
+ #     partition_size = n // num_partition
+ #     partition_index = []
+ #     for i in range(num_partition):
+ #         if i == num_partition - 1:
+ #             partition_index.append((i * partition_size, n))
+ #         else:
+ #             partition_index.append((i * partition_size, (i + 1) * partition_size))
+ #     return partition_index
+
+ # /dscilab_dungvo/workspace/vlm_clone/a_mllm_notebooks/openai/output_chinese/thread_32/4509280.json
+ def get_uninferenced_indices(total_indices, output_dir):
+     inferenced_indices = set()  # indices that already have a result file, so reruns can resume
+     for thread_folder in os.listdir(output_dir):
+         if 'thread' not in thread_folder:
+             continue
+         thread_path = os.path.join(output_dir, thread_folder)
+         if os.path.isdir(thread_path):
+             for json_file in os.listdir(thread_path):
+                 try:
+                     index = json_file.split('.')[0]
+                     index = int(index)
+                 except ValueError:
+                     print(f"Error: {json_file}")
+                     continue
+                 inferenced_indices.add(index)
+     uninferenced_indices = [index for index in total_indices if index not in inferenced_indices]
+     return uninferenced_indices
+
+ total_indices = list(range(len(dataset)))
+ REMAIN_INDEXES = get_uninferenced_indices(total_indices, output_root_folder)
+
+ def get_list_partition_from_list_index(list_index, num_partition):
+     n = len(list_index)
+     partition_size = n // num_partition  # the last partition absorbs the remainder
+     partition_index = []
+     for i in range(num_partition):
+         if i == num_partition - 1:
+             partition_index.append(list_index[i * partition_size:])
+         else:
+             partition_index.append(list_index[i * partition_size:(i + 1) * partition_size])
+     return partition_index
+
+
+ # LIST_PARTITION_INDEX is a list of index lists, one per thread.
+ LIST_PARTITION_INDEX = get_list_partition_from_list_index(REMAIN_INDEXES, NUM_THREAD)
+ import json
+
+ # Each thread loops over its partition; for each index it requests the Chinese translation of prompt_caption, caption_0 and caption_1.
+
+ def thread_function(thread_id):
+     # Make the output folder for this thread.
+     os.makedirs(os.path.join(output_root_folder, f"thread_{thread_id}"), exist_ok=True)
+
+     list_index = LIST_PARTITION_INDEX[thread_id]
+
+     for i in tqdm(range(len(list_index))):
+         if i % 1000 == 0:
+             print(f"Thread {thread_id}: {i}/{len(list_index)}")
+
+         index = list_index[i]
+         item = dataset[index]
+         dump_item = {}
+
+         for key in ['prompt_caption', 'caption_0', 'caption_1']:
+             english_text = item[key]
+
+             if english_text == '':
+                 chinese_text = ''
+             else:
+                 chinese_text = get_output(english_text)
+             dump_item[key + '_chinese'] = chinese_text
+
+         # Dump this record to its own JSON file.
+         with open(os.path.join(output_root_folder, f"thread_{thread_id}", f"{index}.json"), 'w') as f:
+             json.dump(dump_item, f)
+
+     print(f"Thread {thread_id}: Done")
+
+ threads = []
+ # for i, (start, end) in enumerate(LIST_PARTITION_INDEX):
+ for i in range(NUM_THREAD):
+     x = threading.Thread(target=thread_function, args=(i,))
+     threads.append(x)
+     x.start()
+     time.sleep(1)
+
+ for thread in threads:
+     thread.join()
+
+ print("Done")
+
+ # # Combine all JSON files from the thread folders into a single JSON Lines file
+ # import os
+ # import json
+ # list_json_files = []
+ # for thread_folder in os.listdir(output_root_folder):
+ #     for json_file in os.listdir(os.path.join(output_root_folder, thread_folder)):
+ #         list_json_files.append(os.path.join(output_root_folder, thread_folder, json_file))
+
+ # output_json_file = './output_chinese.jsonl'
+ # with open(output_json_file, 'w') as f:
+ #     for json_file in list_json_files:
+ #         with open(json_file, 'r') as f_json:
+ #             json.dump(json.load(f_json), f)
+ #             f.write('\n')
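infer.py fans the requests out with raw threading.Thread objects over a manual index partition, and a single failed request will kill its whole thread. A minimal alternative sketch using concurrent.futures with a simple retry wrapper is shown below; it reuses get_output, dataset, NUM_THREAD, REMAIN_INDEXES and output_root_folder as defined above, writes one JSON file per record directly under the output folder, and the retry count and backoff are illustrative assumptions, not part of the original script.

# Sketch only: thread-pool variant of the loop in infer.py, with retries.
import json
import os
import time
from concurrent.futures import ThreadPoolExecutor

def get_output_with_retry(text, retries=3, backoff=2.0):
    # Retry transient API/connection errors with a linear backoff.
    for attempt in range(retries):
        try:
            return get_output(text)
        except Exception:
            if attempt == retries - 1:
                raise
            time.sleep(backoff * (attempt + 1))

def process_index(index):
    item = dataset[index]
    out = {}
    for key in ['prompt_caption', 'caption_0', 'caption_1']:
        text = item[key]
        out[key + '_chinese'] = get_output_with_retry(text) if text else ''
    with open(os.path.join(output_root_folder, f"{index}.json"), 'w') as f:
        json.dump(out, f, ensure_ascii=False)

with ThreadPoolExecutor(max_workers=NUM_THREAD) as pool:
    list(pool.map(process_index, REMAIN_INDEXES))  # list() surfaces worker exceptions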
a_mllm_notebooks/openai/infer.sh ADDED
@@ -0,0 +1,4 @@
+ eval "$(conda shell.bash hook)"
+ conda activate lmdeploy
+
+ python infer.py &
a_mllm_notebooks/openai/langchain_openai_api.ipynb ADDED
The diff for this file is too large to render. See raw diff
 
a_mllm_notebooks/openai/load_synth_pedes.ipynb ADDED
@@ -0,0 +1,96 @@
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": null,
6
+ "metadata": {},
7
+ "outputs": [],
8
+ "source": [
9
+ "import datasets, huggingface_hub\n",
10
+ "# huggingface_hub.login(token=\"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\")\n",
11
+ "disk_path ='/dscilab_dungvo/workspace/BA-PRE_THESIS/dataset_pretraining/SYNTH-PEDES/annotation_english_vietnamese_processed'\n",
12
+ "dataset = datasets.load_from_disk(disk_path)\n",
13
+ "# dataset = dataset.cast_column('image', datasets.Image(decode=True))"
14
+ ]
15
+ },
16
+ {
17
+ "cell_type": "code",
18
+ "execution_count": 5,
19
+ "metadata": {},
20
+ "outputs": [
21
+ {
22
+ "data": {
23
+ "text/plain": [
24
+ "Dataset({\n",
25
+ " features: ['image_name', 'person_id', 'caption_0', 'caption_1', 'attributes', 'prompt_caption', 'image', 'viet_captions', 'viet_prompt_caption'],\n",
26
+ " num_rows: 4791127\n",
27
+ "})"
28
+ ]
29
+ },
30
+ "execution_count": 5,
31
+ "metadata": {},
32
+ "output_type": "execute_result"
33
+ }
34
+ ],
35
+ "source": [
36
+ "dataset"
37
+ ]
38
+ },
39
+ {
40
+ "cell_type": "code",
41
+ "execution_count": 4,
42
+ "metadata": {},
43
+ "outputs": [
44
+ {
45
+ "data": {
46
+ "text/plain": [
47
+ "{'image_name': 'Part1/1/0.jpg',\n",
48
+ " 'person_id': 1,\n",
49
+ " 'caption_0': 'A woman with black hair and she is wearing a black jacket with blue jeans paired with black shoes.',\n",
50
+ " 'caption_1': '',\n",
51
+ " 'attributes': 'woman,short hair,black jacket,blue denim jeans,black sneakers,black backpack',\n",
52
+ " 'prompt_caption': 'The woman has short hair. She is wearing a black jacket, blue denim jeans and black sneakers. She is carrying a black backpack. ',\n",
53
+ " 'image': <PIL.JpegImagePlugin.JpegImageFile image mode=RGB size=59x129>,\n",
54
+ " 'viet_captions': ['Một người phụ nữ với mái tóc đen và cô ấy đang mặc một chiếc áo khoác màu đen với quần jean màu xanh kết hợp với giày đen.'],\n",
55
+ " 'viet_prompt_caption': ['Người phụ nữ có mái tóc ngắn. Cô đang mặc một chiếc áo khoác màu đen, quần jean denim màu xanh và giày thể thao màu đen. Cô đang mang theo một ba lô màu đen.']}"
56
+ ]
57
+ },
58
+ "execution_count": 4,
59
+ "metadata": {},
60
+ "output_type": "execute_result"
61
+ }
62
+ ],
63
+ "source": [
64
+ "dataset[0]"
65
+ ]
66
+ },
67
+ {
68
+ "cell_type": "code",
69
+ "execution_count": null,
70
+ "metadata": {},
71
+ "outputs": [],
72
+ "source": []
73
+ }
74
+ ],
75
+ "metadata": {
76
+ "kernelspec": {
77
+ "display_name": "Python 3 (ipykernel)",
78
+ "language": "python",
79
+ "name": "python3"
80
+ },
81
+ "language_info": {
82
+ "codemirror_mode": {
83
+ "name": "ipython",
84
+ "version": 3
85
+ },
86
+ "file_extension": ".py",
87
+ "mimetype": "text/x-python",
88
+ "name": "python",
89
+ "nbconvert_exporter": "python",
90
+ "pygments_lexer": "ipython3",
91
+ "version": "3.12.2"
92
+ }
93
+ },
94
+ "nbformat": 4,
95
+ "nbformat_minor": 4
96
+ }
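The notebook loads all 4,791,127 rows from disk and then indexes single records. A small sketch for previewing a few rows without decoding any images, assuming the same disk_path as above, is:

# Sketch only: preview a few records of the SYNTH-PEDES annotations.
import datasets

disk_path = '/dscilab_dungvo/workspace/BA-PRE_THESIS/dataset_pretraining/SYNTH-PEDES/annotation_english_vietnamese_processed'
dataset = datasets.load_from_disk(disk_path)

preview = dataset.select(range(3)).remove_columns(['image'])  # drop the image column to avoid decoding
for row in preview:
    print(row['image_name'], '->', row['prompt_caption'])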
a_mllm_notebooks/openai/ping_server.ipynb ADDED
@@ -0,0 +1,416 @@
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": 1,
6
+ "metadata": {},
7
+ "outputs": [],
8
+ "source": [
9
+ "# !pip install openai\n",
10
+ "from openai import OpenAI\n",
11
+ "\n",
12
+ "client = OpenAI(api_key=\"YOUR_API_KEY\", base_url=\"http://0.0.0.0:8092/v1\")\n",
13
+ "model_name = client.models.list().data[0].id\n",
14
+ "# response = client.chat.completions.create(\n",
15
+ "# model=model_name,\n",
16
+ "# messages=[\n",
17
+ "# {\n",
18
+ "# \"role\": \"system\",\n",
19
+ "# \"content\": \"You are a helpful assistant who is proficient in translating English to Chinese.\",\n",
20
+ "# },\n",
21
+ "# {\n",
22
+ "# \"role\": \"user\",\n",
23
+ "# \"content\": \"Please translate and paraphrase the following sentence into natural, fluent Chinese: \",\n",
24
+ "# },\n",
25
+ "# ],\n",
26
+ "# temperature=0.8,\n",
27
+ "# top_p=0.9,\n",
28
+ "# )\n",
29
+ "# print(response)"
30
+ ]
31
+ },
32
+ {
33
+ "cell_type": "code",
34
+ "execution_count": 2,
35
+ "metadata": {},
36
+ "outputs": [
37
+ {
38
+ "data": {
39
+ "text/plain": [
40
+ "1"
41
+ ]
42
+ },
43
+ "execution_count": 2,
44
+ "metadata": {},
45
+ "output_type": "execute_result"
46
+ }
47
+ ],
48
+ "source": [
49
+ "len(client.models.list().data)"
50
+ ]
51
+ },
52
+ {
53
+ "cell_type": "code",
54
+ "execution_count": 3,
55
+ "metadata": {},
56
+ "outputs": [
57
+ {
58
+ "data": {
59
+ "text/plain": [
60
+ "'这个人穿着红色的衬衫和蓝色的牛仔裤。'"
61
+ ]
62
+ },
63
+ "execution_count": 3,
64
+ "metadata": {},
65
+ "output_type": "execute_result"
66
+ }
67
+ ],
68
+ "source": [
69
+ "def get_output(english_text):\n",
70
+ " response = client.chat.completions.create(\n",
71
+ " model=model_name,\n",
72
+ " messages=[\n",
73
+ " {\n",
74
+ " \"role\": \"system\",\n",
75
+ " \"content\": \"You are a helpful assistant who is proficient in translating English to Chinese.\",\n",
76
+ " },\n",
77
+ " {\n",
78
+ " \"role\": \"user\",\n",
79
+ " \"content\": \"Please translate and paraphrase the following sentence into natural, fluent Chinese: \" + english_text,\n",
80
+ " },\n",
81
+ " ],\n",
82
+ " temperature=0.7,\n",
83
+ " top_p=0.9,\n",
84
+ " )\n",
85
+ " return response.choices[0].message.content\n",
86
+ "\n",
87
+ "o = get_output(\"The man is wearing a red shirt and blue jeans.\" * 1)\n",
88
+ "o"
89
+ ]
90
+ },
91
+ {
92
+ "cell_type": "code",
93
+ "execution_count": 13,
94
+ "metadata": {},
95
+ "outputs": [
96
+ {
97
+ "data": {
98
+ "text/plain": [
99
+ "'```\\nparaphrase:\\n- 1: The man is walking. He is wearing a dark grey jacket that fits closely. His black trousers are tight. He is wearing brown shoes.\\n- 2: As he walks, the man is dressed in a dark grey jacket that hugs his body. His black trousers are snug, and he is wearing brown shoes.\\n- 3: The man is out for a walk, wearing a dark grey jacket that fits snugly. His black trousers are tight, and he is wearing brown shoes.\\n```'"
100
+ ]
101
+ },
102
+ "execution_count": 13,
103
+ "metadata": {},
104
+ "output_type": "execute_result"
105
+ }
106
+ ],
107
+ "source": [
108
+ "def get_output(english_text):\n",
109
+ " response = client.chat.completions.create(\n",
110
+ " model=model_name,\n",
111
+ " messages=[\n",
112
+ " {\n",
113
+ " \"role\": \"system\",\n",
114
+ " \"content\": \"You are a helpful assistant who is proficient in paraphrase text\",\n",
115
+ " },\n",
116
+ " {\n",
117
+ " \"role\": \"user\",\n",
118
+ " \"content\": english_text,\n",
119
+ " },\n",
120
+ " ],\n",
121
+ " temperature=0.7,\n",
122
+ " top_p=0.9,\n",
123
+ " )\n",
124
+ " return response.choices[0].message.content\n",
125
+ "\n",
126
+ "o = get_output('''\n",
127
+ "Please correct the grammar of this text, then paraphrase it into 3 different ways:\n",
128
+ "The man is walking. He is wearing a dark grey jacket that is close. His trousers is black. He has a pair of brown shoes. \n",
129
+ "The man is walking. He is wearing a dark grey jacket that is close. His trousers is black. He has a pair of brown shoes.\n",
130
+ "The man is walking. He is wearing a dark grey jacket that is close. His trousers is black. He has a pair of brown shoes.\n",
131
+ "The man is walking. He is wearing a dark grey jacket that is close. His trousers is black. He has a pair of brown shoes.\n",
132
+ "The man is walking. He is wearing a dark grey jacket that is close. His trousers is black. He has a pair of brown shoes.\n",
133
+ "The man is walking. He is wearing a dark grey jacket that is close. His trousers is black. He has a pair of brown shoes.\n",
134
+ "The man is walking. He is wearing a dark grey jacket that is close. His trousers is black. He has a pair of brown shoes.\n",
135
+ "\n",
136
+ "\n",
137
+ "Return your answer in YAML format without comment or explaining anything.\n",
138
+ "Example:\n",
139
+ "```\n",
140
+ "paraphrase:\n",
141
+ "- 1: ...\n",
142
+ "- 2: ...\n",
143
+ "- 3: ...\n",
144
+ "```\n",
145
+ "''')\n",
146
+ "o"
147
+ ]
148
+ },
149
+ {
150
+ "cell_type": "code",
151
+ "execution_count": 15,
152
+ "metadata": {},
153
+ "outputs": [
154
+ {
155
+ "name": "stdout",
156
+ "output_type": "stream",
157
+ "text": [
158
+ "1.06 s ± 39.9 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)\n"
159
+ ]
160
+ }
161
+ ],
162
+ "source": [
163
+ "%%timeit\n",
164
+ "o = get_output('''\n",
165
+ "Please correct the grammar of this text, then paraphrase it into 3 different ways:\n",
166
+ "The man is walking. He is wearing a dark grey jacket that is close. His trousers is black. He has a pair of brown shoes.\n",
167
+ "The man is walking. He is wearing a dark grey jacket that is close. His trousers is black. He has a pair of brown shoes.\n",
168
+ "The man is walking. He is wearing a dark grey jacket that is close. His trousers is black. He has a pair of brown shoes.\n",
169
+ "The man is walking. He is wearing a dark grey jacket that is close. His trousers is black. He has a pair of brown shoes.\n",
170
+ "\n",
171
+ "Return your answer in YAML format without comment or explaining anything.\n",
172
+ "Example:\n",
173
+ "```\n",
174
+ "paraphrase:\n",
175
+ "- 1: ...\n",
176
+ "- 2: ...\n",
177
+ "- 3: ...\n",
178
+ "```\n",
179
+ "''')\n",
180
+ "o"
181
+ ]
182
+ },
183
+ {
184
+ "cell_type": "code",
185
+ "execution_count": 9,
186
+ "metadata": {},
187
+ "outputs": [
188
+ {
189
+ "data": {
190
+ "text/plain": [
191
+ "{'paraphrase': [{1: 'The man is walking in a dark grey jacket and black trousers, wearing a pair of brown shoes.'},\n",
192
+ " {2: 'As he walks, the man is dressed in a dark grey jacket, black trousers, and brown shoes.'},\n",
193
+ " {3: 'The man is on his walk, wearing a dark grey jacket, black trousers, and brown shoes.'}]}"
194
+ ]
195
+ },
196
+ "execution_count": 9,
197
+ "metadata": {},
198
+ "output_type": "execute_result"
199
+ }
200
+ ],
201
+ "source": [
202
+ "import yaml\n",
203
+ "\n",
204
+ "def load_yaml_string(yaml_string):\n",
205
+ " # Remove the triple backticks and any leading/trailing whitespace\n",
206
+ " yaml_string = yaml_string.strip('```').strip()\n",
207
+ " \n",
208
+ " # Load the YAML string into a Python dictionary\n",
209
+ " data = yaml.safe_load(yaml_string)\n",
210
+ " \n",
211
+ " return data\n",
212
+ "load_yaml_string(o)"
213
+ ]
214
+ },
215
+ {
216
+ "cell_type": "code",
217
+ "execution_count": 5,
218
+ "metadata": {},
219
+ "outputs": [
220
+ {
221
+ "data": {
222
+ "text/plain": [
223
+ "21"
224
+ ]
225
+ },
226
+ "execution_count": 5,
227
+ "metadata": {},
228
+ "output_type": "execute_result"
229
+ }
230
+ ],
231
+ "source": []
232
+ },
233
+ {
234
+ "cell_type": "code",
235
+ "execution_count": 1,
236
+ "metadata": {},
237
+ "outputs": [],
238
+ "source": [
239
+ "# !ps aux|grep infer|grep -v grep | awk '{print $2}'|xargs kill -9"
240
+ ]
241
+ },
242
+ {
243
+ "cell_type": "code",
244
+ "execution_count": 5,
245
+ "metadata": {},
246
+ "outputs": [
247
+ {
248
+ "ename": "APIConnectionError",
249
+ "evalue": "Connection error.",
250
+ "output_type": "error",
251
+ "traceback": [
252
+ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
253
+ "\u001b[0;31mConnectError\u001b[0m Traceback (most recent call last)",
254
+ "File \u001b[0;32m/usr/local/lib/python3.11/dist-packages/httpx/_transports/default.py:101\u001b[0m, in \u001b[0;36mmap_httpcore_exceptions\u001b[0;34m()\u001b[0m\n\u001b[1;32m 100\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m--> 101\u001b[0m \u001b[38;5;28;01myield\u001b[39;00m\n\u001b[1;32m 102\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mException\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m exc:\n",
255
+ "File \u001b[0;32m/usr/local/lib/python3.11/dist-packages/httpx/_transports/default.py:250\u001b[0m, in \u001b[0;36mHTTPTransport.handle_request\u001b[0;34m(self, request)\u001b[0m\n\u001b[1;32m 249\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m map_httpcore_exceptions():\n\u001b[0;32m--> 250\u001b[0m resp \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_pool\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mhandle_request\u001b[49m\u001b[43m(\u001b[49m\u001b[43mreq\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 252\u001b[0m \u001b[38;5;28;01massert\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(resp\u001b[38;5;241m.\u001b[39mstream, typing\u001b[38;5;241m.\u001b[39mIterable)\n",
256
+ "File \u001b[0;32m/usr/local/lib/python3.11/dist-packages/httpcore/_sync/connection_pool.py:256\u001b[0m, in \u001b[0;36mConnectionPool.handle_request\u001b[0;34m(self, request)\u001b[0m\n\u001b[1;32m 255\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_close_connections(closing)\n\u001b[0;32m--> 256\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m exc \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[1;32m 258\u001b[0m \u001b[38;5;66;03m# Return the response. Note that in this case we still have to manage\u001b[39;00m\n\u001b[1;32m 259\u001b[0m \u001b[38;5;66;03m# the point at which the response is closed.\u001b[39;00m\n",
257
+ "File \u001b[0;32m/usr/local/lib/python3.11/dist-packages/httpcore/_sync/connection_pool.py:236\u001b[0m, in \u001b[0;36mConnectionPool.handle_request\u001b[0;34m(self, request)\u001b[0m\n\u001b[1;32m 234\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[1;32m 235\u001b[0m \u001b[38;5;66;03m# Send the request on the assigned connection.\u001b[39;00m\n\u001b[0;32m--> 236\u001b[0m response \u001b[38;5;241m=\u001b[39m \u001b[43mconnection\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mhandle_request\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 237\u001b[0m \u001b[43m \u001b[49m\u001b[43mpool_request\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mrequest\u001b[49m\n\u001b[1;32m 238\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 239\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m ConnectionNotAvailable:\n\u001b[1;32m 240\u001b[0m \u001b[38;5;66;03m# In some cases a connection may initially be available to\u001b[39;00m\n\u001b[1;32m 241\u001b[0m \u001b[38;5;66;03m# handle a request, but then become unavailable.\u001b[39;00m\n\u001b[1;32m 242\u001b[0m \u001b[38;5;66;03m#\u001b[39;00m\n\u001b[1;32m 243\u001b[0m \u001b[38;5;66;03m# In this case we clear the connection and try again.\u001b[39;00m\n",
258
+ "File \u001b[0;32m/usr/local/lib/python3.11/dist-packages/httpcore/_sync/connection.py:101\u001b[0m, in \u001b[0;36mHTTPConnection.handle_request\u001b[0;34m(self, request)\u001b[0m\n\u001b[1;32m 100\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_connect_failed \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mTrue\u001b[39;00m\n\u001b[0;32m--> 101\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m exc\n\u001b[1;32m 103\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_connection\u001b[38;5;241m.\u001b[39mhandle_request(request)\n",
259
+ "File \u001b[0;32m/usr/local/lib/python3.11/dist-packages/httpcore/_sync/connection.py:78\u001b[0m, in \u001b[0;36mHTTPConnection.handle_request\u001b[0;34m(self, request)\u001b[0m\n\u001b[1;32m 77\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_connection \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[0;32m---> 78\u001b[0m stream \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_connect\u001b[49m\u001b[43m(\u001b[49m\u001b[43mrequest\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 80\u001b[0m ssl_object \u001b[38;5;241m=\u001b[39m stream\u001b[38;5;241m.\u001b[39mget_extra_info(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mssl_object\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n",
260
+ "File \u001b[0;32m/usr/local/lib/python3.11/dist-packages/httpcore/_sync/connection.py:124\u001b[0m, in \u001b[0;36mHTTPConnection._connect\u001b[0;34m(self, request)\u001b[0m\n\u001b[1;32m 123\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m Trace(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mconnect_tcp\u001b[39m\u001b[38;5;124m\"\u001b[39m, logger, request, kwargs) \u001b[38;5;28;01mas\u001b[39;00m trace:\n\u001b[0;32m--> 124\u001b[0m stream \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_network_backend\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mconnect_tcp\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 125\u001b[0m trace\u001b[38;5;241m.\u001b[39mreturn_value \u001b[38;5;241m=\u001b[39m stream\n",
261
+ "File \u001b[0;32m/usr/local/lib/python3.11/dist-packages/httpcore/_backends/sync.py:207\u001b[0m, in \u001b[0;36mSyncBackend.connect_tcp\u001b[0;34m(self, host, port, timeout, local_address, socket_options)\u001b[0m\n\u001b[1;32m 202\u001b[0m exc_map: ExceptionMapping \u001b[38;5;241m=\u001b[39m {\n\u001b[1;32m 203\u001b[0m socket\u001b[38;5;241m.\u001b[39mtimeout: ConnectTimeout,\n\u001b[1;32m 204\u001b[0m \u001b[38;5;167;01mOSError\u001b[39;00m: ConnectError,\n\u001b[1;32m 205\u001b[0m }\n\u001b[0;32m--> 207\u001b[0m \u001b[43m\u001b[49m\u001b[38;5;28;43;01mwith\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[43mmap_exceptions\u001b[49m\u001b[43m(\u001b[49m\u001b[43mexc_map\u001b[49m\u001b[43m)\u001b[49m\u001b[43m:\u001b[49m\n\u001b[1;32m 208\u001b[0m \u001b[43m \u001b[49m\u001b[43msock\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m \u001b[49m\u001b[43msocket\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mcreate_connection\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 209\u001b[0m \u001b[43m \u001b[49m\u001b[43maddress\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 210\u001b[0m \u001b[43m \u001b[49m\u001b[43mtimeout\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 211\u001b[0m \u001b[43m \u001b[49m\u001b[43msource_address\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43msource_address\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 212\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n",
262
+ "File \u001b[0;32m/usr/lib/python3.11/contextlib.py:155\u001b[0m, in \u001b[0;36m_GeneratorContextManager.__exit__\u001b[0;34m(self, typ, value, traceback)\u001b[0m\n\u001b[1;32m 154\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m--> 155\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mgen\u001b[38;5;241m.\u001b[39mthrow(typ, value, traceback)\n\u001b[1;32m 156\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mStopIteration\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m exc:\n\u001b[1;32m 157\u001b[0m \u001b[38;5;66;03m# Suppress StopIteration *unless* it's the same exception that\u001b[39;00m\n\u001b[1;32m 158\u001b[0m \u001b[38;5;66;03m# was passed to throw(). This prevents a StopIteration\u001b[39;00m\n\u001b[1;32m 159\u001b[0m \u001b[38;5;66;03m# raised inside the \"with\" statement from being suppressed.\u001b[39;00m\n",
263
+ "File \u001b[0;32m/usr/local/lib/python3.11/dist-packages/httpcore/_exceptions.py:14\u001b[0m, in \u001b[0;36mmap_exceptions\u001b[0;34m(map)\u001b[0m\n\u001b[1;32m 13\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(exc, from_exc):\n\u001b[0;32m---> 14\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m to_exc(exc) \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mexc\u001b[39;00m\n\u001b[1;32m 15\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m\n",
264
+ "\u001b[0;31mConnectError\u001b[0m: [Errno 111] Connection refused",
265
+ "\nThe above exception was the direct cause of the following exception:\n",
266
+ "\u001b[0;31mConnectError\u001b[0m Traceback (most recent call last)",
267
+ "File \u001b[0;32m/usr/local/lib/python3.11/dist-packages/openai/_base_client.py:993\u001b[0m, in \u001b[0;36mSyncAPIClient._request\u001b[0;34m(self, cast_to, options, retries_taken, stream, stream_cls)\u001b[0m\n\u001b[1;32m 992\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m--> 993\u001b[0m response \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_client\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43msend\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 994\u001b[0m \u001b[43m \u001b[49m\u001b[43mrequest\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 995\u001b[0m \u001b[43m \u001b[49m\u001b[43mstream\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mstream\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;129;43;01mor\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_should_stream_response_body\u001b[49m\u001b[43m(\u001b[49m\u001b[43mrequest\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mrequest\u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 996\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 997\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 998\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m httpx\u001b[38;5;241m.\u001b[39mTimeoutException \u001b[38;5;28;01mas\u001b[39;00m err:\n",
268
+ "File \u001b[0;32m/usr/local/lib/python3.11/dist-packages/httpx/_client.py:914\u001b[0m, in \u001b[0;36mClient.send\u001b[0;34m(self, request, stream, auth, follow_redirects)\u001b[0m\n\u001b[1;32m 912\u001b[0m auth \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_build_request_auth(request, auth)\n\u001b[0;32m--> 914\u001b[0m response \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_send_handling_auth\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 915\u001b[0m \u001b[43m \u001b[49m\u001b[43mrequest\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 916\u001b[0m \u001b[43m \u001b[49m\u001b[43mauth\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mauth\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 917\u001b[0m \u001b[43m \u001b[49m\u001b[43mfollow_redirects\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mfollow_redirects\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 918\u001b[0m \u001b[43m \u001b[49m\u001b[43mhistory\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m[\u001b[49m\u001b[43m]\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 919\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 920\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n",
269
+ "File \u001b[0;32m/usr/local/lib/python3.11/dist-packages/httpx/_client.py:942\u001b[0m, in \u001b[0;36mClient._send_handling_auth\u001b[0;34m(self, request, auth, follow_redirects, history)\u001b[0m\n\u001b[1;32m 941\u001b[0m \u001b[38;5;28;01mwhile\u001b[39;00m \u001b[38;5;28;01mTrue\u001b[39;00m:\n\u001b[0;32m--> 942\u001b[0m response \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_send_handling_redirects\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 943\u001b[0m \u001b[43m \u001b[49m\u001b[43mrequest\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 944\u001b[0m \u001b[43m \u001b[49m\u001b[43mfollow_redirects\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mfollow_redirects\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 945\u001b[0m \u001b[43m \u001b[49m\u001b[43mhistory\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mhistory\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 946\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 947\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n",
270
+ "File \u001b[0;32m/usr/local/lib/python3.11/dist-packages/httpx/_client.py:979\u001b[0m, in \u001b[0;36mClient._send_handling_redirects\u001b[0;34m(self, request, follow_redirects, history)\u001b[0m\n\u001b[1;32m 977\u001b[0m hook(request)\n\u001b[0;32m--> 979\u001b[0m response \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_send_single_request\u001b[49m\u001b[43m(\u001b[49m\u001b[43mrequest\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 980\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n",
271
+ "File \u001b[0;32m/usr/local/lib/python3.11/dist-packages/httpx/_client.py:1014\u001b[0m, in \u001b[0;36mClient._send_single_request\u001b[0;34m(self, request)\u001b[0m\n\u001b[1;32m 1013\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m request_context(request\u001b[38;5;241m=\u001b[39mrequest):\n\u001b[0;32m-> 1014\u001b[0m response \u001b[38;5;241m=\u001b[39m \u001b[43mtransport\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mhandle_request\u001b[49m\u001b[43m(\u001b[49m\u001b[43mrequest\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1016\u001b[0m \u001b[38;5;28;01massert\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(response\u001b[38;5;241m.\u001b[39mstream, SyncByteStream)\n",
272
+ "File \u001b[0;32m/usr/local/lib/python3.11/dist-packages/httpx/_transports/default.py:249\u001b[0m, in \u001b[0;36mHTTPTransport.handle_request\u001b[0;34m(self, request)\u001b[0m\n\u001b[1;32m 237\u001b[0m req \u001b[38;5;241m=\u001b[39m httpcore\u001b[38;5;241m.\u001b[39mRequest(\n\u001b[1;32m 238\u001b[0m method\u001b[38;5;241m=\u001b[39mrequest\u001b[38;5;241m.\u001b[39mmethod,\n\u001b[1;32m 239\u001b[0m url\u001b[38;5;241m=\u001b[39mhttpcore\u001b[38;5;241m.\u001b[39mURL(\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 247\u001b[0m extensions\u001b[38;5;241m=\u001b[39mrequest\u001b[38;5;241m.\u001b[39mextensions,\n\u001b[1;32m 248\u001b[0m )\n\u001b[0;32m--> 249\u001b[0m \u001b[43m\u001b[49m\u001b[38;5;28;43;01mwith\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[43mmap_httpcore_exceptions\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[43m:\u001b[49m\n\u001b[1;32m 250\u001b[0m \u001b[43m \u001b[49m\u001b[43mresp\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_pool\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mhandle_request\u001b[49m\u001b[43m(\u001b[49m\u001b[43mreq\u001b[49m\u001b[43m)\u001b[49m\n",
273
+ "File \u001b[0;32m/usr/lib/python3.11/contextlib.py:155\u001b[0m, in \u001b[0;36m_GeneratorContextManager.__exit__\u001b[0;34m(self, typ, value, traceback)\u001b[0m\n\u001b[1;32m 154\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m--> 155\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mgen\u001b[38;5;241m.\u001b[39mthrow(typ, value, traceback)\n\u001b[1;32m 156\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mStopIteration\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m exc:\n\u001b[1;32m 157\u001b[0m \u001b[38;5;66;03m# Suppress StopIteration *unless* it's the same exception that\u001b[39;00m\n\u001b[1;32m 158\u001b[0m \u001b[38;5;66;03m# was passed to throw(). This prevents a StopIteration\u001b[39;00m\n\u001b[1;32m 159\u001b[0m \u001b[38;5;66;03m# raised inside the \"with\" statement from being suppressed.\u001b[39;00m\n",
274
+ "File \u001b[0;32m/usr/local/lib/python3.11/dist-packages/httpx/_transports/default.py:118\u001b[0m, in \u001b[0;36mmap_httpcore_exceptions\u001b[0;34m()\u001b[0m\n\u001b[1;32m 117\u001b[0m message \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mstr\u001b[39m(exc)\n\u001b[0;32m--> 118\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m mapped_exc(message) \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mexc\u001b[39;00m\n",
275
+ "\u001b[0;31mConnectError\u001b[0m: [Errno 111] Connection refused",
276
+ "\nThe above exception was the direct cause of the following exception:\n",
277
+ "\u001b[0;31mAPIConnectionError\u001b[0m Traceback (most recent call last)",
278
+ "Cell \u001b[0;32mIn[5], line 6\u001b[0m\n\u001b[1;32m 3\u001b[0m port \u001b[38;5;241m=\u001b[39m \u001b[38;5;241m2000\u001b[39m\n\u001b[1;32m 5\u001b[0m client \u001b[38;5;241m=\u001b[39m OpenAI(api_key\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mYOUR_API_KEY\u001b[39m\u001b[38;5;124m\"\u001b[39m, base_url\u001b[38;5;241m=\u001b[39m\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mhttp://0.0.0.0:\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mport\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m/v1\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[0;32m----> 6\u001b[0m model_name \u001b[38;5;241m=\u001b[39m \u001b[43mclient\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mmodels\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mlist\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[38;5;241m.\u001b[39mdata[\u001b[38;5;241m0\u001b[39m]\u001b[38;5;241m.\u001b[39mid\n\u001b[1;32m 7\u001b[0m response \u001b[38;5;241m=\u001b[39m client\u001b[38;5;241m.\u001b[39mchat\u001b[38;5;241m.\u001b[39mcompletions\u001b[38;5;241m.\u001b[39mcreate(\n\u001b[1;32m 8\u001b[0m model\u001b[38;5;241m=\u001b[39mmodel_name,\n\u001b[1;32m 9\u001b[0m messages\u001b[38;5;241m=\u001b[39m[\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 27\u001b[0m top_p\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m0.8\u001b[39m,\n\u001b[1;32m 28\u001b[0m )\n\u001b[1;32m 29\u001b[0m \u001b[38;5;28mprint\u001b[39m(response)\n",
279
+ "File \u001b[0;32m/usr/local/lib/python3.11/dist-packages/openai/resources/models.py:91\u001b[0m, in \u001b[0;36mModels.list\u001b[0;34m(self, extra_headers, extra_query, extra_body, timeout)\u001b[0m\n\u001b[1;32m 77\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mlist\u001b[39m(\n\u001b[1;32m 78\u001b[0m \u001b[38;5;28mself\u001b[39m,\n\u001b[1;32m 79\u001b[0m \u001b[38;5;241m*\u001b[39m,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 85\u001b[0m timeout: \u001b[38;5;28mfloat\u001b[39m \u001b[38;5;241m|\u001b[39m httpx\u001b[38;5;241m.\u001b[39mTimeout \u001b[38;5;241m|\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;241m|\u001b[39m NotGiven \u001b[38;5;241m=\u001b[39m NOT_GIVEN,\n\u001b[1;32m 86\u001b[0m ) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m SyncPage[Model]:\n\u001b[1;32m 87\u001b[0m \u001b[38;5;250m \u001b[39m\u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 88\u001b[0m \u001b[38;5;124;03m Lists the currently available models, and provides basic information about each\u001b[39;00m\n\u001b[1;32m 89\u001b[0m \u001b[38;5;124;03m one such as the owner and availability.\u001b[39;00m\n\u001b[1;32m 90\u001b[0m \u001b[38;5;124;03m \"\"\"\u001b[39;00m\n\u001b[0;32m---> 91\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_get_api_list\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 92\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43m/models\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 93\u001b[0m \u001b[43m \u001b[49m\u001b[43mpage\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mSyncPage\u001b[49m\u001b[43m[\u001b[49m\u001b[43mModel\u001b[49m\u001b[43m]\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 94\u001b[0m \u001b[43m \u001b[49m\u001b[43moptions\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mmake_request_options\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 95\u001b[0m \u001b[43m \u001b[49m\u001b[43mextra_headers\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mextra_headers\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mextra_query\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mextra_query\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mextra_body\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mextra_body\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mtimeout\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mtimeout\u001b[49m\n\u001b[1;32m 96\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 97\u001b[0m \u001b[43m \u001b[49m\u001b[43mmodel\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mModel\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 98\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n",
280
+ "File \u001b[0;32m/usr/local/lib/python3.11/dist-packages/openai/_base_client.py:1329\u001b[0m, in \u001b[0;36mSyncAPIClient.get_api_list\u001b[0;34m(self, path, model, page, body, options, method)\u001b[0m\n\u001b[1;32m 1318\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mget_api_list\u001b[39m(\n\u001b[1;32m 1319\u001b[0m \u001b[38;5;28mself\u001b[39m,\n\u001b[1;32m 1320\u001b[0m path: \u001b[38;5;28mstr\u001b[39m,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 1326\u001b[0m method: \u001b[38;5;28mstr\u001b[39m \u001b[38;5;241m=\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mget\u001b[39m\u001b[38;5;124m\"\u001b[39m,\n\u001b[1;32m 1327\u001b[0m ) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m SyncPageT:\n\u001b[1;32m 1328\u001b[0m opts \u001b[38;5;241m=\u001b[39m FinalRequestOptions\u001b[38;5;241m.\u001b[39mconstruct(method\u001b[38;5;241m=\u001b[39mmethod, url\u001b[38;5;241m=\u001b[39mpath, json_data\u001b[38;5;241m=\u001b[39mbody, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39moptions)\n\u001b[0;32m-> 1329\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_request_api_list\u001b[49m\u001b[43m(\u001b[49m\u001b[43mmodel\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mpage\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mopts\u001b[49m\u001b[43m)\u001b[49m\n",
281
+ "File \u001b[0;32m/usr/local/lib/python3.11/dist-packages/openai/_base_client.py:1180\u001b[0m, in \u001b[0;36mSyncAPIClient._request_api_list\u001b[0;34m(self, model, page, options)\u001b[0m\n\u001b[1;32m 1176\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m resp\n\u001b[1;32m 1178\u001b[0m options\u001b[38;5;241m.\u001b[39mpost_parser \u001b[38;5;241m=\u001b[39m _parser\n\u001b[0;32m-> 1180\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mrequest\u001b[49m\u001b[43m(\u001b[49m\u001b[43mpage\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43moptions\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mstream\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mFalse\u001b[39;49;00m\u001b[43m)\u001b[49m\n",
282
+ "File \u001b[0;32m/usr/local/lib/python3.11/dist-packages/openai/_base_client.py:957\u001b[0m, in \u001b[0;36mSyncAPIClient.request\u001b[0;34m(self, cast_to, options, remaining_retries, stream, stream_cls)\u001b[0m\n\u001b[1;32m 954\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 955\u001b[0m retries_taken \u001b[38;5;241m=\u001b[39m \u001b[38;5;241m0\u001b[39m\n\u001b[0;32m--> 957\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_request\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 958\u001b[0m \u001b[43m \u001b[49m\u001b[43mcast_to\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcast_to\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 959\u001b[0m \u001b[43m \u001b[49m\u001b[43moptions\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43moptions\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 960\u001b[0m \u001b[43m \u001b[49m\u001b[43mstream\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mstream\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 961\u001b[0m \u001b[43m \u001b[49m\u001b[43mstream_cls\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mstream_cls\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 962\u001b[0m \u001b[43m \u001b[49m\u001b[43mretries_taken\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mretries_taken\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 963\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n",
283
+ "File \u001b[0;32m/usr/local/lib/python3.11/dist-packages/openai/_base_client.py:1017\u001b[0m, in \u001b[0;36mSyncAPIClient._request\u001b[0;34m(self, cast_to, options, retries_taken, stream, stream_cls)\u001b[0m\n\u001b[1;32m 1014\u001b[0m log\u001b[38;5;241m.\u001b[39mdebug(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mEncountered Exception\u001b[39m\u001b[38;5;124m\"\u001b[39m, exc_info\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mTrue\u001b[39;00m)\n\u001b[1;32m 1016\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m remaining_retries \u001b[38;5;241m>\u001b[39m \u001b[38;5;241m0\u001b[39m:\n\u001b[0;32m-> 1017\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_retry_request\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 1018\u001b[0m \u001b[43m \u001b[49m\u001b[43minput_options\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1019\u001b[0m \u001b[43m \u001b[49m\u001b[43mcast_to\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1020\u001b[0m \u001b[43m \u001b[49m\u001b[43mretries_taken\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mretries_taken\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1021\u001b[0m \u001b[43m \u001b[49m\u001b[43mstream\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mstream\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1022\u001b[0m \u001b[43m \u001b[49m\u001b[43mstream_cls\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mstream_cls\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1023\u001b[0m \u001b[43m \u001b[49m\u001b[43mresponse_headers\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mNone\u001b[39;49;00m\u001b[43m,\u001b[49m\n\u001b[1;32m 1024\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1026\u001b[0m log\u001b[38;5;241m.\u001b[39mdebug(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mRaising connection error\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m 1027\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m APIConnectionError(request\u001b[38;5;241m=\u001b[39mrequest) \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01merr\u001b[39;00m\n",
284
+ "File \u001b[0;32m/usr/local/lib/python3.11/dist-packages/openai/_base_client.py:1095\u001b[0m, in \u001b[0;36mSyncAPIClient._retry_request\u001b[0;34m(self, options, cast_to, retries_taken, response_headers, stream, stream_cls)\u001b[0m\n\u001b[1;32m 1091\u001b[0m \u001b[38;5;66;03m# In a synchronous context we are blocking the entire thread. Up to the library user to run the client in a\u001b[39;00m\n\u001b[1;32m 1092\u001b[0m \u001b[38;5;66;03m# different thread if necessary.\u001b[39;00m\n\u001b[1;32m 1093\u001b[0m time\u001b[38;5;241m.\u001b[39msleep(timeout)\n\u001b[0;32m-> 1095\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_request\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 1096\u001b[0m \u001b[43m \u001b[49m\u001b[43moptions\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43moptions\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1097\u001b[0m \u001b[43m \u001b[49m\u001b[43mcast_to\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcast_to\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1098\u001b[0m \u001b[43m \u001b[49m\u001b[43mretries_taken\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mretries_taken\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m+\u001b[39;49m\u001b[43m \u001b[49m\u001b[38;5;241;43m1\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1099\u001b[0m \u001b[43m \u001b[49m\u001b[43mstream\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mstream\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1100\u001b[0m \u001b[43m \u001b[49m\u001b[43mstream_cls\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mstream_cls\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1101\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n",
285
+ "File \u001b[0;32m/usr/local/lib/python3.11/dist-packages/openai/_base_client.py:1017\u001b[0m, in \u001b[0;36mSyncAPIClient._request\u001b[0;34m(self, cast_to, options, retries_taken, stream, stream_cls)\u001b[0m\n\u001b[1;32m 1014\u001b[0m log\u001b[38;5;241m.\u001b[39mdebug(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mEncountered Exception\u001b[39m\u001b[38;5;124m\"\u001b[39m, exc_info\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mTrue\u001b[39;00m)\n\u001b[1;32m 1016\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m remaining_retries \u001b[38;5;241m>\u001b[39m \u001b[38;5;241m0\u001b[39m:\n\u001b[0;32m-> 1017\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_retry_request\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 1018\u001b[0m \u001b[43m \u001b[49m\u001b[43minput_options\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1019\u001b[0m \u001b[43m \u001b[49m\u001b[43mcast_to\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1020\u001b[0m \u001b[43m \u001b[49m\u001b[43mretries_taken\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mretries_taken\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1021\u001b[0m \u001b[43m \u001b[49m\u001b[43mstream\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mstream\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1022\u001b[0m \u001b[43m \u001b[49m\u001b[43mstream_cls\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mstream_cls\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1023\u001b[0m \u001b[43m \u001b[49m\u001b[43mresponse_headers\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mNone\u001b[39;49;00m\u001b[43m,\u001b[49m\n\u001b[1;32m 1024\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1026\u001b[0m log\u001b[38;5;241m.\u001b[39mdebug(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mRaising connection error\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m 1027\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m APIConnectionError(request\u001b[38;5;241m=\u001b[39mrequest) \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01merr\u001b[39;00m\n",
286
+ "File \u001b[0;32m/usr/local/lib/python3.11/dist-packages/openai/_base_client.py:1095\u001b[0m, in \u001b[0;36mSyncAPIClient._retry_request\u001b[0;34m(self, options, cast_to, retries_taken, response_headers, stream, stream_cls)\u001b[0m\n\u001b[1;32m 1091\u001b[0m \u001b[38;5;66;03m# In a synchronous context we are blocking the entire thread. Up to the library user to run the client in a\u001b[39;00m\n\u001b[1;32m 1092\u001b[0m \u001b[38;5;66;03m# different thread if necessary.\u001b[39;00m\n\u001b[1;32m 1093\u001b[0m time\u001b[38;5;241m.\u001b[39msleep(timeout)\n\u001b[0;32m-> 1095\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_request\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 1096\u001b[0m \u001b[43m \u001b[49m\u001b[43moptions\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43moptions\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1097\u001b[0m \u001b[43m \u001b[49m\u001b[43mcast_to\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcast_to\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1098\u001b[0m \u001b[43m \u001b[49m\u001b[43mretries_taken\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mretries_taken\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m+\u001b[39;49m\u001b[43m \u001b[49m\u001b[38;5;241;43m1\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1099\u001b[0m \u001b[43m \u001b[49m\u001b[43mstream\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mstream\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1100\u001b[0m \u001b[43m \u001b[49m\u001b[43mstream_cls\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mstream_cls\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1101\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n",
287
+ "File \u001b[0;32m/usr/local/lib/python3.11/dist-packages/openai/_base_client.py:1027\u001b[0m, in \u001b[0;36mSyncAPIClient._request\u001b[0;34m(self, cast_to, options, retries_taken, stream, stream_cls)\u001b[0m\n\u001b[1;32m 1017\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_retry_request(\n\u001b[1;32m 1018\u001b[0m input_options,\n\u001b[1;32m 1019\u001b[0m cast_to,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 1023\u001b[0m response_headers\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mNone\u001b[39;00m,\n\u001b[1;32m 1024\u001b[0m )\n\u001b[1;32m 1026\u001b[0m log\u001b[38;5;241m.\u001b[39mdebug(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mRaising connection error\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[0;32m-> 1027\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m APIConnectionError(request\u001b[38;5;241m=\u001b[39mrequest) \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01merr\u001b[39;00m\n\u001b[1;32m 1029\u001b[0m log\u001b[38;5;241m.\u001b[39mdebug(\n\u001b[1;32m 1030\u001b[0m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mHTTP Response: \u001b[39m\u001b[38;5;132;01m%s\u001b[39;00m\u001b[38;5;124m \u001b[39m\u001b[38;5;132;01m%s\u001b[39;00m\u001b[38;5;124m \u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;132;01m%i\u001b[39;00m\u001b[38;5;124m \u001b[39m\u001b[38;5;132;01m%s\u001b[39;00m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m \u001b[39m\u001b[38;5;132;01m%s\u001b[39;00m\u001b[38;5;124m'\u001b[39m,\n\u001b[1;32m 1031\u001b[0m request\u001b[38;5;241m.\u001b[39mmethod,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 1035\u001b[0m response\u001b[38;5;241m.\u001b[39mheaders,\n\u001b[1;32m 1036\u001b[0m )\n\u001b[1;32m 1037\u001b[0m log\u001b[38;5;241m.\u001b[39mdebug(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mrequest_id: \u001b[39m\u001b[38;5;132;01m%s\u001b[39;00m\u001b[38;5;124m\"\u001b[39m, response\u001b[38;5;241m.\u001b[39mheaders\u001b[38;5;241m.\u001b[39mget(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mx-request-id\u001b[39m\u001b[38;5;124m\"\u001b[39m))\n",
288
+ "\u001b[0;31mAPIConnectionError\u001b[0m: Connection error."
289
+ ]
290
+ }
291
+ ],
292
+ "source": [
293
+ "from openai import OpenAI\n",
294
+ "\n",
295
+ "port = 2000\n",
296
+ "\n",
297
+ "client = OpenAI(api_key=\"YOUR_API_KEY\", base_url=f\"http://0.0.0.0:{port}/v1\")\n",
298
+ "model_name = client.models.list().data[0].id\n",
299
+ "response = client.chat.completions.create(\n",
300
+ " model=model_name,\n",
301
+ " messages=[\n",
302
+ " {\n",
303
+ " \"role\": \"user\",\n",
304
+ " \"content\": [\n",
305
+ " {\n",
306
+ " \"type\": \"text\",\n",
307
+ " \"text\": \"Miêu tả bức tranh giùm coi\",\n",
308
+ " },\n",
309
+ " {\n",
310
+ " \"type\": \"image_url\",\n",
311
+ " \"image_url\": {\n",
312
+ " \"url\": \"https://raw.githubusercontent.com/open-mmlab/mmdeploy/main/tests/data/tiger.jpeg\",\n",
313
+ " },\n",
314
+ " },\n",
315
+ " ],\n",
316
+ " }\n",
317
+ " ],\n",
318
+ " temperature=0.8,\n",
319
+ " top_p=0.8,\n",
320
+ ")\n",
321
+ "print(response)"
322
+ ]
323
+ },
324
+ {
325
+ "cell_type": "code",
326
+ "execution_count": null,
327
+ "metadata": {},
328
+ "outputs": [],
329
+ "source": [
330
+ "model_name"
331
+ ]
332
+ },
333
+ {
334
+ "cell_type": "code",
335
+ "execution_count": null,
336
+ "metadata": {},
337
+ "outputs": [],
338
+ "source": [
339
+ "response.choices[0].message.content"
340
+ ]
341
+ },
342
+ {
343
+ "cell_type": "code",
344
+ "execution_count": null,
345
+ "metadata": {},
346
+ "outputs": [],
347
+ "source": []
348
+ },
349
+ {
350
+ "cell_type": "code",
351
+ "execution_count": 12,
352
+ "metadata": {},
353
+ "outputs": [
354
+ {
355
+ "name": "stderr",
356
+ "output_type": "stream",
357
+ "text": [
358
+ " % Total % Received % Xferd Average Speed Time Time Time Current\n",
359
+ " Dload Upload Total Spent Left Speed\n",
360
+ "100 617 100 404 100 213 5970 3147 --:--:-- --:--:-- --:--:-- 9208\n"
361
+ ]
362
+ },
363
+ {
364
+ "name": "stdout",
365
+ "output_type": "stream",
366
+ "text": [
367
+ "{\"id\":\"chatcmpl-8b3b1360415d4805a44f33bd81fc3447\",\"object\":\"chat.completion\",\"created\":1734879441,\"model\":\"Qwen/Qwen2.5-1.5B-Instruct\",\"choices\":[{\"index\":0,\"message\":{\"role\":\"assistant\",\"content\":\"巴黎\",\"tool_calls\":[]},\"logprobs\":null,\"finish_reason\":\"stop\",\"stop_reason\":null}],\"usage\":{\"prompt_tokens\":48,\"total_tokens\":50,\"completion_tokens\":2,\"prompt_tokens_details\":null},\"prompt_logprobs\":null}"
368
+ ]
369
+ }
370
+ ],
371
+ "source": [
372
+ "%%bash\n",
373
+ "# Call the server using curl:\n",
374
+ "curl -X POST \"http://localhost:8000/v1/chat/completions\" \\\n",
375
+ "\t-H \"Content-Type: application/json\" \\\n",
376
+ "\t--data '{\n",
377
+ "\t\t\"model\": \"Qwen/Qwen2.5-1.5B-Instruct\",\n",
378
+ "\t\t\"messages\": [\n",
379
+ "\t\t\t{\n",
380
+ "\t\t\t\t\"role\": \"user\",\n",
381
+ "\t\t\t\t\"content\": \"What is the capital of France? You must answer in Chinese without adding any comment or explanation.\"\n",
382
+ "\t\t\t}\n",
383
+ "\t\t]\n",
384
+ "\t}'"
385
+ ]
386
+ },
387
+ {
388
+ "cell_type": "code",
389
+ "execution_count": null,
390
+ "metadata": {},
391
+ "outputs": [],
392
+ "source": []
393
+ }
394
+ ],
395
+ "metadata": {
396
+ "kernelspec": {
397
+ "display_name": "lmdeploy",
398
+ "language": "python",
399
+ "name": "lmdeploy"
400
+ },
401
+ "language_info": {
402
+ "codemirror_mode": {
403
+ "name": "ipython",
404
+ "version": 3
405
+ },
406
+ "file_extension": ".py",
407
+ "mimetype": "text/x-python",
408
+ "name": "python",
409
+ "nbconvert_exporter": "python",
410
+ "pygments_lexer": "ipython3",
411
+ "version": "3.8.19"
412
+ }
413
+ },
414
+ "nbformat": 4,
415
+ "nbformat_minor": 4
416
+ }
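The notebook cell above fails with `APIConnectionError` because nothing is listening on port 2000 when the OpenAI client is built. A minimal sketch, assuming an lmdeploy/vLLM OpenAI-compatible server is supposed to be running locally (the port value is illustrative), that probes `/v1/models` before constructing the client:

```python
# Hedged sketch: probe the OpenAI-compatible endpoint before creating the client.
# Assumes a local lmdeploy/vLLM api_server; the port is illustrative and must match
# the one the server was actually started with.
import requests

port = 2000  # hypothetical, replace with the port used by `lmdeploy serve api_server`

try:
    resp = requests.get(f"http://0.0.0.0:{port}/v1/models", timeout=5)
    resp.raise_for_status()
    print("server is up, models:", [m["id"] for m in resp.json()["data"]])
except requests.RequestException as exc:
    print(f"no OpenAI-compatible server reachable on port {port}: {exc}")
```

If this check fails, the `client.models.list()` call in the cell above will fail the same way, so it is cheaper to probe first.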
a_mllm_notebooks/openai/proxy.sh ADDED
@@ -0,0 +1,10 @@
1
+ eval "$(conda shell.bash hook)"
2
+ conda activate lmdeploy
3
+
4
+ MODEL_NAME=Qwen/Qwen2.5-1.5B-Instruct-AWQ
5
+
6
+
7
+ PROXY_URL=0.0.0.0
8
+ lmdeploy serve proxy --server-name $PROXY_URL --server-port 7089 --strategy \
9
+ "min_expected_latency" \
10
+ &
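proxy.sh only starts the lmdeploy proxy on port 7089; the api_server workers attach to it through `--proxy-url` in the serve scripts below, but a node can also be registered by hand. A minimal sketch mirroring the `POST /nodes/add` call that appears later in Untitled.ipynb (the worker URL is illustrative):

```python
# Hedged sketch: manually register an api_server node with the lmdeploy proxy.
# Mirrors the curl POST to /nodes/add used in a_mllm_notebooks/vllm/Untitled.ipynb;
# the worker URL must point at a running `lmdeploy serve api_server`.
import requests

proxy_url = "http://0.0.0.0:7089"          # port chosen in proxy.sh
node = {"url": "http://0.0.0.0:5011"}      # illustrative worker port, see serve.sh

resp = requests.post(f"{proxy_url}/nodes/add", json=node, timeout=10)
print(resp.status_code, resp.text)
```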
a_mllm_notebooks/openai/serve.sh ADDED
@@ -0,0 +1,60 @@
1
+ eval "$(conda shell.bash hook)"
2
+ conda activate lmdeploy
3
+ # # MODEL_NAME=OpenGVLab/InternVL2_5-8B-AWQ
4
+ MODEL_NAME=OpenGVLab/InternVL2_5-4B-MPO-AWQ
5
+
6
+
7
+ PORT_LIST=( $(seq 5011 1 5011) )
8
+ for PORT in "${PORT_LIST[@]}"; do
9
+ # get random device id from 0 to 3
10
+ # RANDOM_DEVICE_ID=$((RANDOM % 3))
11
+ # RANDOM_DEVICE_ID=3
12
+ # CUDA_VISIBLE_DEVICES=0,1 \
13
+ # CUDA_VISIBLE_DEVICES=2,3 \
14
+ CUDA_VISIBLE_DEVICES=0 \
15
+ lmdeploy serve api_server $MODEL_NAME \
16
+ --server-port $PORT \
17
+ --backend turbomind \
18
+ --dtype float16 --proxy-url http://0.0.0.0:7089 \
19
+ --vision-max-batch-size 64 &
20
+ # --cache-max-entry-count 0.4 &
21
+ # --tp 1 &
22
+ # &
23
+ done
24
+
25
+ PORT_LIST=( $(seq 5972 1 5972) )
26
+ for PORT in "${PORT_LIST[@]}"; do
27
+ # get random device id from 0 to 3
28
+ # RANDOM_DEVICE_ID=$((RANDOM % 3))
29
+ # RANDOM_DEVICE_ID=3
30
+ # CUDA_VISIBLE_DEVICES=0,1 \
31
+ # CUDA_VISIBLE_DEVICES=2,3 \
32
+ CUDA_VISIBLE_DEVICES=2 \
33
+ lmdeploy serve api_server $MODEL_NAME \
34
+ --server-port $PORT \
35
+ --backend turbomind \
36
+ --dtype float16 --proxy-url http://0.0.0.0:7089 &
37
+ # --vision-max-batch-size 64 &
38
+ # --cache-max-entry-count 0.4 &
39
+ # --tp 1 &
40
+ # &
41
+ done
42
+
43
+ PORT_LIST=( $(seq 5171 1 5171) )
44
+ for PORT in "${PORT_LIST[@]}"; do
45
+ # get random device id from 0 to 3
46
+ # RANDOM_DEVICE_ID=$((RANDOM % 3))
47
+ # RANDOM_DEVICE_ID=3
48
+ # CUDA_VISIBLE_DEVICES=0,1 \
49
+ # CUDA_VISIBLE_DEVICES=2,3 \
50
+ CUDA_VISIBLE_DEVICES=1 \
51
+ lmdeploy serve api_server $MODEL_NAME \
52
+ --server-port $PORT \
53
+ --backend turbomind \
54
+ --dtype float16 --proxy-url http://0.0.0.0:7089 \
55
+ --vision-max-batch-size 64 &
56
+ # --cache-max-entry-count 0.4 &
57
+ # --tp 1 &
58
+ # &
59
+ done
60
+
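serve.sh launches three InternVL2_5-4B-MPO-AWQ workers (ports 5011, 5972 and 5171 on GPUs 0, 2 and 1) that register themselves with the proxy on port 7089. A minimal sketch, with illustrative retry counts, that waits for each worker's `/v1/models` route to answer before sending any traffic:

```python
# Hedged sketch: wait for each api_server from serve.sh to finish loading its weights.
# The ports come from the script above; the retry count and sleep are illustrative.
import time
import requests

for port in (5011, 5972, 5171):
    url = f"http://0.0.0.0:{port}/v1/models"
    for _ in range(30):
        try:
            if requests.get(url, timeout=5).ok:
                print(f"worker on port {port} is ready")
                break
        except requests.RequestException:
            pass
        time.sleep(10)
    else:
        print(f"worker on port {port} did not come up")
```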
a_mllm_notebooks/openai/temp.json ADDED
The diff for this file is too large to render. See raw diff
 
a_mllm_notebooks/openai/temp.sh ADDED
@@ -0,0 +1,25 @@
1
+ eval "$(conda shell.bash hook)"
2
+ conda activate lmdeploy
3
+ MODEL_NAME=Qwen/Qwen2.5-1.5B-Instruct-AWQ
4
+ PORT_LIST=( $(seq 3162 1 3162) )
5
+ for PORT in "${PORT_LIST[@]}"; do
6
+ CUDA_VISIBLE_DEVICES=0,1,2,3 \
7
+ lmdeploy serve api_server $MODEL_NAME \
8
+ --server-port $PORT \
9
+ --backend turbomind \
10
+ --dtype float16 --proxy-url http://0.0.0.0:8082 \
11
+ --cache-max-entry-count 0.0075 --tp 3 &
12
+ done
13
+
14
+
15
+ # # PORT_LIST from 3063 to 3099
16
+ # PORT_LIST=( $(seq 9000 1 9000) )
17
+ # # PORT_LIST=(9898)
18
+ # for PORT in "${PORT_LIST[@]}"; do
19
+ # CUDA_VISIBLE_DEVICES=3 \
20
+ # lmdeploy serve api_server $MODEL_NAME \
21
+ # --server-port $PORT \
22
+ # --backend turbomind \
23
+ # --dtype float16 --proxy-url http://0.0.0.0:8082 \
24
+ # --cache-max-entry-count 0.025 --tp 1 &
25
+ # done
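temp.sh serves Qwen2.5-1.5B-Instruct-AWQ on port 3162 with `--tp 3` and a very small KV-cache fraction, and registers it with a proxy on port 8082. A minimal sketch, assuming the lmdeploy proxy forwards the same `/v1` routes as the workers behind it, that sends one chat completion through that proxy once the worker has registered:

```python
# Hedged sketch: one chat completion through the proxy used in temp.sh.
# Assumes the lmdeploy proxy on port 8082 exposes the standard /v1 routes;
# the question mirrors the curl example in ping_server.ipynb.
from openai import OpenAI

client = OpenAI(api_key="YOUR_API_KEY", base_url="http://0.0.0.0:8082/v1")
model_name = client.models.list().data[0].id
response = client.chat.completions.create(
    model=model_name,
    messages=[{"role": "user", "content": "What is the capital of France?"}],
    temperature=0.8,
)
print(response.choices[0].message.content)
```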
a_mllm_notebooks/tensorrt-llm/clone_folder.ipynb ADDED
@@ -0,0 +1,78 @@
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": null,
6
+ "metadata": {},
7
+ "outputs": [],
8
+ "source": [
9
+ "# https://github.com/NVIDIA/TensorRT-LLM/tree/main/examples/bert"
10
+ ]
11
+ },
12
+ {
13
+ "cell_type": "code",
14
+ "execution_count": 16,
15
+ "metadata": {},
16
+ "outputs": [
17
+ {
18
+ "name": "stdout",
19
+ "output_type": "stream",
20
+ "text": [
21
+ "apps\t\t generate_checkpoint_config.py multimodal\n",
22
+ "arctic\t\t gpt\t\t\t nemotron\n",
23
+ "baichuan\t gptj\t\t\t openai_triton\n",
24
+ "bert\t\t gptneox\t\t\t opt\n",
25
+ "bindings\t grok\t\t\t phi\n",
26
+ "blip2\t\t hf_lora_convert.py\t quantization\n",
27
+ "bloom\t\t infinitebench\t\t qwen\n",
28
+ "chatglm\t\t internlm\t\t\t qwenvl\n",
29
+ "cogvlm\t\t internlm2\t\t\t recurrentgemma\n",
30
+ "cpp\t\t jais\t\t\t redrafter\n",
31
+ "cpp_library\t llama\t\t\t run.py\n",
32
+ "dbrx\t\t llm-api\t\t\t sample_weight_stripping\n",
33
+ "dit\t\t mamba\t\t\t skywork\n",
34
+ "enc_dec\t\t medusa\t\t\t smaug\n",
35
+ "eval_long_context.py mixtral\t\t\t summarize.py\n",
36
+ "exaone\t\t mmlu.py\t\t\t utils.py\n",
37
+ "falcon\t\t model_api\t\t\t whisper\n",
38
+ "gemma\t\t mpt\n"
39
+ ]
40
+ }
41
+ ],
42
+ "source": [
43
+ "!ls ../../TensorRT-LLM/examples\n",
44
+ "!cp ../../TensorRT-LLM/examples/bert . -r\n",
45
+ "!cp ../../TensorRT-LLM/examples/blip2 . -r\n",
46
+ "!cp ../../TensorRT-LLM/examples/multimodal . -r"
47
+ ]
48
+ },
49
+ {
50
+ "cell_type": "code",
51
+ "execution_count": null,
52
+ "metadata": {},
53
+ "outputs": [],
54
+ "source": []
55
+ }
56
+ ],
57
+ "metadata": {
58
+ "kernelspec": {
59
+ "display_name": "tensorrt-llm",
60
+ "language": "python",
61
+ "name": "python3"
62
+ },
63
+ "language_info": {
64
+ "codemirror_mode": {
65
+ "name": "ipython",
66
+ "version": 3
67
+ },
68
+ "file_extension": ".py",
69
+ "mimetype": "text/x-python",
70
+ "name": "python",
71
+ "nbconvert_exporter": "python",
72
+ "pygments_lexer": "ipython3",
73
+ "version": "3.10.14"
74
+ }
75
+ },
76
+ "nbformat": 4,
77
+ "nbformat_minor": 2
78
+ }
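The notebook above copies three example folders out of a local TensorRT-LLM checkout with shell `cp` commands. A minimal pure-Python equivalent, assuming the same relative layout as in the notebook:

```python
# Hedged sketch: Python equivalent of the `!cp ../../TensorRT-LLM/examples/<name> . -r`
# cells above, assuming the TensorRT-LLM checkout sits two directories up.
import shutil
from pathlib import Path

examples = Path("../../TensorRT-LLM/examples")
for name in ("bert", "blip2", "multimodal"):
    shutil.copytree(examples / name, Path(name), dirs_exist_ok=True)
```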
a_mllm_notebooks/vllm/Untitled.ipynb ADDED
@@ -0,0 +1,68 @@
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": 2,
6
+ "id": "9b7fb01b-8c7b-4213-b2b8-fb750a4c55a8",
7
+ "metadata": {},
8
+ "outputs": [
9
+ {
10
+ "name": "stderr",
11
+ "output_type": "stream",
12
+ "text": [
13
+ " % Total % Received % Xferd Average Speed Time Time Time Current\n",
14
+ " Dload Upload Total Spent Left Speed\n",
15
+ "100 123 100 88 100 35 22768 9055 --:--:-- --:--:-- --:--:-- 41000\n"
16
+ ]
17
+ },
18
+ {
19
+ "name": "stdout",
20
+ "output_type": "stream",
21
+ "text": [
22
+ "\"{\\\"error_code\\\": 10402, \\\"text\\\": \\\"Failed to get response after a period of time\\\"}\\n\""
23
+ ]
24
+ }
25
+ ],
26
+ "source": [
27
+ "%%bash\n",
28
+ "\n",
29
+ "curl -X 'POST' \\\n",
30
+ " 'http://localhost:8082/nodes/add' \\\n",
31
+ " -H 'accept: application/json' \\\n",
32
+ " -H 'Content-Type: application/json' \\\n",
33
+ " -d '{\n",
34
+ " \"url\": \"http://0.0.0.0:19400\"\n",
35
+ "}'\n"
36
+ ]
37
+ },
38
+ {
39
+ "cell_type": "code",
40
+ "execution_count": null,
41
+ "id": "55fc29ba-43e2-4bdb-b499-cf47266b9c3e",
42
+ "metadata": {},
43
+ "outputs": [],
44
+ "source": []
45
+ }
46
+ ],
47
+ "metadata": {
48
+ "kernelspec": {
49
+ "display_name": "lmdeploy",
50
+ "language": "python",
51
+ "name": "lmdeploy"
52
+ },
53
+ "language_info": {
54
+ "codemirror_mode": {
55
+ "name": "ipython",
56
+ "version": 3
57
+ },
58
+ "file_extension": ".py",
59
+ "mimetype": "text/x-python",
60
+ "name": "python",
61
+ "nbconvert_exporter": "python",
62
+ "pygments_lexer": "ipython3",
63
+ "version": "3.8.19"
64
+ }
65
+ },
66
+ "nbformat": 4,
67
+ "nbformat_minor": 5
68
+ }
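The `/nodes/add` call above comes back with `error_code 10402` ("Failed to get response after a period of time"), which suggests the proxy on port 8082 timed out while contacting the worker at port 19400. A minimal sketch, using the same URLs as the cell, that checks the worker itself before registering it:

```python
# Hedged sketch: verify the worker answers /v1/models before adding it to the proxy;
# error_code 10402 above suggests the proxy could not get a response from the node.
import requests

worker_url = "http://0.0.0.0:19400"
try:
    requests.get(f"{worker_url}/v1/models", timeout=5).raise_for_status()
    resp = requests.post(
        "http://localhost:8082/nodes/add", json={"url": worker_url}, timeout=10
    )
    print(resp.status_code, resp.text)
except requests.RequestException as exc:
    print(f"worker at {worker_url} is not healthy, fix it before registering: {exc}")
```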
a_mllm_notebooks/vllm/cat.jpg ADDED

Git LFS Details

  • SHA256: a8e76cfc63e1a0d0eafbbf5ae038c3a4a8ec727b4a57e4de2a9749c0180b8e86
  • Pointer size: 132 Bytes
  • Size of remote file: 2.52 MB
a_mllm_notebooks/vllm/cli.md ADDED
@@ -0,0 +1,405 @@
1
+ ```bash
2
+ usage: vllm serve <model_tag> [options]
3
+
4
+ positional arguments:
5
+ model_tag The model tag to serve
6
+
7
+ options:
8
+ --allow-credentials allow credentials
9
+ --allowed-headers ALLOWED_HEADERS
10
+ allowed headers
11
+ --allowed-local-media-path ALLOWED_LOCAL_MEDIA_PATH
12
+ Allowing API requests to read local images or videos from directories
13
+ specified by the server file system. This is a security risk. Should only
14
+ be enabled in trusted environments.
15
+ --allowed-methods ALLOWED_METHODS
16
+ allowed methods
17
+ --allowed-origins ALLOWED_ORIGINS
18
+ allowed origins
19
+ --api-key API_KEY If provided, the server will require this key to be presented in the
20
+ header.
21
+ --block-size {8,16,32,64,128}
22
+ Token block size for contiguous chunks of tokens. This is ignored on
23
+ neuron devices and set to max-model-len
24
+ --chat-template CHAT_TEMPLATE
25
+ The file path to the chat template, or the template in single-line form
26
+ for the specified model
27
+ --chat-template-content-format {auto,string,openai}
28
+ The format to render message content within a chat template. * "string"
29
+ will render the content as a string. Example: "Hello World" * "openai"
30
+ will render the content as a list of dictionaries, similar to OpenAI
31
+ schema. Example: [{"type": "text", "text": "Hello world!"}]
32
+ --code-revision CODE_REVISION
33
+ The specific revision to use for the model code on Hugging Face Hub. It
34
+ can be a branch name, a tag name, or a commit id. If unspecified, will use
35
+ the default version.
36
+ --collect-detailed-traces COLLECT_DETAILED_TRACES
37
+ Valid choices are model,worker,all. It makes sense to set this only if
38
+ --otlp-traces-endpoint is set. If set, it will collect detailed traces for
39
+ the specified modules. This involves use of possibly costly and or
40
+ blocking operations and hence might have a performance impact.
41
+ --compilation-config COMPILATION_CONFIG, -O COMPILATION_CONFIG
42
+ torch.compile configuration for the model.When it is a number (0, 1, 2,
43
+ 3), it will be interpreted as the optimization level. NOTE: level 0 is the
44
+ default level without any optimization. level 1 and 2 are for internal
45
+ testing only. level 3 is the recommended level for production. To specify
46
+ the full compilation config, use a JSON string. Following the convention
47
+ of traditional compilers, using -O without space is also supported. -O3 is
48
+ equivalent to -O 3.
49
+ --config CONFIG Read CLI options from a config file.Must be a YAML with the following opti
50
+ ons:https://docs.vllm.ai/en/latest/serving/openai_compatible_server.html#c
51
+ ommand-line-arguments-for-the-server
52
+ --config-format {auto,hf,mistral}
53
+ The format of the model config to load. * "auto" will try to load the
54
+ config in hf format if available else it will try to load in mistral
55
+ format
56
+ --cpu-offload-gb CPU_OFFLOAD_GB
57
+ The space in GiB to offload to CPU, per GPU. Default is 0, which means no
58
+ offloading. Intuitively, this argument can be seen as a virtual way to
59
+ increase the GPU memory size. For example, if you have one 24 GB GPU and
60
+ set this to 10, virtually you can think of it as a 34 GB GPU. Then you can
61
+ load a 13B model with BF16 weight, which requires at least 26GB GPU
62
+ memory. Note that this requires fast CPU-GPU interconnect, as part of the
63
+ model is loaded from CPU memory to GPU memory on the fly in each model
64
+ forward pass.
65
+ --device {auto,cuda,neuron,cpu,openvino,tpu,xpu,hpu}
66
+ Device type for vLLM execution.
67
+ --disable-async-output-proc
68
+ Disable async output processing. This may result in lower performance.
69
+ --disable-custom-all-reduce
70
+ See ParallelConfig.
71
+ --disable-fastapi-docs
72
+ Disable FastAPI's OpenAPI schema, Swagger UI, and ReDoc endpoint
73
+ --disable-frontend-multiprocessing
74
+ If specified, will run the OpenAI frontend server in the same process as
75
+ the model serving engine.
76
+ --disable-log-requests
77
+ Disable logging requests.
78
+ --disable-log-stats Disable logging statistics.
79
+ --disable-logprobs-during-spec-decoding [DISABLE_LOGPROBS_DURING_SPEC_DECODING]
80
+ If set to True, token log probabilities are not returned during
81
+ speculative decoding. If set to False, log probabilities are returned
82
+ according to the settings in SamplingParams. If not specified, it defaults
83
+ to True. Disabling log probabilities during speculative decoding reduces
84
+ latency by skipping logprob calculation in proposal sampling, target
85
+ sampling, and after accepted tokens are determined.
86
+ --disable-sliding-window
87
+ Disables sliding window, capping to sliding window size
88
+ --distributed-executor-backend {ray,mp}
89
+ Backend to use for distributed model workers, either "ray" or "mp"
90
+ (multiprocessing). If the product of pipeline_parallel_size and
91
+ tensor_parallel_size is less than or equal to the number of GPUs
92
+ available, "mp" will be used to keep processing on a single host.
93
+ Otherwise, this will default to "ray" if Ray is installed and fail
94
+ otherwise. Note that tpu and hpu only support Ray for distributed
95
+ inference.
96
+ --download-dir DOWNLOAD_DIR
97
+ Directory to download and load the weights, default to the default cache
98
+ dir of huggingface.
99
+ --dtype {auto,half,float16,bfloat16,float,float32}
100
+ Data type for model weights and activations. * "auto" will use FP16
101
+ precision for FP32 and FP16 models, and BF16 precision for BF16 models. *
102
+ "half" for FP16. Recommended for AWQ quantization. * "float16" is the same
103
+ as "half". * "bfloat16" for a balance between precision and range. *
104
+ "float" is shorthand for FP32 precision. * "float32" for FP32 precision.
105
+ --enable-auto-tool-choice
106
+ Enable auto tool choice for supported models. Use --tool-call-parser to
107
+ specify which parser to use
108
+ --enable-chunked-prefill [ENABLE_CHUNKED_PREFILL]
109
+ If set, the prefill requests can be chunked based on the
110
+ max_num_batched_tokens.
111
+ --enable-lora If True, enable handling of LoRA adapters.
112
+ --enable-lora-bias If True, enable bias for LoRA adapters.
113
+ --enable-prefix-caching, --no-enable-prefix-caching
114
+ Enables automatic prefix caching. Use --no-enable-prefix-caching to
115
+ disable explicitly.
116
+ --enable-prompt-adapter
117
+ If True, enable handling of PromptAdapters.
118
+ --enable-prompt-tokens-details
119
+ If set to True, enable prompt_tokens_details in usage.
120
+ --enforce-eager Always use eager-mode PyTorch. If False, will use eager mode and CUDA
121
+ graph in hybrid for maximal performance and flexibility.
122
+ --fully-sharded-loras
123
+ By default, only half of the LoRA computation is sharded with tensor
124
+ parallelism. Enabling this will use the fully sharded layers. At high
125
+ sequence length, max rank or tensor parallel size, this is likely faster.
126
+ --gpu-memory-utilization GPU_MEMORY_UTILIZATION
127
+ The fraction of GPU memory to be used for the model executor, which can
128
+ range from 0 to 1. For example, a value of 0.5 would imply 50% GPU memory
129
+ utilization. If unspecified, will use the default value of 0.9. This is a
130
+ global gpu memory utilization limit, for example if 50% of the gpu memory
131
+ is already used before vLLM starts and --gpu-memory-utilization is set to
132
+ 0.9, then only 40% of the gpu memory will be allocated to the model
133
+ executor.
134
+ --guided-decoding-backend {outlines,lm-format-enforcer,xgrammar}
135
+ Which engine will be used for guided decoding (JSON schema / regex etc) by
136
+ default. Currently support https://github.com/outlines-
137
+ dev/outlines,https://github.com/mlc-ai/xgrammar, and
138
+ https://github.com/noamgat/lm-format-enforcer. Can be overridden per
139
+ request via guided_decoding_backend parameter.
140
+ --hf-overrides HF_OVERRIDES
141
+ Extra arguments for the HuggingFace config. This should be a JSON string
142
+ that will be parsed into a dictionary.
143
+ --host HOST host name
144
+ --ignore-patterns IGNORE_PATTERNS
145
+ The pattern(s) to ignore when loading the model.Default to `original/**/*`
146
+ to avoid repeated loading of llama's checkpoints.
147
+ --kv-cache-dtype {auto,fp8,fp8_e5m2,fp8_e4m3}
148
+ Data type for kv cache storage. If "auto", will use model data type. CUDA
149
+ 11.8+ supports fp8 (=fp8_e4m3) and fp8_e5m2. ROCm (AMD GPU) supports fp8
150
+ (=fp8_e4m3)
151
+ --kv-transfer-config KV_TRANSFER_CONFIG
152
+ The configurations for distributed KV cache transfer. Should be a JSON
153
+ string.
154
+ --limit-mm-per-prompt LIMIT_MM_PER_PROMPT
155
+ For each multimodal plugin, limit how many input instances to allow for
156
+ each prompt. Expects a comma-separated list of items, e.g.:
157
+ `image=16,video=2` allows a maximum of 16 images and 2 videos per prompt.
158
+ Defaults to 1 for each modality.
159
+ --load-format {auto,pt,safetensors,npcache,dummy,tensorizer,sharded_state,gguf,bitsandbytes,mistral}
160
+ The format of the model weights to load. * "auto" will try to load the
161
+ weights in the safetensors format and fall back to the pytorch bin format
162
+ if safetensors format is not available. * "pt" will load the weights in
163
+ the pytorch bin format. * "safetensors" will load the weights in the
164
+ safetensors format. * "npcache" will load the weights in pytorch format
165
+ and store a numpy cache to speed up the loading. * "dummy" will initialize
166
+ the weights with random values, which is mainly for profiling. *
167
+ "tensorizer" will load the weights using tensorizer from CoreWeave. See
168
+ the Tensorize vLLM Model script in the Examples section for more
169
+ information. * "bitsandbytes" will load the weights using bitsandbytes
170
+ quantization.
171
+ --long-lora-scaling-factors LONG_LORA_SCALING_FACTORS
172
+ Specify multiple scaling factors (which can be different from base model
173
+ scaling factor - see eg. Long LoRA) to allow for multiple LoRA adapters
174
+ trained with those scaling factors to be used at the same time. If not
175
+ specified, only adapters trained with the base model scaling factor are
176
+ allowed.
177
+ --lora-dtype {auto,float16,bfloat16}
178
+ Data type for LoRA. If auto, will default to base model dtype.
179
+ --lora-extra-vocab-size LORA_EXTRA_VOCAB_SIZE
180
+ Maximum size of extra vocabulary that can be present in a LoRA adapter
181
+ (added to the base model vocabulary).
182
+ --lora-modules LORA_MODULES [LORA_MODULES ...]
183
+ LoRA module configurations in either 'name=path' formator JSON format.
184
+ Example (old format): 'name=path' Example (new format): '{"name": "name",
185
+ "local_path": "path", "base_model_name": "id"}'
186
+ --max-cpu-loras MAX_CPU_LORAS
187
+ Maximum number of LoRAs to store in CPU memory. Must be >= than max_loras.
188
+ Defaults to max_loras.
189
+ --max-log-len MAX_LOG_LEN
190
+ Max number of prompt characters or prompt ID numbers being printed in log.
191
+ Default: Unlimited
192
+ --max-logprobs MAX_LOGPROBS
193
+ Max number of log probs to return logprobs is specified in SamplingParams.
194
+ --max-lora-rank MAX_LORA_RANK
195
+ Max LoRA rank.
196
+ --max-loras MAX_LORAS
197
+ Max number of LoRAs in a single batch.
198
+ --max-model-len MAX_MODEL_LEN
199
+ Model context length. If unspecified, will be automatically derived from
200
+ the model config.
201
+ --max-num-batched-tokens MAX_NUM_BATCHED_TOKENS
202
+ Maximum number of batched tokens per iteration.
203
+ --max-num-seqs MAX_NUM_SEQS
204
+ Maximum number of sequences per iteration.
205
+ --max-parallel-loading-workers MAX_PARALLEL_LOADING_WORKERS
206
+ Load model sequentially in multiple batches, to avoid RAM OOM when using
207
+ tensor parallel and large models.
208
+ --max-prompt-adapter-token MAX_PROMPT_ADAPTER_TOKEN
209
+ Max number of PromptAdapters tokens
210
+ --max-prompt-adapters MAX_PROMPT_ADAPTERS
211
+ Max number of PromptAdapters in a batch.
212
+ --max-seq-len-to-capture MAX_SEQ_LEN_TO_CAPTURE
213
+ Maximum sequence length covered by CUDA graphs. When a sequence has
214
+ context length larger than this, we fall back to eager mode. Additionally
215
+ for encoder-decoder models, if the sequence length of the encoder input is
216
+ larger than this, we fall back to the eager mode.
217
+ --middleware MIDDLEWARE
218
+ Additional ASGI middleware to apply to the app. We accept multiple
219
+ --middleware arguments. The value should be an import path. If a function
220
+ is provided, vLLM will add it to the server using @app.middleware('http').
221
+ If a class is provided, vLLM will add it to the server using
222
+ app.add_middleware().
223
+ --mm-processor-kwargs MM_PROCESSOR_KWARGS
224
+ Overrides for the multimodal input mapping/processing, e.g., image
225
+ processor. For example: {"num_crops": 4}.
226
+ --model MODEL Name or path of the huggingface model to use.
227
+ --model-loader-extra-config MODEL_LOADER_EXTRA_CONFIG
228
+ Extra config for model loader. This will be passed to the model loader
229
+ corresponding to the chosen load_format. This should be a JSON string that
230
+ will be parsed into a dictionary.
231
+ --multi-step-stream-outputs [MULTI_STEP_STREAM_OUTPUTS]
232
+ If False, then multi-step will stream outputs at the end of all steps
233
+ --ngram-prompt-lookup-max NGRAM_PROMPT_LOOKUP_MAX
234
+ Max size of window for ngram prompt lookup in speculative decoding.
235
+ --ngram-prompt-lookup-min NGRAM_PROMPT_LOOKUP_MIN
236
+ Min size of window for ngram prompt lookup in speculative decoding.
237
+ --num-gpu-blocks-override NUM_GPU_BLOCKS_OVERRIDE
238
+ If specified, ignore GPU profiling result and use this number of GPU
239
+ blocks. Used for testing preemption.
240
+ --num-lookahead-slots NUM_LOOKAHEAD_SLOTS
241
+ Experimental scheduling config necessary for speculative decoding. This
242
+ will be replaced by speculative config in the future; it is present to
243
+ enable correctness tests until then.
244
+ --num-scheduler-steps NUM_SCHEDULER_STEPS
245
+ Maximum number of forward steps per scheduler call.
246
+ --num-speculative-tokens NUM_SPECULATIVE_TOKENS
247
+ The number of speculative tokens to sample from the draft model in
248
+ speculative decoding.
249
+ --otlp-traces-endpoint OTLP_TRACES_ENDPOINT
250
+ Target URL to which OpenTelemetry traces will be sent.
251
+ --override-neuron-config OVERRIDE_NEURON_CONFIG
252
+ Override or set neuron device configuration. e.g. {"cast_logits_dtype":
253
+ "bloat16"}.'
254
+ --override-pooler-config OVERRIDE_POOLER_CONFIG
255
+ Override or set the pooling method in the embedding model. e.g.
256
+ {"pooling_type": "mean", "normalize": false}.'
257
+ --pipeline-parallel-size PIPELINE_PARALLEL_SIZE, -pp PIPELINE_PARALLEL_SIZE
258
+ Number of pipeline stages.
259
+ --port PORT port number
260
+ --preemption-mode PREEMPTION_MODE
261
+ If 'recompute', the engine performs preemption by recomputing; If 'swap',
262
+ the engine performs preemption by block swapping.
263
+ --prompt-adapters PROMPT_ADAPTERS [PROMPT_ADAPTERS ...]
264
+ Prompt adapter configurations in the format name=path. Multiple adapters
265
+ can be specified.
266
+ --qlora-adapter-name-or-path QLORA_ADAPTER_NAME_OR_PATH
267
+ Name or path of the QLoRA adapter.
268
+ --quantization {aqlm,awq,deepspeedfp,tpu_int8,fp8,fbgemm_fp8,modelopt,marlin,gguf,gptq_marlin_24,gptq_marlin,awq_marlin,gptq,compressed-tensors,bitsandbytes,qqq,hqq,experts_int8,neuron_quant,ipex,None}, -q {aqlm,awq,deepspeedfp,tpu_int8,fp8,fbgemm_fp8,modelopt,marlin,gguf,gptq_marlin_24,gptq_marlin,awq_marlin,gptq,compressed-tensors,bitsandbytes,qqq,hqq,experts_int8,neuron_quant,ipex,None}
269
+ Method used to quantize the weights. If None, we first check the
270
+ `quantization_config` attribute in the model config file. If that is None,
271
+ we assume the model weights are not quantized and use `dtype` to determine
272
+ the data type of the weights.
273
+ --quantization-param-path QUANTIZATION_PARAM_PATH
274
+ Path to the JSON file containing the KV cache scaling factors. This should
275
+ generally be supplied, when KV cache dtype is FP8. Otherwise, KV cache
276
+ scaling factors default to 1.0, which may cause accuracy issues. FP8_E5M2
277
+ (without scaling) is only supported on cuda version greater than 11.8. On
278
+ ROCm (AMD GPU), FP8_E4M3 is instead supported for common inference
279
+ criteria.
280
+ --ray-workers-use-nsight
281
+ If specified, use nsight to profile Ray workers.
282
+ --response-role RESPONSE_ROLE
283
+ The role name to return if `request.add_generation_prompt=true`.
284
+ --return-tokens-as-token-ids
285
+ When --max-logprobs is specified, represents single tokens as strings of
286
+ the form 'token_id:{token_id}' so that tokens that are not JSON-encodable
287
+ can be identified.
288
+ --revision REVISION The specific model version to use. It can be a branch name, a tag name, or
289
+ a commit id. If unspecified, will use the default version.
290
+ --root-path ROOT_PATH
291
+ FastAPI root_path when app is behind a path based routing proxy
292
+ --rope-scaling ROPE_SCALING
293
+ RoPE scaling configuration in JSON format. For example,
294
+ {"rope_type":"dynamic","factor":2.0}
295
+ --rope-theta ROPE_THETA
296
+ RoPE theta. Use with `rope_scaling`. In some cases, changing the RoPE
297
+ theta improves the performance of the scaled model.
298
+ --scheduler-delay-factor SCHEDULER_DELAY_FACTOR
299
+ Apply a delay (of delay factor multiplied by previous prompt latency)
300
+ before scheduling next prompt.
301
+ --scheduling-policy {fcfs,priority}
302
+ The scheduling policy to use. "fcfs" (first come first served, i.e.
303
+ requests are handled in order of arrival; default) or "priority" (requests
304
+ are handled based on given priority (lower value means earlier handling)
305
+ and time of arrival deciding any ties).
306
+ --seed SEED Random seed for operations.
307
+ --served-model-name SERVED_MODEL_NAME [SERVED_MODEL_NAME ...]
308
+ The model name(s) used in the API. If multiple names are provided, the
309
+ server will respond to any of the provided names. The model name in the
310
+ model field of a response will be the first name in this list. If not
311
+ specified, the model name will be the same as the `--model` argument.
312
+ Noted that this name(s) will also be used in `model_name` tag content of
313
+ prometheus metrics, if multiple names provided, metrics tag will take the
314
+ first one.
315
+ --skip-tokenizer-init
316
+ Skip initialization of tokenizer and detokenizer
317
+ --spec-decoding-acceptance-method {rejection_sampler,typical_acceptance_sampler}
318
+ Specify the acceptance method to use during draft token verification in
319
+ speculative decoding. Two types of acceptance routines are supported: 1)
320
+ RejectionSampler which does not allow changing the acceptance rate of
321
+ draft tokens, 2) TypicalAcceptanceSampler which is configurable, allowing
322
+ for a higher acceptance rate at the cost of lower quality, and vice versa.
323
+ --speculative-disable-by-batch-size SPECULATIVE_DISABLE_BY_BATCH_SIZE
324
+ Disable speculative decoding for new incoming requests if the number of
325
+ enqueue requests is larger than this value.
326
+ --speculative-disable-mqa-scorer
327
+ If set to True, the MQA scorer will be disabled in speculative and fall
328
+ back to batch expansion
329
+ --speculative-draft-tensor-parallel-size SPECULATIVE_DRAFT_TENSOR_PARALLEL_SIZE, -spec-draft-tp SPECULATIVE_DRAFT_TENSOR_PARALLEL_SIZE
330
+ Number of tensor parallel replicas for the draft model in speculative
331
+ decoding.
332
+ --speculative-max-model-len SPECULATIVE_MAX_MODEL_LEN
333
+ The maximum sequence length supported by the draft model. Sequences over
334
+ this length will skip speculation.
335
+ --speculative-model SPECULATIVE_MODEL
336
+ The name of the draft model to be used in speculative decoding.
337
+ --speculative-model-quantization {aqlm,awq,deepspeedfp,tpu_int8,fp8,fbgemm_fp8,modelopt,marlin,gguf,gptq_marlin_24,gptq_marlin,awq_marlin,gptq,compressed-tensors,bitsandbytes,qqq,hqq,experts_int8,neuron_quant,ipex,None}
338
+ Method used to quantize the weights of speculative model. If None, we
339
+ first check the `quantization_config` attribute in the model config file.
340
+ If that is None, we assume the model weights are not quantized and use
341
+ `dtype` to determine the data type of the weights.
342
+ --ssl-ca-certs SSL_CA_CERTS
343
+ The CA certificates file
344
+ --ssl-cert-reqs SSL_CERT_REQS
345
+ Whether client certificate is required (see stdlib ssl module's)
346
+ --ssl-certfile SSL_CERTFILE
347
+ The file path to the SSL cert file
348
+ --ssl-keyfile SSL_KEYFILE
349
+ The file path to the SSL key file
350
+ --swap-space SWAP_SPACE
351
+ CPU swap space size (GiB) per GPU.
352
+ --task {auto,generate,embedding}
353
+ The task to use the model for. Each vLLM instance only supports one task,
354
+ even if the same model can be used for multiple tasks. When the model only
355
+ supports one task, "auto" can be used to select it; otherwise, you must
356
+ specify explicitly which task to use.
357
+ --tensor-parallel-size TENSOR_PARALLEL_SIZE, -tp TENSOR_PARALLEL_SIZE
358
+ Number of tensor parallel replicas.
359
+ --tokenizer TOKENIZER
360
+ Name or path of the huggingface tokenizer to use. If unspecified, model
361
+ name or path will be used.
362
+ --tokenizer-mode {auto,slow,mistral}
363
+ The tokenizer mode. * "auto" will use the fast tokenizer if available. *
364
+ "slow" will always use the slow tokenizer. * "mistral" will always use the
365
+ `mistral_common` tokenizer.
366
+ --tokenizer-pool-extra-config TOKENIZER_POOL_EXTRA_CONFIG
367
+ Extra config for tokenizer pool. This should be a JSON string that will be
368
+ parsed into a dictionary. Ignored if tokenizer_pool_size is 0.
369
+ --tokenizer-pool-size TOKENIZER_POOL_SIZE
370
+ Size of tokenizer pool to use for asynchronous tokenization. If 0, will
371
+ use synchronous tokenization.
372
+ --tokenizer-pool-type TOKENIZER_POOL_TYPE
373
+ Type of tokenizer pool to use for asynchronous tokenization. Ignored if
374
+ tokenizer_pool_size is 0.
375
+ --tokenizer-revision TOKENIZER_REVISION
376
+ Revision of the huggingface tokenizer to use. It can be a branch name, a
377
+ tag name, or a commit id. If unspecified, will use the default version.
378
+ --tool-call-parser {granite-20b-fc,granite,hermes,internlm,jamba,llama3_json,mistral,pythonic} or name registered in --tool-parser-plugin
379
+ Select the tool call parser depending on the model that you're using. This
380
+ is used to parse the model-generated tool call into OpenAI API format.
381
+ Required for --enable-auto-tool-choice.
382
+ --tool-parser-plugin TOOL_PARSER_PLUGIN
383
 + Specify the tool parser plugin used to parse the model-generated tool calls
384
 + into OpenAI API format; the names registered in this plugin can be used in
385
+ --tool-call-parser.
386
+ --trust-remote-code Trust remote code from huggingface.
387
+ --typical-acceptance-sampler-posterior-alpha TYPICAL_ACCEPTANCE_SAMPLER_POSTERIOR_ALPHA
388
+ A scaling factor for the entropy-based threshold for token acceptance in
389
+ the TypicalAcceptanceSampler. Typically defaults to sqrt of --typical-
390
+ acceptance-sampler-posterior-threshold i.e. 0.3
391
+ --typical-acceptance-sampler-posterior-threshold TYPICAL_ACCEPTANCE_SAMPLER_POSTERIOR_THRESHOLD
392
+ Set the lower bound threshold for the posterior probability of a token to
393
+ be accepted. This threshold is used by the TypicalAcceptanceSampler to
394
+ make sampling decisions during speculative decoding. Defaults to 0.09
395
+ --use-v2-block-manager
396
+ [DEPRECATED] block manager v1 has been removed and
397
+ SelfAttnBlockSpaceManager (i.e. block manager v2) is now the default.
398
+ Setting this flag to True or False has no effect on vLLM behavior.
399
+ --uvicorn-log-level {debug,info,warning,error,critical,trace}
400
+ log level for uvicorn
401
+ --worker-cls WORKER_CLS
402
+ The worker class to use for distributed execution.
403
+ --worker-use-ray Deprecated, use --distributed-executor-backend=ray.
404
+ -h, --help show this help message and exit
405
+ ```
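cli.md is the captured `--help` output of `vllm serve`. A minimal sketch, using only flags that appear in that help text, that launches the server from Python and waits for the OpenAI-compatible endpoint to answer (the model tag, port and timings are illustrative):

```python
# Hedged sketch: start `vllm serve` with a few of the flags documented above and
# wait until its OpenAI-compatible /v1/models route responds.
import subprocess
import time
import requests

port = 8000
proc = subprocess.Popen([
    "vllm", "serve", "Qwen/Qwen2.5-1.5B-Instruct",
    "--port", str(port),
    "--dtype", "auto",
    "--gpu-memory-utilization", "0.9",
    "--max-model-len", "4096",
])

for _ in range(60):
    try:
        if requests.get(f"http://localhost:{port}/v1/models", timeout=5).ok:
            print("vllm serve is ready")
            break
    except requests.RequestException:
        pass
    time.sleep(10)
else:
    proc.terminate()
    raise RuntimeError("vllm serve did not become ready in time")
```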
a_mllm_notebooks/vllm/download_md.ipynb ADDED
@@ -0,0 +1,213 @@
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": 5,
6
+ "metadata": {},
7
+ "outputs": [
8
+ {
9
+ "name": "stdout",
10
+ "output_type": "stream",
11
+ "text": [
12
+ "Requirement already satisfied: wget in /dscilab_dungvo/workspace/bin/envs/lmdeploy/lib/python3.8/site-packages (3.2)\n",
13
+ "Collecting jupytext\n",
14
+ " Using cached jupytext-1.16.6-py3-none-any.whl.metadata (13 kB)\n",
15
+ "Requirement already satisfied: markdown-it-py>=1.0 in /dscilab_dungvo/workspace/bin/envs/lmdeploy/lib/python3.8/site-packages (from jupytext) (3.0.0)\n",
16
+ "Collecting mdit-py-plugins (from jupytext)\n",
17
+ " Downloading mdit_py_plugins-0.4.2-py3-none-any.whl.metadata (2.8 kB)\n",
18
+ "Collecting nbformat (from jupytext)\n",
19
+ " Using cached nbformat-5.10.4-py3-none-any.whl.metadata (3.6 kB)\n",
20
+ "Requirement already satisfied: packaging in /dscilab_dungvo/workspace/bin/envs/lmdeploy/lib/python3.8/site-packages (from jupytext) (24.1)\n",
21
+ "Requirement already satisfied: pyyaml in /dscilab_dungvo/workspace/bin/envs/lmdeploy/lib/python3.8/site-packages (from jupytext) (6.0.2)\n",
22
+ "Requirement already satisfied: tomli in /dscilab_dungvo/workspace/bin/envs/lmdeploy/lib/python3.8/site-packages (from jupytext) (2.0.1)\n",
23
+ "Requirement already satisfied: mdurl~=0.1 in /dscilab_dungvo/workspace/bin/envs/lmdeploy/lib/python3.8/site-packages (from markdown-it-py>=1.0->jupytext) (0.1.2)\n",
24
+ "Collecting fastjsonschema>=2.15 (from nbformat->jupytext)\n",
25
+ " Using cached fastjsonschema-2.21.1-py3-none-any.whl.metadata (2.2 kB)\n",
26
+ "Requirement already satisfied: jsonschema>=2.6 in /dscilab_dungvo/workspace/bin/envs/lmdeploy/lib/python3.8/site-packages (from nbformat->jupytext) (4.23.0)\n",
27
+ "Requirement already satisfied: jupyter-core!=5.0.*,>=4.12 in /dscilab_dungvo/workspace/bin/envs/lmdeploy/lib/python3.8/site-packages (from nbformat->jupytext) (5.7.2)\n",
28
+ "Requirement already satisfied: traitlets>=5.1 in /dscilab_dungvo/workspace/bin/envs/lmdeploy/lib/python3.8/site-packages (from nbformat->jupytext) (5.14.3)\n",
29
+ "Requirement already satisfied: attrs>=22.2.0 in /dscilab_dungvo/workspace/bin/envs/lmdeploy/lib/python3.8/site-packages (from jsonschema>=2.6->nbformat->jupytext) (24.2.0)\n",
30
+ "Requirement already satisfied: importlib-resources>=1.4.0 in /dscilab_dungvo/workspace/bin/envs/lmdeploy/lib/python3.8/site-packages (from jsonschema>=2.6->nbformat->jupytext) (6.4.5)\n",
31
+ "Requirement already satisfied: jsonschema-specifications>=2023.03.6 in /dscilab_dungvo/workspace/bin/envs/lmdeploy/lib/python3.8/site-packages (from jsonschema>=2.6->nbformat->jupytext) (2023.12.1)\n",
32
+ "Requirement already satisfied: pkgutil-resolve-name>=1.3.10 in /dscilab_dungvo/workspace/bin/envs/lmdeploy/lib/python3.8/site-packages (from jsonschema>=2.6->nbformat->jupytext) (1.3.10)\n",
33
+ "Requirement already satisfied: referencing>=0.28.4 in /dscilab_dungvo/workspace/bin/envs/lmdeploy/lib/python3.8/site-packages (from jsonschema>=2.6->nbformat->jupytext) (0.35.1)\n",
34
+ "Requirement already satisfied: rpds-py>=0.7.1 in /dscilab_dungvo/workspace/bin/envs/lmdeploy/lib/python3.8/site-packages (from jsonschema>=2.6->nbformat->jupytext) (0.20.0)\n",
35
+ "Requirement already satisfied: platformdirs>=2.5 in /dscilab_dungvo/workspace/bin/envs/lmdeploy/lib/python3.8/site-packages (from jupyter-core!=5.0.*,>=4.12->nbformat->jupytext) (4.3.6)\n",
36
+ "Requirement already satisfied: zipp>=3.1.0 in /dscilab_dungvo/workspace/bin/envs/lmdeploy/lib/python3.8/site-packages (from importlib-resources>=1.4.0->jsonschema>=2.6->nbformat->jupytext) (3.20.2)\n",
37
+ "Using cached jupytext-1.16.6-py3-none-any.whl (154 kB)\n",
38
+ "Downloading mdit_py_plugins-0.4.2-py3-none-any.whl (55 kB)\n",
39
+ "Using cached nbformat-5.10.4-py3-none-any.whl (78 kB)\n",
40
+ "Using cached fastjsonschema-2.21.1-py3-none-any.whl (23 kB)\n",
41
+ "Installing collected packages: fastjsonschema, mdit-py-plugins, nbformat, jupytext\n",
42
+ "Successfully installed fastjsonschema-2.21.1 jupytext-1.16.6 mdit-py-plugins-0.4.2 nbformat-5.10.4\n",
43
+ "\u001b[33mWARNING: Running pip as the 'root' user can result in broken permissions and conflicting behaviour with the system package manager, possibly rendering your system unusable.It is recommended to use a virtual environment instead: https://pip.pypa.io/warnings/venv. Use the --root-user-action option if you know what you are doing and want to suppress this warning.\u001b[0m\u001b[33m\n",
44
+ "\u001b[0m"
45
+ ]
46
+ }
47
+ ],
48
+ "source": [
49
+ "# https://github.com/InternLM/lmdeploy/blob/main/docs/en/llm/pipeline.md\n",
50
+ "\n",
51
+ "# download then convert to jupyter notebook\n",
52
+ "!pip install wget jupytext\n",
53
+ "import os\n",
54
+ "import sys\n",
55
+ "import json\n",
56
+ "import requests\n",
57
+ "# import jupyter_text\n",
58
+ "\n",
59
+ "\n",
60
+ "def download_markdown_and_save(url, filename):\n",
61
+ " # remove existing file\n",
62
+ " if os.path.exists(filename):\n",
63
+ " os.remove(filename)\n",
64
+ " \n",
65
+ " import wget \n",
66
+ " # preprocess url to downloadable url\n",
67
+ " url = url.replace(\"github.com\", \"raw.githubusercontent.com\")\n",
68
+ " url = url.replace(\"blob/\", \"\")\n",
69
+ " print(f\"Downloading {url}\")\n",
70
+ " wget.download(url, filename)\n",
71
+ " print(f\"Downloaded {filename}\")\n",
72
+ " \n",
73
+ " \n",
74
+ " \n",
75
+ "# !jupytext --to notebook your_markdown_file.md\n",
76
+ "\n",
77
+ "def convert_markdown_to_jupyter_notebook(filename):\n",
78
+ " os.system(f\"jupytext --to notebook {filename}\")\n",
79
+ " print(f\"Converted {filename} to jupyter notebook.\")\n",
80
+ " \n",
81
+ " \n",
82
+ "def markdown2jupyter(url, filename):\n",
83
+ " download_markdown_and_save(url, filename)\n",
84
+ " convert_markdown_to_jupyter_notebook(filename)\n",
85
+ "\n",
86
+ "\n",
87
+ "# def main():\n",
88
+ "# url = \"https://raw.githubusercontent.com/InternLM/lmdeploy/main/docs/en/llm/pipeline.md\"\n",
89
+ "# filename = \"pipeline.md\"\n",
90
+ "# download_markdown_and_save(url, filename)\n",
91
+ "# convert_markdown_to_jupyter_notebook(filename)\n",
92
+ " \n",
93
+ " \n",
94
+ "# if __name__ == \"__main__\":\n",
95
+ "# main()\n",
96
+ " "
97
+ ]
98
+ },
99
+ {
100
+ "cell_type": "code",
101
+ "execution_count": null,
102
+ "metadata": {},
103
+ "outputs": [
104
+ {
105
+ "name": "stdout",
106
+ "output_type": "stream",
107
+ "text": [
108
+ "Downloading https://raw.githubusercontent.com/vllm-project/vllm/main/docs/source/getting_started/quickstart.md\n",
109
+ "Downloaded quickstart.md\n",
110
+ "[jupytext] Reading quickstart.md in format md\n",
111
+ "[jupytext] Writing quickstart.ipynb\n",
112
+ "Converted quickstart.md to jupyter notebook.\n"
113
+ ]
114
+ }
115
+ ],
116
+ "source": [
117
+ "# markdown2jupyter(\n",
118
+ "# 'https://github.com/vllm-project/vllm/blob/main/docs/source/getting_started/quickstart.md',\n",
119
+ "# 'quickstart.md'\n",
120
+ "# )"
121
+ ]
122
+ },
123
+ {
124
+ "cell_type": "code",
125
+ "execution_count": 11,
126
+ "metadata": {},
127
+ "outputs": [
128
+ {
129
+ "name": "stdout",
130
+ "output_type": "stream",
131
+ "text": [
132
+ "Overwriting links.txt\n"
133
+ ]
134
+ }
135
+ ],
136
+ "source": [
137
+ "%%writefile links.txt\n",
138
+ "'https://github.com/vllm-project/vllm/blob/main/docs/source/serving/distributed_serving.md'"
139
+ ]
140
+ },
141
+ {
142
+ "cell_type": "code",
143
+ "execution_count": 12,
144
+ "metadata": {},
145
+ "outputs": [
146
+ {
147
+ "name": "stdout",
148
+ "output_type": "stream",
149
+ "text": [
150
+ "Downloading https://raw.githubusercontent.com/vllm-project/vllm/main/docs/source/serving/distributed_serving.md\n",
151
+ "Downloaded distributed_serving.md\n",
152
+ "[jupytext] Reading distributed_serving.md in format md\n",
153
+ "[jupytext] Writing distributed_serving.ipynb\n",
154
+ "Converted distributed_serving.md to jupyter notebook.\n"
155
+ ]
156
+ },
157
+ {
158
+ "ename": "",
159
+ "evalue": "",
160
+ "output_type": "error",
161
+ "traceback": [
162
+ "\u001b[1;31mThe Kernel crashed while executing code in the current cell or a previous cell. \n",
163
+ "\u001b[1;31mPlease review the code in the cell(s) to identify a possible cause of the failure. \n",
164
+ "\u001b[1;31mClick <a href='https://aka.ms/vscodeJupyterKernelCrash'>here</a> for more info. \n",
165
+ "\u001b[1;31mView Jupyter <a href='command:jupyter.viewOutput'>log</a> for further details."
166
+ ]
167
+ }
168
+ ],
169
+ "source": [
170
+ "list_url = []\n",
171
+ "with open('links.txt') as f:\n",
172
+ " list_url = f.readlines()\n",
173
+ "for i in range(len(list_url)):\n",
174
+ " list_url[i] = eval(list_url[i])\n",
175
+ "\n",
176
+ "for i in range(len(list_url)):\n",
177
+ " url = list_url[i]\n",
178
+ " name = url.split('/')[-1]\n",
179
+ " markdown2jupyter(url, name)\n",
180
+ " \n",
181
+ "# delete all file{i}.md"
182
+ ]
183
+ },
184
+ {
185
+ "cell_type": "code",
186
+ "execution_count": null,
187
+ "metadata": {},
188
+ "outputs": [],
189
+ "source": []
190
+ }
191
+ ],
192
+ "metadata": {
193
+ "kernelspec": {
194
+ "display_name": "lmdeploy",
195
+ "language": "python",
196
+ "name": "python3"
197
+ },
198
+ "language_info": {
199
+ "codemirror_mode": {
200
+ "name": "ipython",
201
+ "version": 3
202
+ },
203
+ "file_extension": ".py",
204
+ "mimetype": "text/x-python",
205
+ "name": "python",
206
+ "nbconvert_exporter": "python",
207
+ "pygments_lexer": "ipython3",
208
+ "version": "3.8.19"
209
+ }
210
+ },
211
+ "nbformat": 4,
212
+ "nbformat_minor": 2
213
+ }
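The last code cell in this notebook reads links.txt and calls `eval()` on every line to strip the quotes, which breaks on blank lines and executes whatever the file contains. A minimal, safer sketch that reuses the `markdown2jupyter()` helper defined in the notebook above:

```python
# Hedged sketch: read links.txt without eval(); strip whitespace and surrounding
# quotes instead. Reuses the markdown2jupyter() helper defined in the notebook.
with open("links.txt") as f:
    urls = [line.strip().strip("'\"") for line in f if line.strip()]

for url in urls:
    markdown2jupyter(url, url.split("/")[-1])
```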
a_mllm_notebooks/vllm/florence_2.ipynb ADDED
@@ -0,0 +1,355 @@
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": 5,
6
+ "id": "054990e3-e0cb-4e36-8783-8af0ed9ebc9a",
7
+ "metadata": {},
8
+ "outputs": [],
9
+ "source": [
10
+ "from PIL import Image\n",
11
 + "temp_image = Image.open('cat.jpg')"
12
+ ]
13
+ },
14
+ {
15
+ "cell_type": "code",
16
+ "execution_count": 6,
17
+ "id": "e1d28b8b-5c71-4681-9b4b-a9ed6834867d",
18
+ "metadata": {},
19
+ "outputs": [
20
+ {
21
+ "name": "stdout",
22
+ "output_type": "stream",
23
+ "text": [
24
+ "INFO 12-31 05:01:23 config.py:2272] Downcasting torch.float32 to torch.bfloat16.\n",
25
+ "INFO 12-31 05:01:31 config.py:510] This model supports multiple tasks: {'reward', 'generate', 'classify', 'embed', 'score'}. Defaulting to 'generate'.\n",
26
+ "INFO 12-31 05:01:31 llm_engine.py:234] Initializing an LLM engine (v0.6.6.post1) with config: model='microsoft/Florence-2-base', speculative_config=None, tokenizer='facebook/bart-base', skip_tokenizer_init=False, tokenizer_mode=auto, revision=None, override_neuron_config=None, tokenizer_revision=None, trust_remote_code=True, dtype=torch.bfloat16, max_seq_len=1024, download_dir=None, load_format=auto, tensor_parallel_size=1, pipeline_parallel_size=1, disable_custom_all_reduce=False, quantization=None, enforce_eager=False, kv_cache_dtype=auto, quantization_param_path=None, device_config=cuda, decoding_config=DecodingConfig(guided_decoding_backend='xgrammar'), observability_config=ObservabilityConfig(otlp_traces_endpoint=None, collect_model_forward_time=False, collect_model_execute_time=False), seed=0, served_model_name=microsoft/Florence-2-base, num_scheduler_steps=1, multi_step_stream_outputs=True, enable_prefix_caching=False, chunked_prefill_enabled=False, use_async_output_proc=True, disable_mm_preprocessor_cache=False, mm_processor_kwargs=None, pooler_config=None, compilation_config={\"splitting_ops\":[\"vllm.unified_attention\",\"vllm.unified_attention_with_output\"],\"candidate_compile_sizes\":[],\"compile_sizes\":[],\"capture_sizes\":[256,248,240,232,224,216,208,200,192,184,176,168,160,152,144,136,128,120,112,104,96,88,80,72,64,56,48,40,32,24,16,8,4,2,1],\"max_capture_size\":256}, use_cached_outputs=False, \n",
27
+ "INFO 12-31 05:01:35 selector.py:120] Using Flash Attention backend.\n",
28
+ "INFO 12-31 05:01:37 model_runner.py:1094] Starting to load model microsoft/Florence-2-base...\n",
29
+ "INFO 12-31 05:01:37 weight_utils.py:251] Using model weights format ['*.bin']\n"
30
+ ]
31
+ },
32
+ {
33
+ "data": {
34
+ "application/vnd.jupyter.widget-view+json": {
35
+ "model_id": "3359625d0d944cb29803552332d7b5fa",
36
+ "version_major": 2,
37
+ "version_minor": 0
38
+ },
39
+ "text/plain": [
40
+ "Loading pt checkpoint shards: 0% Completed | 0/1 [00:00<?, ?it/s]\n"
41
+ ]
42
+ },
43
+ "metadata": {},
44
+ "output_type": "display_data"
45
+ },
46
+ {
47
+ "name": "stderr",
48
+ "output_type": "stream",
49
+ "text": [
50
+ "/dscilab_dungvo/workspace/bin/envs/vllm/lib/python3.10/site-packages/vllm/model_executor/model_loader/weight_utils.py:450: FutureWarning: You are using `torch.load` with `weights_only=False` (the current default value), which uses the default pickle module implicitly. It is possible to construct malicious pickle data which will execute arbitrary code during unpickling (See https://github.com/pytorch/pytorch/blob/main/SECURITY.md#untrusted-models for more details). In a future release, the default value for `weights_only` will be flipped to `True`. This limits the functions that could be executed during unpickling. Arbitrary objects will no longer be allowed to be loaded via this mode unless they are explicitly allowlisted by the user via `torch.serialization.add_safe_globals`. We recommend you start setting `weights_only=True` for any use case where you don't have full control of the loaded file. Please open an issue on GitHub for any issues related to this experimental feature.\n",
51
+ " state = torch.load(bin_file, map_location=\"cpu\")\n"
52
+ ]
53
+ },
54
+ {
55
+ "name": "stdout",
56
+ "output_type": "stream",
57
+ "text": [
58
+ "INFO 12-31 05:01:39 model_runner.py:1099] Loading model weights took 0.3440 GB\n",
59
+ "INFO 12-31 05:01:41 worker.py:241] Memory profiling takes 1.83 seconds\n",
60
+ "INFO 12-31 05:01:41 worker.py:241] the current vLLM instance can use total_gpu_memory (39.39GiB) x gpu_memory_utilization (0.90) = 35.45GiB\n",
61
+ "INFO 12-31 05:01:41 worker.py:241] model weights take 0.34GiB; non_torch_memory takes 0.11GiB; PyTorch activation peak memory takes 0.48GiB; the rest of the memory reserved for KV Cache is 34.53GiB.\n",
62
+ "INFO 12-31 05:01:41 gpu_executor.py:76] # GPU blocks: 125715, # CPU blocks: 14563\n",
63
+ "INFO 12-31 05:01:41 gpu_executor.py:80] Maximum concurrency for 1024 tokens per request: 1964.30x\n",
64
+ "INFO 12-31 05:01:46 model_runner.py:1415] Capturing cudagraphs for decoding. This may lead to unexpected consequences if the model is not static. To run the model in eager mode, set 'enforce_eager=True' or use '--enforce-eager' in the CLI. If out-of-memory error occurs during cudagraph capture, consider decreasing `gpu_memory_utilization` or switching to eager mode. You can also reduce the `max_num_seqs` as needed to decrease memory usage.\n"
65
+ ]
66
+ },
67
+ {
68
+ "name": "stderr",
69
+ "output_type": "stream",
70
+ "text": [
71
+ "Capturing CUDA graph shapes: 100%|██████████| 35/35 [00:24<00:00, 1.45it/s]"
72
+ ]
73
+ },
74
+ {
75
+ "name": "stdout",
76
+ "output_type": "stream",
77
+ "text": [
78
+ "INFO 12-31 05:02:10 model_runner.py:1535] Graph capturing finished in 24 secs, took 0.31 GiB\n",
79
+ "INFO 12-31 05:02:10 llm_engine.py:431] init engine (profile, create kv cache, warmup model) took 30.87 seconds\n"
80
+ ]
81
+ },
82
+ {
83
+ "name": "stderr",
84
+ "output_type": "stream",
85
+ "text": [
86
+ "\n"
87
+ ]
88
+ },
89
+ {
90
+ "ename": "TypeError",
91
+ "evalue": "inputs must be a string, TextPrompt, or TokensPrompt",
92
+ "output_type": "error",
93
+ "traceback": [
94
+ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
95
+ "\u001b[0;31mTypeError\u001b[0m Traceback (most recent call last)",
96
+ "Cell \u001b[0;32mIn[6], line 35\u001b[0m\n\u001b[1;32m 25\u001b[0m sampling_params \u001b[38;5;241m=\u001b[39m SamplingParams(\n\u001b[1;32m 26\u001b[0m temperature\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m0\u001b[39m,\n\u001b[1;32m 27\u001b[0m top_p\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m1.0\u001b[39m,\n\u001b[1;32m 28\u001b[0m min_tokens\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m0\u001b[39m,\n\u001b[1;32m 29\u001b[0m max_tokens\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m20\u001b[39m,\n\u001b[1;32m 30\u001b[0m )\n\u001b[1;32m 32\u001b[0m \u001b[38;5;66;03m# Generate output tokens from the prompts. The output is a list of\u001b[39;00m\n\u001b[1;32m 33\u001b[0m \u001b[38;5;66;03m# RequestOutput objects that contain the prompt, generated\u001b[39;00m\n\u001b[1;32m 34\u001b[0m \u001b[38;5;66;03m# text, and other information.\u001b[39;00m\n\u001b[0;32m---> 35\u001b[0m outputs \u001b[38;5;241m=\u001b[39m \u001b[43mllm\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mgenerate\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 36\u001b[0m \u001b[43m \u001b[49m\u001b[43m{\u001b[49m\n\u001b[1;32m 37\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mprompts\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[43mprompts\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 38\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mmulti_modal_data\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[43m{\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mimage\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[43m[\u001b[49m\u001b[43mtemp_image\u001b[49m\u001b[43m]\u001b[49m\u001b[43m}\u001b[49m\n\u001b[1;32m 39\u001b[0m \u001b[43m \u001b[49m\u001b[43m}\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\n\u001b[1;32m 40\u001b[0m \u001b[43m \u001b[49m\u001b[43msampling_params\u001b[49m\n\u001b[1;32m 41\u001b[0m \u001b[43m)\u001b[49m\n\u001b[1;32m 43\u001b[0m \u001b[38;5;66;03m# Print the outputs.\u001b[39;00m\n\u001b[1;32m 44\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m output \u001b[38;5;129;01min\u001b[39;00m outputs:\n",
97
+ "File \u001b[0;32m/dscilab_dungvo/workspace/bin/envs/vllm/lib/python3.10/site-packages/vllm/utils.py:1021\u001b[0m, in \u001b[0;36mdeprecate_kwargs.<locals>.wrapper.<locals>.inner\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 1014\u001b[0m msg \u001b[38;5;241m+\u001b[39m\u001b[38;5;241m=\u001b[39m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m \u001b[39m\u001b[38;5;132;01m{\u001b[39;00madditional_message\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 1016\u001b[0m warnings\u001b[38;5;241m.\u001b[39mwarn(\n\u001b[1;32m 1017\u001b[0m \u001b[38;5;167;01mDeprecationWarning\u001b[39;00m(msg),\n\u001b[1;32m 1018\u001b[0m stacklevel\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m3\u001b[39m, \u001b[38;5;66;03m# The inner function takes up one level\u001b[39;00m\n\u001b[1;32m 1019\u001b[0m )\n\u001b[0;32m-> 1021\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mfn\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n",
98
+ "File \u001b[0;32m/dscilab_dungvo/workspace/bin/envs/vllm/lib/python3.10/site-packages/vllm/entrypoints/llm.py:454\u001b[0m, in \u001b[0;36mLLM.generate\u001b[0;34m(self, prompts, sampling_params, prompt_token_ids, use_tqdm, lora_request, prompt_adapter_request, guided_options_request, priority)\u001b[0m\n\u001b[1;32m 450\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m sampling_params \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[1;32m 451\u001b[0m \u001b[38;5;66;03m# Use default sampling params.\u001b[39;00m\n\u001b[1;32m 452\u001b[0m sampling_params \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mget_default_sampling_params()\n\u001b[0;32m--> 454\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_validate_and_add_requests\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 455\u001b[0m \u001b[43m \u001b[49m\u001b[43mprompts\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mparsed_prompts\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 456\u001b[0m \u001b[43m \u001b[49m\u001b[43mparams\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43msampling_params\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 457\u001b[0m \u001b[43m \u001b[49m\u001b[43mlora_request\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mlora_request\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 458\u001b[0m \u001b[43m \u001b[49m\u001b[43mprompt_adapter_request\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mprompt_adapter_request\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 459\u001b[0m \u001b[43m \u001b[49m\u001b[43mguided_options\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mguided_options_request\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 460\u001b[0m \u001b[43m \u001b[49m\u001b[43mpriority\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mpriority\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 462\u001b[0m outputs \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_run_engine(use_tqdm\u001b[38;5;241m=\u001b[39muse_tqdm)\n\u001b[1;32m 463\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mengine_class\u001b[38;5;241m.\u001b[39mvalidate_outputs(outputs, RequestOutput)\n",
99
+ "File \u001b[0;32m/dscilab_dungvo/workspace/bin/envs/vllm/lib/python3.10/site-packages/vllm/entrypoints/llm.py:1175\u001b[0m, in \u001b[0;36mLLM._validate_and_add_requests\u001b[0;34m(self, prompts, params, lora_request, prompt_adapter_request, guided_options, priority)\u001b[0m\n\u001b[1;32m 1173\u001b[0m \u001b[38;5;66;03m# Add requests to the engine.\u001b[39;00m\n\u001b[1;32m 1174\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m i, prompt \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28menumerate\u001b[39m(prompts):\n\u001b[0;32m-> 1175\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_add_request\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 1176\u001b[0m \u001b[43m \u001b[49m\u001b[43mprompt\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1177\u001b[0m \u001b[43m \u001b[49m\u001b[43mparams\u001b[49m\u001b[43m[\u001b[49m\u001b[43mi\u001b[49m\u001b[43m]\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mif\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[38;5;28;43misinstance\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43mparams\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mSequence\u001b[49m\u001b[43m)\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43;01melse\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[43mparams\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1178\u001b[0m \u001b[43m \u001b[49m\u001b[43mlora_request\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mlora_request\u001b[49m\u001b[43m[\u001b[49m\u001b[43mi\u001b[49m\u001b[43m]\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mif\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[38;5;28;43misinstance\u001b[39;49m\u001b[43m(\u001b[49m\n\u001b[1;32m 1179\u001b[0m \u001b[43m \u001b[49m\u001b[43mlora_request\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mSequence\u001b[49m\u001b[43m)\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43;01melse\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[43mlora_request\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1180\u001b[0m \u001b[43m \u001b[49m\u001b[43mprompt_adapter_request\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mprompt_adapter_request\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1181\u001b[0m \u001b[43m \u001b[49m\u001b[43mpriority\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mpriority\u001b[49m\u001b[43m[\u001b[49m\u001b[43mi\u001b[49m\u001b[43m]\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mif\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[43mpriority\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43;01melse\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[38;5;241;43m0\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1182\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n",
100
+ "File \u001b[0;32m/dscilab_dungvo/workspace/bin/envs/vllm/lib/python3.10/site-packages/vllm/entrypoints/llm.py:1193\u001b[0m, in \u001b[0;36mLLM._add_request\u001b[0;34m(self, prompt, params, lora_request, prompt_adapter_request, priority)\u001b[0m\n\u001b[1;32m 1184\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21m_add_request\u001b[39m(\n\u001b[1;32m 1185\u001b[0m \u001b[38;5;28mself\u001b[39m,\n\u001b[1;32m 1186\u001b[0m prompt: PromptType,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 1190\u001b[0m priority: \u001b[38;5;28mint\u001b[39m \u001b[38;5;241m=\u001b[39m \u001b[38;5;241m0\u001b[39m,\n\u001b[1;32m 1191\u001b[0m ) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[1;32m 1192\u001b[0m request_id \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mstr\u001b[39m(\u001b[38;5;28mnext\u001b[39m(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mrequest_counter))\n\u001b[0;32m-> 1193\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mllm_engine\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43madd_request\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 1194\u001b[0m \u001b[43m \u001b[49m\u001b[43mrequest_id\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1195\u001b[0m \u001b[43m \u001b[49m\u001b[43mprompt\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1196\u001b[0m \u001b[43m \u001b[49m\u001b[43mparams\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1197\u001b[0m \u001b[43m \u001b[49m\u001b[43mlora_request\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mlora_request\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1198\u001b[0m \u001b[43m \u001b[49m\u001b[43mprompt_adapter_request\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mprompt_adapter_request\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1199\u001b[0m \u001b[43m \u001b[49m\u001b[43mpriority\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mpriority\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1200\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n",
101
+ "File \u001b[0;32m/dscilab_dungvo/workspace/bin/envs/vllm/lib/python3.10/site-packages/vllm/utils.py:1021\u001b[0m, in \u001b[0;36mdeprecate_kwargs.<locals>.wrapper.<locals>.inner\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 1014\u001b[0m msg \u001b[38;5;241m+\u001b[39m\u001b[38;5;241m=\u001b[39m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m \u001b[39m\u001b[38;5;132;01m{\u001b[39;00madditional_message\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 1016\u001b[0m warnings\u001b[38;5;241m.\u001b[39mwarn(\n\u001b[1;32m 1017\u001b[0m \u001b[38;5;167;01mDeprecationWarning\u001b[39;00m(msg),\n\u001b[1;32m 1018\u001b[0m stacklevel\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m3\u001b[39m, \u001b[38;5;66;03m# The inner function takes up one level\u001b[39;00m\n\u001b[1;32m 1019\u001b[0m )\n\u001b[0;32m-> 1021\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mfn\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n",
102
+ "File \u001b[0;32m/dscilab_dungvo/workspace/bin/envs/vllm/lib/python3.10/site-packages/vllm/engine/llm_engine.py:782\u001b[0m, in \u001b[0;36mLLMEngine.add_request\u001b[0;34m(self, request_id, prompt, params, arrival_time, lora_request, trace_headers, prompt_adapter_request, priority, inputs)\u001b[0m\n\u001b[1;32m 777\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mtokenizer \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[1;32m 778\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_validate_token_prompt(\n\u001b[1;32m 779\u001b[0m prompt,\n\u001b[1;32m 780\u001b[0m tokenizer\u001b[38;5;241m=\u001b[39m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mget_tokenizer(lora_request\u001b[38;5;241m=\u001b[39mlora_request))\n\u001b[0;32m--> 782\u001b[0m preprocessed_inputs \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43minput_preprocessor\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mpreprocess\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 783\u001b[0m \u001b[43m \u001b[49m\u001b[43mprompt\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 784\u001b[0m \u001b[43m \u001b[49m\u001b[43mrequest_id\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mrequest_id\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 785\u001b[0m \u001b[43m \u001b[49m\u001b[43mlora_request\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mlora_request\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 786\u001b[0m \u001b[43m \u001b[49m\u001b[43mprompt_adapter_request\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mprompt_adapter_request\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 787\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 788\u001b[0m processed_inputs \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39minput_processor(preprocessed_inputs)\n\u001b[1;32m 790\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_add_processed_request(\n\u001b[1;32m 791\u001b[0m request_id\u001b[38;5;241m=\u001b[39mrequest_id,\n\u001b[1;32m 792\u001b[0m processed_inputs\u001b[38;5;241m=\u001b[39mprocessed_inputs,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 798\u001b[0m priority\u001b[38;5;241m=\u001b[39mpriority,\n\u001b[1;32m 799\u001b[0m )\n",
103
+ "File \u001b[0;32m/dscilab_dungvo/workspace/bin/envs/vllm/lib/python3.10/site-packages/vllm/inputs/preprocess.py:640\u001b[0m, in \u001b[0;36mInputPreprocessor.preprocess\u001b[0;34m(self, prompt, request_id, lora_request, prompt_adapter_request)\u001b[0m\n\u001b[1;32m 636\u001b[0m \u001b[38;5;250m\u001b[39m\u001b[38;5;124;03m\"\"\"Preprocess the input prompt.\"\"\"\u001b[39;00m\n\u001b[1;32m 637\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mmodel_config\u001b[38;5;241m.\u001b[39mis_encoder_decoder:\n\u001b[1;32m 638\u001b[0m \u001b[38;5;66;03m# Encoder-decoder model requires special mapping of\u001b[39;00m\n\u001b[1;32m 639\u001b[0m \u001b[38;5;66;03m# input prompts to encoder & decoder\u001b[39;00m\n\u001b[0;32m--> 640\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_process_encoder_decoder_prompt\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 641\u001b[0m \u001b[43m \u001b[49m\u001b[43mprompt\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 642\u001b[0m \u001b[43m \u001b[49m\u001b[43mrequest_id\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mrequest_id\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 643\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 645\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m is_explicit_encoder_decoder_prompt(prompt):\n\u001b[1;32m 646\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mCannot pass encoder-decoder prompt \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 647\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mto decoder-only models\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n",
104
+ "File \u001b[0;32m/dscilab_dungvo/workspace/bin/envs/vllm/lib/python3.10/site-packages/vllm/inputs/preprocess.py:515\u001b[0m, in \u001b[0;36mInputPreprocessor._process_encoder_decoder_prompt\u001b[0;34m(self, prompt, request_id)\u001b[0m\n\u001b[1;32m 510\u001b[0m decoder_inputs \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_prompt_to_llm_inputs(\n\u001b[1;32m 511\u001b[0m decoder_input,\n\u001b[1;32m 512\u001b[0m request_id\u001b[38;5;241m=\u001b[39mrequest_id,\n\u001b[1;32m 513\u001b[0m )\n\u001b[1;32m 514\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m--> 515\u001b[0m encoder_inputs \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_prompt_to_llm_inputs\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 516\u001b[0m \u001b[43m \u001b[49m\u001b[43mprompt\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 517\u001b[0m \u001b[43m \u001b[49m\u001b[43mrequest_id\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mrequest_id\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 518\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 520\u001b[0m decoder_inputs \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[1;32m 522\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_build_enc_dec_llm_inputs(encoder_inputs, decoder_inputs)\n",
105
+ "File \u001b[0;32m/dscilab_dungvo/workspace/bin/envs/vllm/lib/python3.10/site-packages/vllm/inputs/preprocess.py:289\u001b[0m, in \u001b[0;36mInputPreprocessor._prompt_to_llm_inputs\u001b[0;34m(self, prompt, request_id, lora_request)\u001b[0m\n\u001b[1;32m 270\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21m_prompt_to_llm_inputs\u001b[39m(\n\u001b[1;32m 271\u001b[0m \u001b[38;5;28mself\u001b[39m,\n\u001b[1;32m 272\u001b[0m prompt: SingletonPrompt,\n\u001b[1;32m 273\u001b[0m request_id: \u001b[38;5;28mstr\u001b[39m,\n\u001b[1;32m 274\u001b[0m lora_request: Optional[LoRARequest] \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m,\n\u001b[1;32m 275\u001b[0m ) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m SingletonInputs:\n\u001b[1;32m 276\u001b[0m \u001b[38;5;250m \u001b[39m\u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 277\u001b[0m \u001b[38;5;124;03m Extract the singleton inputs from a prompt.\u001b[39;00m\n\u001b[1;32m 278\u001b[0m \n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 287\u001b[0m \u001b[38;5;124;03m * :class:`SingletonInputs` instance\u001b[39;00m\n\u001b[1;32m 288\u001b[0m \u001b[38;5;124;03m \"\"\"\u001b[39;00m\n\u001b[0;32m--> 289\u001b[0m parsed \u001b[38;5;241m=\u001b[39m \u001b[43mparse_singleton_prompt\u001b[49m\u001b[43m(\u001b[49m\u001b[43mprompt\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 291\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m parsed[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mtype\u001b[39m\u001b[38;5;124m\"\u001b[39m] \u001b[38;5;241m==\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mstr\u001b[39m\u001b[38;5;124m\"\u001b[39m:\n\u001b[1;32m 292\u001b[0m prompt_text \u001b[38;5;241m=\u001b[39m parsed[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mcontent\u001b[39m\u001b[38;5;124m\"\u001b[39m]\n",
106
+ "File \u001b[0;32m/dscilab_dungvo/workspace/bin/envs/vllm/lib/python3.10/site-packages/vllm/inputs/parse.py:98\u001b[0m, in \u001b[0;36mparse_singleton_prompt\u001b[0;34m(prompt)\u001b[0m\n\u001b[1;32m 95\u001b[0m \u001b[38;5;28;01melif\u001b[39;00m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mprompt\u001b[39m\u001b[38;5;124m\"\u001b[39m \u001b[38;5;129;01min\u001b[39;00m prompt:\n\u001b[1;32m 96\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m ParsedTextPrompt(\u001b[38;5;28mtype\u001b[39m\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mtext\u001b[39m\u001b[38;5;124m\"\u001b[39m, content\u001b[38;5;241m=\u001b[39mprompt)\n\u001b[0;32m---> 98\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mTypeError\u001b[39;00m(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124minputs must be a string, TextPrompt, or TokensPrompt\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n",
107
+ "\u001b[0;31mTypeError\u001b[0m: inputs must be a string, TextPrompt, or TokensPrompt"
108
+ ]
109
+ }
110
+ ],
111
+ "source": [
112
+ "'''\n",
113
+ "Demonstrate prompting of text-to-text\n",
114
+ "encoder/decoder models, specifically Florence-2\n",
115
+ "'''\n",
116
+ "# TODO(Isotr0py):\n",
117
+ "# Move to offline_inference_vision_language.py after porting vision backbone\n",
118
+ "from vllm import LLM, SamplingParams\n",
119
+ "\n",
120
+ "dtype = \"bfloat16\"\n",
121
+ "\n",
122
+ "# Create a Florence-2 encoder/decoder model instance\n",
123
+ "llm = LLM(\n",
124
+ " model=\"microsoft/Florence-2-base\",\n",
125
+ " tokenizer=\"facebook/bart-base\",\n",
126
+ " dtype=dtype,\n",
127
+ " trust_remote_code=True,\n",
128
+ ")\n",
129
+ "\n",
130
+ "prompts = [\n",
131
+ " \"<CAPTION>\", \"<DETAILED_CAPTION>\", \"<MORE_DETAILED_CAPTION>\",\n",
132
+ " \"<CAPTION_TO_PHRASE_GROUNDING>\", \"<OD>\", \"<DENSE_REGION_CAPTION>\",\n",
133
+ " \"<REGION_PROPOSAL>\", \"<OCR>\", \"<OCR_WITH_REGION>\"\n",
134
+ "]\n",
135
+ "# Create a sampling params object.\n",
136
+ "sampling_params = SamplingParams(\n",
137
+ " temperature=0,\n",
138
+ " top_p=1.0,\n",
139
+ " min_tokens=0,\n",
140
+ " max_tokens=20,\n",
141
+ ")\n",
142
+ "\n",
143
+ "# Generate output tokens from the prompts. The output is a list of\n",
144
+ "# RequestOutput objects that contain the prompt, generated\n",
145
+ "# text, and other information.\n",
146
+ "outputs = llm.generate(\n",
147
+ " {\n",
148
+ " \"prompts\": prompts,\n",
149
+ " \"multi_modal_data\": {\"image\": [temp_image]}\n",
150
+ " }, \n",
151
+ " sampling_params\n",
152
+ ")\n",
153
+ "\n",
154
+ "# Print the outputs.\n",
155
+ "for output in outputs:\n",
156
+ " prompt = output.prompt\n",
157
+ " encoder_prompt = output.encoder_prompt\n",
158
+ " generated_text = output.outputs[0].text\n",
159
+ " print(f\"Encoder prompt: {encoder_prompt!r}, \"\n",
160
+ " f\"Decoder prompt: {prompt!r}, \"\n",
161
+ " f\"Generated text: {generated_text!r}\")"
162
+ ]
163
+ },
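Note on the TypeError above: `LLM.generate` rejects a single dict keyed by "prompts"; as the exception message and the signature inspected in the next cell show, it expects a string, a TextPrompt/TokensPrompt dict, or a sequence of those. A minimal corrective sketch, reusing `llm`, `prompts`, `sampling_params`, and `temp_image` from the cell above, and assuming this vLLM build actually accepts image inputs for Florence-2 (the TODO comment notes the vision backbone is not yet ported, so it may not):

# One TextPrompt dict per request: the key is "prompt" (singular), and any
# multimodal inputs are attached per prompt via "multi_modal_data".
requests = [
    {"prompt": p, "multi_modal_data": {"image": temp_image}}
    for p in prompts
]
outputs = llm.generate(requests, sampling_params)

for output in outputs:
    print(f"Prompt: {output.prompt!r}, generated: {output.outputs[0].text!r}")

If image inputs are not supported for this model in the installed version, dropping the "multi_modal_data" entry and passing the bare prompt strings at least satisfies the input-type check.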
164
+ {
165
+ "cell_type": "code",
166
+ "execution_count": 7,
167
+ "id": "efdc95be-45af-4099-9110-040622c2689a",
168
+ "metadata": {},
169
+ "outputs": [
170
+ {
171
+ "data": {
172
+ "text/plain": [
173
+ "\u001b[0;31mSignature:\u001b[0m\n",
174
+ "\u001b[0mllm\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mgenerate\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\u001b[0m\n",
175
+ "\u001b[0;34m\u001b[0m \u001b[0mprompts\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mUnion\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mstr\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mvllm\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0minputs\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdata\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mTextPrompt\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mvllm\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0minputs\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdata\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mTokensPrompt\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mvllm\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0minputs\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdata\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mExplicitEncoderDecoderPrompt\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mSequence\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mUnion\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mstr\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mvllm\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0minputs\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdata\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mTextPrompt\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mvllm\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0minputs\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdata\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mTokensPrompt\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mvllm\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0minputs\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdata\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mExplicitEncoderDecoderPrompt\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mList\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mstr\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mNoneType\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n",
176
+ "\u001b[0;34m\u001b[0m \u001b[0msampling_params\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mUnion\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mvllm\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msampling_params\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mSamplingParams\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mSequence\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mvllm\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msampling_params\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mSamplingParams\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mNoneType\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n",
177
+ "\u001b[0;34m\u001b[0m \u001b[0mprompt_token_ids\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mUnion\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mList\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mint\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mList\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mList\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mint\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mNoneType\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n",
178
+ "\u001b[0;34m\u001b[0m \u001b[0muse_tqdm\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mbool\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;32mTrue\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n",
179
+ "\u001b[0;34m\u001b[0m \u001b[0mlora_request\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mUnion\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mList\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mvllm\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mlora\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mrequest\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mLoRARequest\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mvllm\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mlora\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mrequest\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mLoRARequest\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mNoneType\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n",
180
+ "\u001b[0;34m\u001b[0m \u001b[0mprompt_adapter_request\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mOptional\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mvllm\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mprompt_adapter\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mrequest\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mPromptAdapterRequest\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n",
181
+ "\u001b[0;34m\u001b[0m \u001b[0mguided_options_request\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mUnion\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mvllm\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmodel_executor\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mguided_decoding\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mguided_fields\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mLLMGuidedOptions\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mvllm\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmodel_executor\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mguided_decoding\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mguided_fields\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mGuidedDecodingRequest\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mNoneType\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n",
182
+ "\u001b[0;34m\u001b[0m \u001b[0mpriority\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mOptional\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mList\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mint\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n",
183
+ "\u001b[0;34m\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m->\u001b[0m \u001b[0mList\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mvllm\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0moutputs\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mRequestOutput\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
184
+ "\u001b[0;31mSource:\u001b[0m \n",
185
+ " \u001b[0;34m@\u001b[0m\u001b[0mdeprecate_kwargs\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\u001b[0m\n",
186
+ "\u001b[0;34m\u001b[0m \u001b[0;34m\"prompt_token_ids\"\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n",
187
+ "\u001b[0;34m\u001b[0m \u001b[0mis_deprecated\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mlambda\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mLLM\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mDEPRECATE_LEGACY\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n",
188
+ "\u001b[0;34m\u001b[0m \u001b[0madditional_message\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m\"Please use the 'prompts' parameter instead.\"\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n",
189
+ "\u001b[0;34m\u001b[0m \u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\n",
190
+ "\u001b[0;34m\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mgenerate\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\u001b[0m\n",
191
+ "\u001b[0;34m\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n",
192
+ "\u001b[0;34m\u001b[0m \u001b[0mprompts\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mUnion\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mUnion\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mPromptType\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mSequence\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mPromptType\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n",
193
+ "\u001b[0;34m\u001b[0m \u001b[0mOptional\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mUnion\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mstr\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mList\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mstr\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n",
194
+ "\u001b[0;34m\u001b[0m \u001b[0msampling_params\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mOptional\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mUnion\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mSamplingParams\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n",
195
+ "\u001b[0;34m\u001b[0m \u001b[0mSequence\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mSamplingParams\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n",
196
+ "\u001b[0;34m\u001b[0m \u001b[0mprompt_token_ids\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mOptional\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mUnion\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mList\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mint\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mList\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mList\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mint\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n",
197
+ "\u001b[0;34m\u001b[0m \u001b[0muse_tqdm\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mbool\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;32mTrue\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n",
198
+ "\u001b[0;34m\u001b[0m \u001b[0mlora_request\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mOptional\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mUnion\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mList\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mLoRARequest\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mLoRARequest\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n",
199
+ "\u001b[0;34m\u001b[0m \u001b[0mprompt_adapter_request\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mOptional\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mPromptAdapterRequest\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n",
200
+ "\u001b[0;34m\u001b[0m \u001b[0mguided_options_request\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mOptional\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mUnion\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mLLMGuidedOptions\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n",
201
+ "\u001b[0;34m\u001b[0m \u001b[0mGuidedDecodingRequest\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n",
202
+ "\u001b[0;34m\u001b[0m \u001b[0mpriority\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mOptional\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mList\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mint\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n",
203
+ "\u001b[0;34m\u001b[0m \u001b[0;34m)\u001b[0m \u001b[0;34m->\u001b[0m \u001b[0mList\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mRequestOutput\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\n",
204
+ "\u001b[0;34m\u001b[0m \u001b[0;34m\"\"\"Generates the completions for the input prompts.\u001b[0m\n",
205
+ "\u001b[0;34m\u001b[0m\n",
206
+ "\u001b[0;34m This class automatically batches the given prompts, considering\u001b[0m\n",
207
+ "\u001b[0;34m the memory constraint. For the best performance, put all of your prompts\u001b[0m\n",
208
+ "\u001b[0;34m into a single list and pass it to this method.\u001b[0m\n",
209
+ "\u001b[0;34m\u001b[0m\n",
210
+ "\u001b[0;34m Args:\u001b[0m\n",
211
+ "\u001b[0;34m prompts: The prompts to the LLM. You may pass a sequence of prompts\u001b[0m\n",
212
+ "\u001b[0;34m for batch inference. See :class:`~vllm.inputs.PromptType`\u001b[0m\n",
213
+ "\u001b[0;34m for more details about the format of each prompts.\u001b[0m\n",
214
+ "\u001b[0;34m sampling_params: The sampling parameters for text generation. If\u001b[0m\n",
215
+ "\u001b[0;34m None, we use the default sampling parameters.\u001b[0m\n",
216
+ "\u001b[0;34m When it is a single value, it is applied to every prompt.\u001b[0m\n",
217
+ "\u001b[0;34m When it is a list, the list must have the same length as the\u001b[0m\n",
218
+ "\u001b[0;34m prompts and it is paired one by one with the prompt.\u001b[0m\n",
219
+ "\u001b[0;34m use_tqdm: Whether to use tqdm to display the progress bar.\u001b[0m\n",
220
+ "\u001b[0;34m lora_request: LoRA request to use for generation, if any.\u001b[0m\n",
221
+ "\u001b[0;34m prompt_adapter_request: Prompt Adapter request to use for\u001b[0m\n",
222
+ "\u001b[0;34m generation, if any.\u001b[0m\n",
223
+ "\u001b[0;34m priority: The priority of the requests, if any.\u001b[0m\n",
224
+ "\u001b[0;34m Only applicable when priority scheduling policy is enabled.\u001b[0m\n",
225
+ "\u001b[0;34m\u001b[0m\n",
226
+ "\u001b[0;34m Returns:\u001b[0m\n",
227
+ "\u001b[0;34m A list of ``RequestOutput`` objects containing the\u001b[0m\n",
228
+ "\u001b[0;34m generated completions in the same order as the input prompts.\u001b[0m\n",
229
+ "\u001b[0;34m\u001b[0m\n",
230
+ "\u001b[0;34m Note:\u001b[0m\n",
231
+ "\u001b[0;34m Using ``prompts`` and ``prompt_token_ids`` as keyword parameters is\u001b[0m\n",
232
+ "\u001b[0;34m considered legacy and may be deprecated in the future. You should\u001b[0m\n",
233
+ "\u001b[0;34m instead pass them via the ``inputs`` parameter.\u001b[0m\n",
234
+ "\u001b[0;34m \"\"\"\u001b[0m\u001b[0;34m\u001b[0m\n",
235
+ "\u001b[0;34m\u001b[0m \u001b[0mrunner_type\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mllm_engine\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmodel_config\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mrunner_type\u001b[0m\u001b[0;34m\u001b[0m\n",
236
+ "\u001b[0;34m\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mrunner_type\u001b[0m \u001b[0;34m!=\u001b[0m \u001b[0;34m\"generate\"\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\n",
237
+ "\u001b[0;34m\u001b[0m \u001b[0mmessages\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0;34m\u001b[0m\n",
238
+ "\u001b[0;34m\u001b[0m \u001b[0;34m\"LLM.generate() is only supported for (conditional) generation \"\u001b[0m\u001b[0;34m\u001b[0m\n",
239
+ "\u001b[0;34m\u001b[0m \u001b[0;34m\"models (XForCausalLM, XForConditionalGeneration).\"\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n",
240
+ "\u001b[0;34m\u001b[0m \u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\n",
241
+ "\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\n",
242
+ "\u001b[0;34m\u001b[0m \u001b[0msupported_runner_types\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mllm_engine\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmodel_config\u001b[0m \\\n",
243
+ " \u001b[0;34m.\u001b[0m\u001b[0msupported_runner_types\u001b[0m\u001b[0;34m\u001b[0m\n",
244
+ "\u001b[0;34m\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0;34m\"generate\"\u001b[0m \u001b[0;32min\u001b[0m \u001b[0msupported_runner_types\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\n",
245
+ "\u001b[0;34m\u001b[0m \u001b[0mmessages\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mappend\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\u001b[0m\n",
246
+ "\u001b[0;34m\u001b[0m \u001b[0;34m\"Your model supports the 'generate' runner, but is \"\u001b[0m\u001b[0;34m\u001b[0m\n",
247
+ "\u001b[0;34m\u001b[0m \u001b[0;34mf\"currently initialized for the '{runner_type}' runner. \"\u001b[0m\u001b[0;34m\u001b[0m\n",
248
+ "\u001b[0;34m\u001b[0m \u001b[0;34m\"Please initialize vLLM using `--task generate`.\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\n",
249
+ "\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\n",
250
+ "\u001b[0;34m\u001b[0m \u001b[0;32mraise\u001b[0m \u001b[0mValueError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\" \"\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mjoin\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mmessages\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\n",
251
+ "\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\n",
252
+ "\u001b[0;34m\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mprompt_token_ids\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\n",
253
+ "\u001b[0;34m\u001b[0m \u001b[0mparsed_prompts\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_convert_v1_inputs\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\u001b[0m\n",
254
+ "\u001b[0;34m\u001b[0m \u001b[0mprompts\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mcast\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mOptional\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mUnion\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mstr\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mList\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mstr\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mprompts\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n",
255
+ "\u001b[0;34m\u001b[0m \u001b[0mprompt_token_ids\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mprompt_token_ids\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n",
256
+ "\u001b[0;34m\u001b[0m \u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\n",
257
+ "\u001b[0;34m\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\n",
258
+ "\u001b[0;34m\u001b[0m \u001b[0mparsed_prompts\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mcast\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mUnion\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mPromptType\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mSequence\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mPromptType\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n",
259
+ "\u001b[0;34m\u001b[0m \u001b[0mprompts\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\n",
260
+ "\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\n",
261
+ "\u001b[0;34m\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0misinstance\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mguided_options_request\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdict\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\n",
262
+ "\u001b[0;34m\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mguided_options_request\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m>\u001b[0m \u001b[0;36m1\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\n",
263
+ "\u001b[0;34m\u001b[0m \u001b[0;32mraise\u001b[0m \u001b[0mValueError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\u001b[0m\n",
264
+ "\u001b[0;34m\u001b[0m \u001b[0;34m\"You can only use one guided decoding but multiple is \"\u001b[0m\u001b[0;34m\u001b[0m\n",
265
+ "\u001b[0;34m\u001b[0m \u001b[0;34mf\"specified: {guided_options_request}\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\n",
266
+ "\u001b[0;34m\u001b[0m \u001b[0mguided_options_request\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mGuidedDecodingRequest\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\u001b[0m\n",
267
+ "\u001b[0;34m\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mguided_options_request\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\n",
268
+ "\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\n",
269
+ "\u001b[0;34m\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0msampling_params\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\n",
270
+ "\u001b[0;34m\u001b[0m \u001b[0;31m# Use default sampling params.\u001b[0m\u001b[0;34m\u001b[0m\n",
271
+ "\u001b[0;34m\u001b[0m \u001b[0msampling_params\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget_default_sampling_params\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\n",
272
+ "\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\n",
273
+ "\u001b[0;34m\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_validate_and_add_requests\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\u001b[0m\n",
274
+ "\u001b[0;34m\u001b[0m \u001b[0mprompts\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mparsed_prompts\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n",
275
+ "\u001b[0;34m\u001b[0m \u001b[0mparams\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0msampling_params\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n",
276
+ "\u001b[0;34m\u001b[0m \u001b[0mlora_request\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mlora_request\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n",
277
+ "\u001b[0;34m\u001b[0m \u001b[0mprompt_adapter_request\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mprompt_adapter_request\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n",
278
+ "\u001b[0;34m\u001b[0m \u001b[0mguided_options\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mguided_options_request\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n",
279
+ "\u001b[0;34m\u001b[0m \u001b[0mpriority\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mpriority\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\n",
280
+ "\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\n",
281
+ "\u001b[0;34m\u001b[0m \u001b[0moutputs\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_run_engine\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0muse_tqdm\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0muse_tqdm\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\n",
282
+ "\u001b[0;34m\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mengine_class\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mvalidate_outputs\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0moutputs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mRequestOutput\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
283
+ "\u001b[0;31mFile:\u001b[0m /dscilab_dungvo/workspace/bin/envs/vllm/lib/python3.10/site-packages/vllm/entrypoints/llm.py\n",
284
+ "\u001b[0;31mType:\u001b[0m method"
285
+ ]
286
+ },
287
+ "metadata": {},
288
+ "output_type": "display_data"
289
+ }
290
+ ],
291
+ "source": [
292
+ " llm.generate??"
293
+ ]
294
+ },
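The docstring inspected above also notes that `sampling_params` may be either a single value applied to every prompt or a list paired one-to-one with the prompts. A small illustrative sketch of the paired form (text-only, reusing the `llm` instance created earlier; whether Florence-2 returns anything meaningful without an image is a separate question):

from vllm import SamplingParams

# Two prompts paired with two SamplingParams objects of matching length.
task_prompts = ["<CAPTION>", "<OD>"]
per_prompt_params = [
    SamplingParams(temperature=0, max_tokens=20),
    SamplingParams(temperature=0, max_tokens=50),
]
paired_outputs = llm.generate(task_prompts, per_prompt_params)
for out in paired_outputs:
    print(out.prompt, "->", out.outputs[0].text)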
295
+ {
296
+ "cell_type": "code",
297
+ "execution_count": null,
298
+ "id": "670b9a7f-f6c6-4b80-a117-5c3359eccfd6",
299
+ "metadata": {},
300
+ "outputs": [],
301
+ "source": [
302
+ "!nvidia-smi"
303
+ ]
304
+ },
305
+ {
306
+ "cell_type": "code",
307
+ "execution_count": null,
308
+ "id": "ce71d417-1fdb-4a14-bf85-31a9d5bb693d",
309
+ "metadata": {},
310
+ "outputs": [],
311
+ "source": [
312
+ "# Generate output tokens from the prompts. The output is a list of\n",
313
+ "# RequestOutput objects that contain the prompt, generated\n",
314
+ "# text, and other information.\n",
315
+ "outputs = llm.generate(\n",
316
+ " {\n",
317
+ " \"prompts\": prompts,\n",
318
+ " \"multi_modal_data\": {\"image\": [temp_image]}\n",
319
+ " }, \n",
320
+ " sampling_params\n",
321
+ ")\n",
322
+ "\n",
323
+ "# Print the outputs.\n",
324
+ "for output in outputs:\n",
325
+ " prompt = output.prompt\n",
326
+ " encoder_prompt = output.encoder_prompt\n",
327
+ " generated_text = output.outputs[0].text\n",
328
+ " print(f\"Encoder prompt: {encoder_prompt!r}, \"\n",
329
+ " f\"Decoder prompt: {prompt!r}, \"\n",
330
+ " f\"Generated text: {generated_text!r}\")"
331
+ ]
332
+ }
333
+ ],
334
+ "metadata": {
335
+ "kernelspec": {
336
+ "display_name": "vllm",
337
+ "language": "python",
338
+ "name": "vllm"
339
+ },
340
+ "language_info": {
341
+ "codemirror_mode": {
342
+ "name": "ipython",
343
+ "version": 3
344
+ },
345
+ "file_extension": ".py",
346
+ "mimetype": "text/x-python",
347
+ "name": "python",
348
+ "nbconvert_exporter": "python",
349
+ "pygments_lexer": "ipython3",
350
+ "version": "3.10.14"
351
+ }
352
+ },
353
+ "nbformat": 4,
354
+ "nbformat_minor": 5
355
+ }
a_mllm_notebooks/vllm/serve.sh ADDED
@@ -0,0 +1,452 @@
1
+ eval "$(conda shell.bash hook)"
2
+ conda activate vllm
3
+
4
+ # MODEL_NAME=Qwen/Qwen2-VL-7B-Instruct-AWQ
5
+
6
+ MODEL_NAME=Qwen/Qwen2.5-VL-72B-Instruct-AWQ
7
+
8
+ # MODEL_NAME=OpenGVLab/InternVL2_5-8B-AWQ
9
+ # MODEL_NAME=microsoft/Florence-2-large
10
+
11
+ PORT=8001
12
+
13
+ CUDA_VISIBLE_DEVICES=0,1,2,3 \
14
+ vllm serve $MODEL_NAME \
15
+ --port $PORT \
16
+ -tp 4 \
17
+ --trust-remote-code \
18
+ --quantization awq \
19
+ --dtype float16
20
+
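Once `vllm serve` is up, it exposes an OpenAI-compatible API on $PORT. A minimal client-side sketch in Python using the `openai` package (the localhost URL, the placeholder image URL, and the `api_key="EMPTY"` value are assumptions; adjust them to your setup):

# pip install openai
from openai import OpenAI

client = OpenAI(base_url="http://localhost:8001/v1", api_key="EMPTY")

response = client.chat.completions.create(
    model="Qwen/Qwen2.5-VL-72B-Instruct-AWQ",  # must match MODEL_NAME above
    messages=[{
        "role": "user",
        "content": [
            {"type": "text", "text": "Describe this image."},
            {"type": "image_url",
             "image_url": {"url": "https://example.com/sample.jpg"}},  # placeholder URL
        ],
    }],
    max_tokens=64,
)
print(response.choices[0].message.content)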
21
+
22
+ # --gpu-memory-utilization 0.7 \
23
+ # --cpu-offload-gb 10
24
+
25
+
26
+ # \
27
+ # &
28
+
29
+ # --gpu-memory-utilization 0.2 \
30
+
31
+ # --gpu-memory-utilization 0.8 \
32
+ # --cpu-offload-gb 80 \
33
+
34
+ #
35
+
36
+ # curl -X 'POST' \
37
+ # 'http://localhost:8082/nodes/add' \
38
+ # -H 'accept: application/json' \
39
+ # -H 'Content-Type: application/json' \
40
+ # -d "{
41
+ # \"url\": \"http://0.0.0.0:$PORT\"
42
+ # }"
43
+
44
+
45
+ # --quantization awq \
46
+
47
+
48
+
49
+
50
+ # usage: vllm serve <model_tag> [options]
51
+
52
+ # positional arguments:
53
+ # model_tag The model tag to serve
54
+
55
+ # options:
56
+ # --allow-credentials allow credentials
57
+ # --allowed-headers ALLOWED_HEADERS
58
+ # allowed headers
59
+ # --allowed-local-media-path ALLOWED_LOCAL_MEDIA_PATH
60
+ # Allowing API requests to read local images or videos from directories
61
+ # specified by the server file system. This is a security risk. Should only
62
+ # be enabled in trusted environments.
63
+ # --allowed-methods ALLOWED_METHODS
64
+ # allowed methods
65
+ # --allowed-origins ALLOWED_ORIGINS
66
+ # allowed origins
67
+ # --api-key API_KEY If provided, the server will require this key to be presented in the
68
+ # header.
69
+ # --block-size {8,16,32,64,128}
70
+ # Token block size for contiguous chunks of tokens. This is ignored on
71
+ # neuron devices and set to max-model-len
72
+ # --chat-template CHAT_TEMPLATE
73
+ # The file path to the chat template, or the template in single-line form
74
+ # for the specified model
75
+ # --chat-template-content-format {auto,string,openai}
76
+ # The format to render message content within a chat template. * "string"
77
+ # will render the content as a string. Example: "Hello World" * "openai"
78
+ # will render the content as a list of dictionaries, similar to OpenAI
79
+ # schema. Example: [{"type": "text", "text": "Hello world!"}]
80
+ # --code-revision CODE_REVISION
81
+ # The specific revision to use for the model code on Hugging Face Hub. It
82
+ # can be a branch name, a tag name, or a commit id. If unspecified, will use
83
+ # the default version.
84
+ # --collect-detailed-traces COLLECT_DETAILED_TRACES
85
+ # Valid choices are model,worker,all. It makes sense to set this only if
86
+ # --otlp-traces-endpoint is set. If set, it will collect detailed traces for
87
+ # the specified modules. This involves use of possibly costly and or
88
+ # blocking operations and hence might have a performance impact.
89
+ # --compilation-config COMPILATION_CONFIG, -O COMPILATION_CONFIG
90
+ # torch.compile configuration for the model.When it is a number (0, 1, 2,
91
+ # 3), it will be interpreted as the optimization level. NOTE: level 0 is the
92
+ # default level without any optimization. level 1 and 2 are for internal
93
+ # testing only. level 3 is the recommended level for production. To specify
94
+ # the full compilation config, use a JSON string. Following the convention
95
+ # of traditional compilers, using -O without space is also supported. -O3 is
96
+ # equivalent to -O 3.
97
+ # --config CONFIG Read CLI options from a config file.Must be a YAML with the following opti
98
+ # ons:https://docs.vllm.ai/en/latest/serving/openai_compatible_server.html#c
99
+ # ommand-line-arguments-for-the-server
100
+ # --config-format {auto,hf,mistral}
101
+ # The format of the model config to load. * "auto" will try to load the
102
+ # config in hf format if available else it will try to load in mistral
103
+ # format
104
+ # --cpu-offload-gb CPU_OFFLOAD_GB
105
+ # The space in GiB to offload to CPU, per GPU. Default is 0, which means no
106
+ # offloading. Intuitively, this argument can be seen as a virtual way to
107
+ # increase the GPU memory size. For example, if you have one 24 GB GPU and
108
+ # set this to 10, virtually you can think of it as a 34 GB GPU. Then you can
109
+ # load a 13B model with BF16 weight, which requires at least 26GB GPU
110
+ # memory. Note that this requires fast CPU-GPU interconnect, as part of the
111
+ # model is loaded from CPU memory to GPU memory on the fly in each model
112
+ # forward pass.
113
+ # --device {auto,cuda,neuron,cpu,openvino,tpu,xpu,hpu}
114
+ # Device type for vLLM execution.
115
+ # --disable-async-output-proc
116
+ # Disable async output processing. This may result in lower performance.
117
+ # --disable-custom-all-reduce
118
+ # See ParallelConfig.
119
+ # --disable-fastapi-docs
120
+ # Disable FastAPI's OpenAPI schema, Swagger UI, and ReDoc endpoint
121
+ # --disable-frontend-multiprocessing
122
+ # If specified, will run the OpenAI frontend server in the same process as
123
+ # the model serving engine.
124
+ # --disable-log-requests
125
+ # Disable logging requests.
126
+ # --disable-log-stats Disable logging statistics.
127
+ # --disable-logprobs-during-spec-decoding [DISABLE_LOGPROBS_DURING_SPEC_DECODING]
128
+ # If set to True, token log probabilities are not returned during
129
+ # speculative decoding. If set to False, log probabilities are returned
130
+ # according to the settings in SamplingParams. If not specified, it defaults
131
+ # to True. Disabling log probabilities during speculative decoding reduces
132
+ # latency by skipping logprob calculation in proposal sampling, target
133
+ # sampling, and after accepted tokens are determined.
134
+ # --disable-sliding-window
135
+ # Disables sliding window, capping to sliding window size
136
+ # --distributed-executor-backend {ray,mp}
137
+ # Backend to use for distributed model workers, either "ray" or "mp"
138
+ # (multiprocessing). If the product of pipeline_parallel_size and
139
+ # tensor_parallel_size is less than or equal to the number of GPUs
140
+ # available, "mp" will be used to keep processing on a single host.
141
+ # Otherwise, this will default to "ray" if Ray is installed and fail
142
+ # otherwise. Note that tpu and hpu only support Ray for distributed
143
+ # inference.
144
+ # --download-dir DOWNLOAD_DIR
145
+ # Directory to download and load the weights, default to the default cache
146
+ # dir of huggingface.
147
+ # --dtype {auto,half,float16,bfloat16,float,float32}
148
+ # Data type for model weights and activations. * "auto" will use FP16
149
+ # precision for FP32 and FP16 models, and BF16 precision for BF16 models. *
150
+ # "half" for FP16. Recommended for AWQ quantization. * "float16" is the same
151
+ # as "half". * "bfloat16" for a balance between precision and range. *
152
+ # "float" is shorthand for FP32 precision. * "float32" for FP32 precision.
153
+ # --enable-auto-tool-choice
154
+ # Enable auto tool choice for supported models. Use --tool-call-parser to
155
+ # specify which parser to use
156
+ # --enable-chunked-prefill [ENABLE_CHUNKED_PREFILL]
157
+ # If set, the prefill requests can be chunked based on the
158
+ # max_num_batched_tokens.
159
+ # --enable-lora If True, enable handling of LoRA adapters.
160
+ # --enable-lora-bias If True, enable bias for LoRA adapters.
161
+ # --enable-prefix-caching, --no-enable-prefix-caching
162
+ # Enables automatic prefix caching. Use --no-enable-prefix-caching to
163
+ # disable explicitly.
164
+ # --enable-prompt-adapter
165
+ # If True, enable handling of PromptAdapters.
166
+ # --enable-prompt-tokens-details
167
+ # If set to True, enable prompt_tokens_details in usage.
168
+ # --enforce-eager Always use eager-mode PyTorch. If False, will use eager mode and CUDA
169
+ # graph in hybrid for maximal performance and flexibility.
170
+ # --fully-sharded-loras
171
+ # By default, only half of the LoRA computation is sharded with tensor
172
+ # parallelism. Enabling this will use the fully sharded layers. At high
173
+ # sequence length, max rank or tensor parallel size, this is likely faster.
174
+ # --gpu-memory-utilization GPU_MEMORY_UTILIZATION
175
+ # The fraction of GPU memory to be used for the model executor, which can
176
+ # range from 0 to 1. For example, a value of 0.5 would imply 50% GPU memory
177
+ # utilization. If unspecified, will use the default value of 0.9. This is a
178
+ # global gpu memory utilization limit, for example if 50% of the gpu memory
179
+ # is already used before vLLM starts and --gpu-memory-utilization is set to
180
+ # 0.9, then only 40% of the gpu memory will be allocated to the model
181
+ # executor.
182
+ # --guided-decoding-backend {outlines,lm-format-enforcer,xgrammar}
183
+ # Which engine will be used for guided decoding (JSON schema / regex etc) by
184
+ # default. Currently support https://github.com/outlines-
185
+ # dev/outlines,https://github.com/mlc-ai/xgrammar, and
186
+ # https://github.com/noamgat/lm-format-enforcer. Can be overridden per
187
+ # request via guided_decoding_backend parameter.
188
+ # --hf-overrides HF_OVERRIDES
189
+ # Extra arguments for the HuggingFace config. This should be a JSON string
190
+ # that will be parsed into a dictionary.
191
+ # --host HOST host name
192
+ # --ignore-patterns IGNORE_PATTERNS
193
+ # The pattern(s) to ignore when loading the model.Default to `original/**/*`
194
+ # to avoid repeated loading of llama's checkpoints.
195
+ # --kv-cache-dtype {auto,fp8,fp8_e5m2,fp8_e4m3}
196
+ # Data type for kv cache storage. If "auto", will use model data type. CUDA
197
+ # 11.8+ supports fp8 (=fp8_e4m3) and fp8_e5m2. ROCm (AMD GPU) supports fp8
198
+ # (=fp8_e4m3)
199
+ # --kv-transfer-config KV_TRANSFER_CONFIG
200
+ # The configurations for distributed KV cache transfer. Should be a JSON
201
+ # string.
202
+ # --limit-mm-per-prompt LIMIT_MM_PER_PROMPT
203
+ # For each multimodal plugin, limit how many input instances to allow for
204
+ # each prompt. Expects a comma-separated list of items, e.g.:
205
+ # `image=16,video=2` allows a maximum of 16 images and 2 videos per prompt.
206
+ # Defaults to 1 for each modality.
207
+ # --load-format {auto,pt,safetensors,npcache,dummy,tensorizer,sharded_state,gguf,bitsandbytes,mistral}
208
+ # The format of the model weights to load. * "auto" will try to load the
209
+ # weights in the safetensors format and fall back to the pytorch bin format
210
+ # if safetensors format is not available. * "pt" will load the weights in
211
+ # the pytorch bin format. * "safetensors" will load the weights in the
212
+ # safetensors format. * "npcache" will load the weights in pytorch format
213
+ # and store a numpy cache to speed up the loading. * "dummy" will initialize
214
+ # the weights with random values, which is mainly for profiling. *
215
+ # "tensorizer" will load the weights using tensorizer from CoreWeave. See
216
+ # the Tensorize vLLM Model script in the Examples section for more
217
+ # information. * "bitsandbytes" will load the weights using bitsandbytes
218
+ # quantization.
219
+ # --long-lora-scaling-factors LONG_LORA_SCALING_FACTORS
220
+ # Specify multiple scaling factors (which can be different from base model
221
+ # scaling factor - see eg. Long LoRA) to allow for multiple LoRA adapters
222
+ # trained with those scaling factors to be used at the same time. If not
223
+ # specified, only adapters trained with the base model scaling factor are
224
+ # allowed.
225
+ # --lora-dtype {auto,float16,bfloat16}
226
+ # Data type for LoRA. If auto, will default to base model dtype.
227
+ # --lora-extra-vocab-size LORA_EXTRA_VOCAB_SIZE
228
+ # Maximum size of extra vocabulary that can be present in a LoRA adapter
229
+ # (added to the base model vocabulary).
230
+ # --lora-modules LORA_MODULES [LORA_MODULES ...]
231
+ # LoRA module configurations in either 'name=path' formator JSON format.
232
+ # Example (old format): 'name=path' Example (new format): '{"name": "name",
233
+ # "local_path": "path", "base_model_name": "id"}'
234
+ # --max-cpu-loras MAX_CPU_LORAS
235
+ # Maximum number of LoRAs to store in CPU memory. Must be >= than max_loras.
236
+ # Defaults to max_loras.
237
+ # --max-log-len MAX_LOG_LEN
238
+ # Max number of prompt characters or prompt ID numbers being printed in log.
239
+ # Default: Unlimited
240
+ # --max-logprobs MAX_LOGPROBS
241
+ # Max number of log probs to return logprobs is specified in SamplingParams.
242
+ # --max-lora-rank MAX_LORA_RANK
243
+ # Max LoRA rank.
244
+ # --max-loras MAX_LORAS
245
+ # Max number of LoRAs in a single batch.
246
+ # --max-model-len MAX_MODEL_LEN
247
+ # Model context length. If unspecified, will be automatically derived from
248
+ # the model config.
249
+ # --max-num-batched-tokens MAX_NUM_BATCHED_TOKENS
250
+ # Maximum number of batched tokens per iteration.
251
+ # --max-num-seqs MAX_NUM_SEQS
252
+ # Maximum number of sequences per iteration.
253
+ # --max-parallel-loading-workers MAX_PARALLEL_LOADING_WORKERS
254
+ # Load model sequentially in multiple batches, to avoid RAM OOM when using
255
+ # tensor parallel and large models.
256
+ # --max-prompt-adapter-token MAX_PROMPT_ADAPTER_TOKEN
257
+ # Max number of PromptAdapters tokens
258
+ # --max-prompt-adapters MAX_PROMPT_ADAPTERS
259
+ # Max number of PromptAdapters in a batch.
260
+ # --max-seq-len-to-capture MAX_SEQ_LEN_TO_CAPTURE
261
+ # Maximum sequence length covered by CUDA graphs. When a sequence has
262
+ # context length larger than this, we fall back to eager mode. Additionally
263
+ # for encoder-decoder models, if the sequence length of the encoder input is
264
+ # larger than this, we fall back to the eager mode.
265
+ # --middleware MIDDLEWARE
266
+ # Additional ASGI middleware to apply to the app. We accept multiple
267
+ # --middleware arguments. The value should be an import path. If a function
268
+ # is provided, vLLM will add it to the server using @app.middleware('http').
269
+ # If a class is provided, vLLM will add it to the server using
270
+ # app.add_middleware().
271
+ # --mm-processor-kwargs MM_PROCESSOR_KWARGS
272
+ # Overrides for the multimodal input mapping/processing, e.g., image
273
+ # processor. For example: {"num_crops": 4}.
274
+ # --model MODEL Name or path of the huggingface model to use.
275
+ # --model-loader-extra-config MODEL_LOADER_EXTRA_CONFIG
276
+ # Extra config for model loader. This will be passed to the model loader
277
+ # corresponding to the chosen load_format. This should be a JSON string that
278
+ # will be parsed into a dictionary.
279
+ # --multi-step-stream-outputs [MULTI_STEP_STREAM_OUTPUTS]
280
+ # If False, then multi-step will stream outputs at the end of all steps
281
+ # --ngram-prompt-lookup-max NGRAM_PROMPT_LOOKUP_MAX
282
+ # Max size of window for ngram prompt lookup in speculative decoding.
283
+ # --ngram-prompt-lookup-min NGRAM_PROMPT_LOOKUP_MIN
284
+ # Min size of window for ngram prompt lookup in speculative decoding.
285
+ # --num-gpu-blocks-override NUM_GPU_BLOCKS_OVERRIDE
286
+ # If specified, ignore GPU profiling result and use this number of GPU
287
+ # blocks. Used for testing preemption.
288
+ # --num-lookahead-slots NUM_LOOKAHEAD_SLOTS
289
+ # Experimental scheduling config necessary for speculative decoding. This
290
+ # will be replaced by speculative config in the future; it is present to
291
+ # enable correctness tests until then.
292
+ # --num-scheduler-steps NUM_SCHEDULER_STEPS
293
+ # Maximum number of forward steps per scheduler call.
294
+ # --num-speculative-tokens NUM_SPECULATIVE_TOKENS
295
+ # The number of speculative tokens to sample from the draft model in
296
+ # speculative decoding.
297
+ # --otlp-traces-endpoint OTLP_TRACES_ENDPOINT
298
+ # Target URL to which OpenTelemetry traces will be sent.
299
+ # --override-neuron-config OVERRIDE_NEURON_CONFIG
300
+ # Override or set neuron device configuration. e.g. {"cast_logits_dtype":
301
+ # "bloat16"}.'
302
+ # --override-pooler-config OVERRIDE_POOLER_CONFIG
303
+ # Override or set the pooling method in the embedding model. e.g.
304
+ # {"pooling_type": "mean", "normalize": false}.'
305
+ # --pipeline-parallel-size PIPELINE_PARALLEL_SIZE, -pp PIPELINE_PARALLEL_SIZE
306
+ # Number of pipeline stages.
307
+ # --port PORT port number
308
+ # --preemption-mode PREEMPTION_MODE
309
+ # If 'recompute', the engine performs preemption by recomputing; If 'swap',
310
+ # the engine performs preemption by block swapping.
311
+ # --prompt-adapters PROMPT_ADAPTERS [PROMPT_ADAPTERS ...]
312
+ # Prompt adapter configurations in the format name=path. Multiple adapters
313
+ # can be specified.
314
+ # --qlora-adapter-name-or-path QLORA_ADAPTER_NAME_OR_PATH
315
+ # Name or path of the QLoRA adapter.
316
+ # --quantization {aqlm,awq,deepspeedfp,tpu_int8,fp8,fbgemm_fp8,modelopt,marlin,gguf,gptq_marlin_24,gptq_marlin,awq_marlin,gptq,compressed-tensors,bitsandbytes,qqq,hqq,experts_int8,neuron_quant,ipex,None}, -q {aqlm,awq,deepspeedfp,tpu_int8,fp8,fbgemm_fp8,modelopt,marlin,gguf,gptq_marlin_24,gptq_marlin,awq_marlin,gptq,compressed-tensors,bitsandbytes,qqq,hqq,experts_int8,neuron_quant,ipex,None}
317
+ # Method used to quantize the weights. If None, we first check the
318
+ # `quantization_config` attribute in the model config file. If that is None,
319
+ # we assume the model weights are not quantized and use `dtype` to determine
320
+ # the data type of the weights.
321
+ # --quantization-param-path QUANTIZATION_PARAM_PATH
322
+ # Path to the JSON file containing the KV cache scaling factors. This should
323
+ # generally be supplied, when KV cache dtype is FP8. Otherwise, KV cache
324
+ # scaling factors default to 1.0, which may cause accuracy issues. FP8_E5M2
325
+ # (without scaling) is only supported on cuda version greater than 11.8. On
326
+ # ROCm (AMD GPU), FP8_E4M3 is instead supported for common inference
327
+ # criteria.
328
+ # --ray-workers-use-nsight
329
+ # If specified, use nsight to profile Ray workers.
330
+ # --response-role RESPONSE_ROLE
331
+ # The role name to return if `request.add_generation_prompt=true`.
332
+ # --return-tokens-as-token-ids
333
+ # When --max-logprobs is specified, represents single tokens as strings of
334
+ # the form 'token_id:{token_id}' so that tokens that are not JSON-encodable
335
+ # can be identified.
336
+ # --revision REVISION The specific model version to use. It can be a branch name, a tag name, or
337
+ # a commit id. If unspecified, will use the default version.
338
+ # --root-path ROOT_PATH
339
+ # FastAPI root_path when app is behind a path based routing proxy
340
+ # --rope-scaling ROPE_SCALING
341
+ # RoPE scaling configuration in JSON format. For example,
342
+ # {"rope_type":"dynamic","factor":2.0}
343
+ # --rope-theta ROPE_THETA
344
+ # RoPE theta. Use with `rope_scaling`. In some cases, changing the RoPE
345
+ # theta improves the performance of the scaled model.
346
+ # --scheduler-delay-factor SCHEDULER_DELAY_FACTOR
347
+ # Apply a delay (of delay factor multiplied by previous prompt latency)
348
+ # before scheduling next prompt.
349
+ # --scheduling-policy {fcfs,priority}
350
+ # The scheduling policy to use. "fcfs" (first come first served, i.e.
351
+ # requests are handled in order of arrival; default) or "priority" (requests
352
+ # are handled based on given priority (lower value means earlier handling)
353
+ # and time of arrival deciding any ties).
354
+ # --seed SEED Random seed for operations.
355
+ # --served-model-name SERVED_MODEL_NAME [SERVED_MODEL_NAME ...]
356
+ # The model name(s) used in the API. If multiple names are provided, the
357
+ # server will respond to any of the provided names. The model name in the
358
+ # model field of a response will be the first name in this list. If not
359
+ # specified, the model name will be the same as the `--model` argument.
360
+ # Note that these name(s) will also be used in the `model_name` tag of
361
+ # Prometheus metrics; if multiple names are provided, the metrics tag will take
362
+ # the first one.
363
+ # --skip-tokenizer-init
364
+ # Skip initialization of tokenizer and detokenizer
365
+ # --spec-decoding-acceptance-method {rejection_sampler,typical_acceptance_sampler}
366
+ # Specify the acceptance method to use during draft token verification in
367
+ # speculative decoding. Two types of acceptance routines are supported: 1)
368
+ # RejectionSampler which does not allow changing the acceptance rate of
369
+ # draft tokens, 2) TypicalAcceptanceSampler which is configurable, allowing
370
+ # for a higher acceptance rate at the cost of lower quality, and vice versa.
371
+ # --speculative-disable-by-batch-size SPECULATIVE_DISABLE_BY_BATCH_SIZE
372
+ # Disable speculative decoding for new incoming requests if the number of
373
+ # enqueue requests is larger than this value.
374
+ # --speculative-disable-mqa-scorer
375
+ # If set to True, the MQA scorer will be disabled in speculative decoding and
376
+ # vLLM will fall back to batch expansion.
377
+ # --speculative-draft-tensor-parallel-size SPECULATIVE_DRAFT_TENSOR_PARALLEL_SIZE, -spec-draft-tp SPECULATIVE_DRAFT_TENSOR_PARALLEL_SIZE
378
+ # Number of tensor parallel replicas for the draft model in speculative
379
+ # decoding.
380
+ # --speculative-max-model-len SPECULATIVE_MAX_MODEL_LEN
381
+ # The maximum sequence length supported by the draft model. Sequences over
382
+ # this length will skip speculation.
383
+ # --speculative-model SPECULATIVE_MODEL
384
+ # The name of the draft model to be used in speculative decoding.
385
+ # --speculative-model-quantization {aqlm,awq,deepspeedfp,tpu_int8,fp8,fbgemm_fp8,modelopt,marlin,gguf,gptq_marlin_24,gptq_marlin,awq_marlin,gptq,compressed-tensors,bitsandbytes,qqq,hqq,experts_int8,neuron_quant,ipex,None}
386
+ # Method used to quantize the weights of speculative model. If None, we
387
+ # first check the `quantization_config` attribute in the model config file.
388
+ # If that is None, we assume the model weights are not quantized and use
389
+ # `dtype` to determine the data type of the weights.
390
+ # --ssl-ca-certs SSL_CA_CERTS
391
+ # The CA certificates file
392
+ # --ssl-cert-reqs SSL_CERT_REQS
393
+ # Whether client certificate is required (see stdlib ssl module's)
394
+ # --ssl-certfile SSL_CERTFILE
395
+ # The file path to the SSL cert file
396
+ # --ssl-keyfile SSL_KEYFILE
397
+ # The file path to the SSL key file
398
+ # --swap-space SWAP_SPACE
399
+ # CPU swap space size (GiB) per GPU.
400
+ # --task {auto,generate,embedding}
401
+ # The task to use the model for. Each vLLM instance only supports one task,
402
+ # even if the same model can be used for multiple tasks. When the model only
403
+ # supports one task, "auto" can be used to select it; otherwise, you must
404
+ # specify explicitly which task to use.
405
+ # --tensor-parallel-size TENSOR_PARALLEL_SIZE, -tp TENSOR_PARALLEL_SIZE
406
+ # Number of tensor parallel replicas.
407
+ # --tokenizer TOKENIZER
408
+ # Name or path of the huggingface tokenizer to use. If unspecified, model
409
+ # name or path will be used.
410
+ # --tokenizer-mode {auto,slow,mistral}
411
+ # The tokenizer mode. * "auto" will use the fast tokenizer if available. *
412
+ # "slow" will always use the slow tokenizer. * "mistral" will always use the
413
+ # `mistral_common` tokenizer.
414
+ # --tokenizer-pool-extra-config TOKENIZER_POOL_EXTRA_CONFIG
415
+ # Extra config for tokenizer pool. This should be a JSON string that will be
416
+ # parsed into a dictionary. Ignored if tokenizer_pool_size is 0.
417
+ # --tokenizer-pool-size TOKENIZER_POOL_SIZE
418
+ # Size of tokenizer pool to use for asynchronous tokenization. If 0, will
419
+ # use synchronous tokenization.
420
+ # --tokenizer-pool-type TOKENIZER_POOL_TYPE
421
+ # Type of tokenizer pool to use for asynchronous tokenization. Ignored if
422
+ # tokenizer_pool_size is 0.
423
+ # --tokenizer-revision TOKENIZER_REVISION
424
+ # Revision of the huggingface tokenizer to use. It can be a branch name, a
425
+ # tag name, or a commit id. If unspecified, will use the default version.
426
+ # --tool-call-parser {granite-20b-fc,granite,hermes,internlm,jamba,llama3_json,mistral,pythonic} or name registered in --tool-parser-plugin
427
+ # Select the tool call parser depending on the model that you're using. This
428
+ # is used to parse the model-generated tool call into OpenAI API format.
429
+ # Required for --enable-auto-tool-choice.
430
+ # --tool-parser-plugin TOOL_PARSER_PLUGIN
431
+ # Specify the tool parser plugin used to parse model-generated tool calls
432
+ # into OpenAI API format; the names registered in this plugin can be used in
433
+ # --tool-call-parser.
434
+ # --trust-remote-code Trust remote code from huggingface.
435
+ # --typical-acceptance-sampler-posterior-alpha TYPICAL_ACCEPTANCE_SAMPLER_POSTERIOR_ALPHA
436
+ # A scaling factor for the entropy-based threshold for token acceptance in
437
+ # the TypicalAcceptanceSampler. Typically defaults to sqrt of --typical-
438
+ # acceptance-sampler-posterior-threshold i.e. 0.3
439
+ # --typical-acceptance-sampler-posterior-threshold TYPICAL_ACCEPTANCE_SAMPLER_POSTERIOR_THRESHOLD
440
+ # Set the lower bound threshold for the posterior probability of a token to
441
+ # be accepted. This threshold is used by the TypicalAcceptanceSampler to
442
+ # make sampling decisions during speculative decoding. Defaults to 0.09
443
+ # --use-v2-block-manager
444
+ # [DEPRECATED] block manager v1 has been removed and
445
+ # SelfAttnBlockSpaceManager (i.e. block manager v2) is now the default.
446
+ # Setting this flag to True or False has no effect on vLLM behavior.
447
+ # --uvicorn-log-level {debug,info,warning,error,critical,trace}
448
+ # log level for uvicorn
449
+ # --worker-cls WORKER_CLS
450
+ # The worker class to use for distributed execution.
451
+ # --worker-use-ray Deprecated, use --distributed-executor-backend=ray.
452
+ # -h, --help show this help message and exit
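The help text above enumerates the `vllm serve` options referenced throughout these scripts. As a minimal sketch of how a few of them combine into a launch command (the model name, port, parallelism, and served-model alias below are illustrative assumptions, not values taken from this commit):

```bash
# Minimal sketch: launch an OpenAI-compatible vLLM server with a handful of
# the flags documented above. Model name, port, tensor-parallel size, and the
# served model alias are illustrative placeholders.
vllm serve Qwen/Qwen2-VL-7B-Instruct \
    --host 0.0.0.0 \
    --port 8000 \
    --tensor-parallel-size 2 \
    --max-model-len 8192 \
    --limit-mm-per-prompt image=4 \
    --served-model-name my-vlm
```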
a_mllm_notebooks/vllm/start.ipynb ADDED
@@ -0,0 +1,432 @@
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": 3,
6
+ "metadata": {},
7
+ "outputs": [],
8
+ "source": [
9
+ "from vllm import LLM, SamplingParams"
10
+ ]
11
+ },
12
+ {
13
+ "cell_type": "code",
14
+ "execution_count": 9,
15
+ "metadata": {},
16
+ "outputs": [
17
+ {
18
+ "name": "stdout",
19
+ "output_type": "stream",
20
+ "text": [
21
+ "File downloaded successfully: quickstart.html\n"
22
+ ]
23
+ }
24
+ ],
25
+ "source": [
26
+ "import requests\n",
27
+ "\n",
28
+ "def download_file(url, filename):\n",
29
+ " try:\n",
30
+ " r = requests.get(url)\n",
31
+ " r.raise_for_status() # Check if the request was successful\n",
32
+ " with open(filename, 'wb') as f:\n",
33
+ " f.write(r.content)\n",
34
+ " print(f\"File downloaded successfully: {filename}\")\n",
35
+ " except requests.exceptions.RequestException as e:\n",
36
+ " print(f\"Failed to download file: {e}\")\n",
37
+ "\n",
38
+ "download_file('https://docs.vllm.ai/en/stable/getting_started/quickstart.html', 'quickstart.html')"
39
+ ]
40
+ },
41
+ {
42
+ "cell_type": "code",
43
+ "execution_count": null,
44
+ "metadata": {},
45
+ "outputs": [
46
+ {
47
+ "name": "stdout",
48
+ "output_type": "stream",
49
+ "text": [
50
+ "\u001b[33mWARNING: Ignoring invalid distribution -vidia-cublas-cu12 (/dscilab_dungvo/workspace/bin/envs/vllm/lib/python3.10/site-packages)\u001b[0m\u001b[33m\n",
51
+ "\u001b[0mCollecting jupyter\n",
52
+ " Downloading jupyter-1.1.1-py2.py3-none-any.whl.metadata (2.0 kB)\n",
53
+ "Collecting notebook (from jupyter)\n",
54
+ " Downloading notebook-7.3.2-py3-none-any.whl.metadata (10 kB)\n",
55
+ "Collecting jupyter-console (from jupyter)\n",
56
+ " Downloading jupyter_console-6.6.3-py3-none-any.whl.metadata (5.8 kB)\n",
57
+ "Collecting nbconvert (from jupyter)\n",
58
+ " Downloading nbconvert-7.16.4-py3-none-any.whl.metadata (8.5 kB)\n",
59
+ "Requirement already satisfied: ipykernel in /dscilab_dungvo/workspace/bin/envs/vllm/lib/python3.10/site-packages (from jupyter) (6.29.5)\n",
60
+ "Collecting ipywidgets (from jupyter)\n",
61
+ " Downloading ipywidgets-8.1.5-py3-none-any.whl.metadata (2.3 kB)\n",
62
+ "Collecting jupyterlab (from jupyter)\n",
63
+ " Downloading jupyterlab-4.3.4-py3-none-any.whl.metadata (16 kB)\n",
64
+ "Requirement already satisfied: comm>=0.1.1 in /dscilab_dungvo/workspace/bin/envs/vllm/lib/python3.10/site-packages (from ipykernel->jupyter) (0.2.2)\n",
65
+ "Requirement already satisfied: debugpy>=1.6.5 in /dscilab_dungvo/workspace/bin/envs/vllm/lib/python3.10/site-packages (from ipykernel->jupyter) (1.8.9)\n",
66
+ "Requirement already satisfied: ipython>=7.23.1 in /dscilab_dungvo/workspace/bin/envs/vllm/lib/python3.10/site-packages (from ipykernel->jupyter) (8.30.0)\n",
67
+ "Requirement already satisfied: jupyter-client>=6.1.12 in /dscilab_dungvo/workspace/bin/envs/vllm/lib/python3.10/site-packages (from ipykernel->jupyter) (8.6.3)\n",
68
+ "Requirement already satisfied: jupyter-core!=5.0.*,>=4.12 in /dscilab_dungvo/workspace/bin/envs/vllm/lib/python3.10/site-packages (from ipykernel->jupyter) (5.7.2)\n",
69
+ "Requirement already satisfied: matplotlib-inline>=0.1 in /dscilab_dungvo/workspace/bin/envs/vllm/lib/python3.10/site-packages (from ipykernel->jupyter) (0.1.7)\n",
70
+ "Requirement already satisfied: nest-asyncio in /dscilab_dungvo/workspace/bin/envs/vllm/lib/python3.10/site-packages (from ipykernel->jupyter) (1.6.0)\n",
71
+ "Requirement already satisfied: packaging in /dscilab_dungvo/workspace/bin/envs/vllm/lib/python3.10/site-packages (from ipykernel->jupyter) (24.1)\n",
72
+ "Requirement already satisfied: psutil in /dscilab_dungvo/workspace/bin/envs/vllm/lib/python3.10/site-packages (from ipykernel->jupyter) (6.0.0)\n",
73
+ "Requirement already satisfied: pyzmq>=24 in /dscilab_dungvo/workspace/bin/envs/vllm/lib/python3.10/site-packages (from ipykernel->jupyter) (26.2.0)\n",
74
+ "Requirement already satisfied: tornado>=6.1 in /dscilab_dungvo/workspace/bin/envs/vllm/lib/python3.10/site-packages (from ipykernel->jupyter) (6.4.2)\n",
75
+ "Requirement already satisfied: traitlets>=5.4.0 in /dscilab_dungvo/workspace/bin/envs/vllm/lib/python3.10/site-packages (from ipykernel->jupyter) (5.14.3)\n",
76
+ "Collecting widgetsnbextension~=4.0.12 (from ipywidgets->jupyter)\n",
77
+ " Downloading widgetsnbextension-4.0.13-py3-none-any.whl.metadata (1.6 kB)\n",
78
+ "Collecting jupyterlab-widgets~=3.0.12 (from ipywidgets->jupyter)\n",
79
+ " Downloading jupyterlab_widgets-3.0.13-py3-none-any.whl.metadata (4.1 kB)\n",
80
+ "Requirement already satisfied: prompt-toolkit>=3.0.30 in /dscilab_dungvo/workspace/bin/envs/vllm/lib/python3.10/site-packages (from jupyter-console->jupyter) (3.0.48)\n",
81
+ "Requirement already satisfied: pygments in /dscilab_dungvo/workspace/bin/envs/vllm/lib/python3.10/site-packages (from jupyter-console->jupyter) (2.18.0)\n",
82
+ "Collecting async-lru>=1.0.0 (from jupyterlab->jupyter)\n",
83
+ " Downloading async_lru-2.0.4-py3-none-any.whl.metadata (4.5 kB)\n",
84
+ "Requirement already satisfied: httpx>=0.25.0 in /dscilab_dungvo/workspace/bin/envs/vllm/lib/python3.10/site-packages (from jupyterlab->jupyter) (0.27.2)\n",
85
+ "Requirement already satisfied: jinja2>=3.0.3 in /dscilab_dungvo/workspace/bin/envs/vllm/lib/python3.10/site-packages (from jupyterlab->jupyter) (3.1.4)\n",
86
+ "Collecting jupyter-lsp>=2.0.0 (from jupyterlab->jupyter)\n",
87
+ " Downloading jupyter_lsp-2.2.5-py3-none-any.whl.metadata (1.8 kB)\n",
88
+ "Collecting jupyter-server<3,>=2.4.0 (from jupyterlab->jupyter)\n",
89
+ " Downloading jupyter_server-2.15.0-py3-none-any.whl.metadata (8.4 kB)\n",
90
+ "Collecting jupyterlab-server<3,>=2.27.1 (from jupyterlab->jupyter)\n",
91
+ " Downloading jupyterlab_server-2.27.3-py3-none-any.whl.metadata (5.9 kB)\n",
92
+ "Collecting notebook-shim>=0.2 (from jupyterlab->jupyter)\n",
93
+ " Downloading notebook_shim-0.2.4-py3-none-any.whl.metadata (4.0 kB)\n",
94
+ "Requirement already satisfied: setuptools>=40.8.0 in /dscilab_dungvo/workspace/bin/envs/vllm/lib/python3.10/site-packages (from jupyterlab->jupyter) (72.1.0)\n",
95
+ "Requirement already satisfied: tomli>=1.2.2 in /dscilab_dungvo/workspace/bin/envs/vllm/lib/python3.10/site-packages (from jupyterlab->jupyter) (2.2.1)\n",
96
+ "Requirement already satisfied: beautifulsoup4 in /dscilab_dungvo/workspace/bin/envs/vllm/lib/python3.10/site-packages (from nbconvert->jupyter) (4.12.3)\n",
97
+ "Collecting bleach!=5.0.0 (from nbconvert->jupyter)\n",
98
+ " Downloading bleach-6.2.0-py3-none-any.whl.metadata (30 kB)\n",
99
+ "Collecting defusedxml (from nbconvert->jupyter)\n",
100
+ " Downloading defusedxml-0.7.1-py2.py3-none-any.whl.metadata (32 kB)\n",
101
+ "Collecting jupyterlab-pygments (from nbconvert->jupyter)\n",
102
+ " Downloading jupyterlab_pygments-0.3.0-py3-none-any.whl.metadata (4.4 kB)\n",
103
+ "Requirement already satisfied: markupsafe>=2.0 in /dscilab_dungvo/workspace/bin/envs/vllm/lib/python3.10/site-packages (from nbconvert->jupyter) (2.1.5)\n",
104
+ "Collecting mistune<4,>=2.0.3 (from nbconvert->jupyter)\n",
105
+ " Downloading mistune-3.0.2-py3-none-any.whl.metadata (1.7 kB)\n",
106
+ "Collecting nbclient>=0.5.0 (from nbconvert->jupyter)\n",
107
+ " Downloading nbclient-0.10.2-py3-none-any.whl.metadata (8.3 kB)\n",
108
+ "Requirement already satisfied: nbformat>=5.7 in /dscilab_dungvo/workspace/bin/envs/vllm/lib/python3.10/site-packages (from nbconvert->jupyter) (5.10.4)\n",
109
+ "Collecting pandocfilters>=1.4.1 (from nbconvert->jupyter)\n",
110
+ " Downloading pandocfilters-1.5.1-py2.py3-none-any.whl.metadata (9.0 kB)\n",
111
+ "Collecting tinycss2 (from nbconvert->jupyter)\n",
112
+ " Downloading tinycss2-1.4.0-py3-none-any.whl.metadata (3.0 kB)\n",
113
+ "Requirement already satisfied: typing-extensions>=4.0.0 in /dscilab_dungvo/workspace/bin/envs/vllm/lib/python3.10/site-packages (from async-lru>=1.0.0->jupyterlab->jupyter) (4.12.2)\n",
114
+ "Collecting webencodings (from bleach!=5.0.0->nbconvert->jupyter)\n",
115
+ " Downloading webencodings-0.5.1-py2.py3-none-any.whl.metadata (2.1 kB)\n",
116
+ "Requirement already satisfied: anyio in /dscilab_dungvo/workspace/bin/envs/vllm/lib/python3.10/site-packages (from httpx>=0.25.0->jupyterlab->jupyter) (4.4.0)\n",
117
+ "Requirement already satisfied: certifi in /dscilab_dungvo/workspace/bin/envs/vllm/lib/python3.10/site-packages (from httpx>=0.25.0->jupyterlab->jupyter) (2024.8.30)\n",
118
+ "Requirement already satisfied: httpcore==1.* in /dscilab_dungvo/workspace/bin/envs/vllm/lib/python3.10/site-packages (from httpx>=0.25.0->jupyterlab->jupyter) (1.0.5)\n",
119
+ "Requirement already satisfied: idna in /dscilab_dungvo/workspace/bin/envs/vllm/lib/python3.10/site-packages (from httpx>=0.25.0->jupyterlab->jupyter) (3.10)\n",
120
+ "Requirement already satisfied: sniffio in /dscilab_dungvo/workspace/bin/envs/vllm/lib/python3.10/site-packages (from httpx>=0.25.0->jupyterlab->jupyter) (1.3.1)\n",
121
+ "Requirement already satisfied: h11<0.15,>=0.13 in /dscilab_dungvo/workspace/bin/envs/vllm/lib/python3.10/site-packages (from httpcore==1.*->httpx>=0.25.0->jupyterlab->jupyter) (0.14.0)\n",
122
+ "Requirement already satisfied: decorator in /dscilab_dungvo/workspace/bin/envs/vllm/lib/python3.10/site-packages (from ipython>=7.23.1->ipykernel->jupyter) (5.1.1)\n",
123
+ "Requirement already satisfied: exceptiongroup in /dscilab_dungvo/workspace/bin/envs/vllm/lib/python3.10/site-packages (from ipython>=7.23.1->ipykernel->jupyter) (1.2.2)\n",
124
+ "Requirement already satisfied: jedi>=0.16 in /dscilab_dungvo/workspace/bin/envs/vllm/lib/python3.10/site-packages (from ipython>=7.23.1->ipykernel->jupyter) (0.19.2)\n",
125
+ "Requirement already satisfied: pexpect>4.3 in /dscilab_dungvo/workspace/bin/envs/vllm/lib/python3.10/site-packages (from ipython>=7.23.1->ipykernel->jupyter) (4.9.0)\n",
126
+ "Requirement already satisfied: stack_data in /dscilab_dungvo/workspace/bin/envs/vllm/lib/python3.10/site-packages (from ipython>=7.23.1->ipykernel->jupyter) (0.6.2)\n",
127
+ "Requirement already satisfied: python-dateutil>=2.8.2 in /dscilab_dungvo/workspace/bin/envs/vllm/lib/python3.10/site-packages (from jupyter-client>=6.1.12->ipykernel->jupyter) (2.9.0.post0)\n",
128
+ "Requirement already satisfied: platformdirs>=2.5 in /dscilab_dungvo/workspace/bin/envs/vllm/lib/python3.10/site-packages (from jupyter-core!=5.0.*,>=4.12->ipykernel->jupyter) (4.3.6)\n",
129
+ "Collecting argon2-cffi>=21.1 (from jupyter-server<3,>=2.4.0->jupyterlab->jupyter)\n",
130
+ " Downloading argon2_cffi-23.1.0-py3-none-any.whl.metadata (5.2 kB)\n",
131
+ "Collecting jupyter-events>=0.11.0 (from jupyter-server<3,>=2.4.0->jupyterlab->jupyter)\n",
132
+ " Downloading jupyter_events-0.11.0-py3-none-any.whl.metadata (5.8 kB)\n",
133
+ "Collecting jupyter-server-terminals>=0.4.4 (from jupyter-server<3,>=2.4.0->jupyterlab->jupyter)\n",
134
+ " Downloading jupyter_server_terminals-0.5.3-py3-none-any.whl.metadata (5.6 kB)\n",
135
+ "Collecting overrides>=5.0 (from jupyter-server<3,>=2.4.0->jupyterlab->jupyter)\n",
136
+ " Downloading overrides-7.7.0-py3-none-any.whl.metadata (5.8 kB)\n",
137
+ "Requirement already satisfied: prometheus-client>=0.9 in /dscilab_dungvo/workspace/bin/envs/vllm/lib/python3.10/site-packages (from jupyter-server<3,>=2.4.0->jupyterlab->jupyter) (0.20.0)\n",
138
+ "Collecting send2trash>=1.8.2 (from jupyter-server<3,>=2.4.0->jupyterlab->jupyter)\n",
139
+ " Downloading Send2Trash-1.8.3-py3-none-any.whl.metadata (4.0 kB)\n",
140
+ "Collecting terminado>=0.8.3 (from jupyter-server<3,>=2.4.0->jupyterlab->jupyter)\n",
141
+ " Downloading terminado-0.18.1-py3-none-any.whl.metadata (5.8 kB)\n",
142
+ "Collecting websocket-client>=1.7 (from jupyter-server<3,>=2.4.0->jupyterlab->jupyter)\n",
143
+ " Downloading websocket_client-1.8.0-py3-none-any.whl.metadata (8.0 kB)\n",
144
+ "Collecting babel>=2.10 (from jupyterlab-server<3,>=2.27.1->jupyterlab->jupyter)\n",
145
+ " Downloading babel-2.16.0-py3-none-any.whl.metadata (1.5 kB)\n",
146
+ "Collecting json5>=0.9.0 (from jupyterlab-server<3,>=2.27.1->jupyterlab->jupyter)\n",
147
+ " Downloading json5-0.10.0-py3-none-any.whl.metadata (34 kB)\n",
148
+ "Requirement already satisfied: jsonschema>=4.18.0 in /dscilab_dungvo/workspace/bin/envs/vllm/lib/python3.10/site-packages (from jupyterlab-server<3,>=2.27.1->jupyterlab->jupyter) (4.23.0)\n",
149
+ "Requirement already satisfied: requests>=2.31 in /dscilab_dungvo/workspace/bin/envs/vllm/lib/python3.10/site-packages (from jupyterlab-server<3,>=2.27.1->jupyterlab->jupyter) (2.32.3)\n",
150
+ "Requirement already satisfied: fastjsonschema>=2.15 in /dscilab_dungvo/workspace/bin/envs/vllm/lib/python3.10/site-packages (from nbformat>=5.7->nbconvert->jupyter) (2.21.1)\n",
151
+ "Requirement already satisfied: wcwidth in /dscilab_dungvo/workspace/bin/envs/vllm/lib/python3.10/site-packages (from prompt-toolkit>=3.0.30->jupyter-console->jupyter) (0.2.13)\n",
152
+ "Requirement already satisfied: soupsieve>1.2 in /dscilab_dungvo/workspace/bin/envs/vllm/lib/python3.10/site-packages (from beautifulsoup4->nbconvert->jupyter) (2.6)\n",
153
+ "Collecting argon2-cffi-bindings (from argon2-cffi>=21.1->jupyter-server<3,>=2.4.0->jupyterlab->jupyter)\n",
154
+ " Downloading argon2_cffi_bindings-21.2.0-cp36-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (6.7 kB)\n",
155
+ "Requirement already satisfied: parso<0.9.0,>=0.8.4 in /dscilab_dungvo/workspace/bin/envs/vllm/lib/python3.10/site-packages (from jedi>=0.16->ipython>=7.23.1->ipykernel->jupyter) (0.8.4)\n",
156
+ "Requirement already satisfied: attrs>=22.2.0 in /dscilab_dungvo/workspace/bin/envs/vllm/lib/python3.10/site-packages (from jsonschema>=4.18.0->jupyterlab-server<3,>=2.27.1->jupyterlab->jupyter) (24.2.0)\n",
157
+ "Requirement already satisfied: jsonschema-specifications>=2023.03.6 in /dscilab_dungvo/workspace/bin/envs/vllm/lib/python3.10/site-packages (from jsonschema>=4.18.0->jupyterlab-server<3,>=2.27.1->jupyterlab->jupyter) (2023.12.1)\n",
158
+ "Requirement already satisfied: referencing>=0.28.4 in /dscilab_dungvo/workspace/bin/envs/vllm/lib/python3.10/site-packages (from jsonschema>=4.18.0->jupyterlab-server<3,>=2.27.1->jupyterlab->jupyter) (0.35.1)\n",
159
+ "Requirement already satisfied: rpds-py>=0.7.1 in /dscilab_dungvo/workspace/bin/envs/vllm/lib/python3.10/site-packages (from jsonschema>=4.18.0->jupyterlab-server<3,>=2.27.1->jupyterlab->jupyter) (0.20.0)\n",
160
+ "Collecting python-json-logger>=2.0.4 (from jupyter-events>=0.11.0->jupyter-server<3,>=2.4.0->jupyterlab->jupyter)\n",
161
+ " Downloading python_json_logger-3.2.1-py3-none-any.whl.metadata (4.1 kB)\n",
162
+ "Requirement already satisfied: pyyaml>=5.3 in /dscilab_dungvo/workspace/bin/envs/vllm/lib/python3.10/site-packages (from jupyter-events>=0.11.0->jupyter-server<3,>=2.4.0->jupyterlab->jupyter) (6.0.2)\n",
163
+ "Collecting rfc3339-validator (from jupyter-events>=0.11.0->jupyter-server<3,>=2.4.0->jupyterlab->jupyter)\n",
164
+ " Downloading rfc3339_validator-0.1.4-py2.py3-none-any.whl.metadata (1.5 kB)\n",
165
+ "Collecting rfc3986-validator>=0.1.1 (from jupyter-events>=0.11.0->jupyter-server<3,>=2.4.0->jupyterlab->jupyter)\n",
166
+ " Downloading rfc3986_validator-0.1.1-py2.py3-none-any.whl.metadata (1.7 kB)\n",
167
+ "Requirement already satisfied: ptyprocess>=0.5 in /dscilab_dungvo/workspace/bin/envs/vllm/lib/python3.10/site-packages (from pexpect>4.3->ipython>=7.23.1->ipykernel->jupyter) (0.7.0)\n",
168
+ "Requirement already satisfied: six>=1.5 in /dscilab_dungvo/workspace/bin/envs/vllm/lib/python3.10/site-packages (from python-dateutil>=2.8.2->jupyter-client>=6.1.12->ipykernel->jupyter) (1.16.0)\n",
169
+ "Requirement already satisfied: charset-normalizer<4,>=2 in /dscilab_dungvo/workspace/bin/envs/vllm/lib/python3.10/site-packages (from requests>=2.31->jupyterlab-server<3,>=2.27.1->jupyterlab->jupyter) (3.3.2)\n",
170
+ "Requirement already satisfied: urllib3<3,>=1.21.1 in /dscilab_dungvo/workspace/bin/envs/vllm/lib/python3.10/site-packages (from requests>=2.31->jupyterlab-server<3,>=2.27.1->jupyterlab->jupyter) (2.2.3)\n",
171
+ "Requirement already satisfied: executing>=1.2.0 in /dscilab_dungvo/workspace/bin/envs/vllm/lib/python3.10/site-packages (from stack_data->ipython>=7.23.1->ipykernel->jupyter) (2.1.0)\n",
172
+ "Requirement already satisfied: asttokens>=2.1.0 in /dscilab_dungvo/workspace/bin/envs/vllm/lib/python3.10/site-packages (from stack_data->ipython>=7.23.1->ipykernel->jupyter) (3.0.0)\n",
173
+ "Requirement already satisfied: pure-eval in /dscilab_dungvo/workspace/bin/envs/vllm/lib/python3.10/site-packages (from stack_data->ipython>=7.23.1->ipykernel->jupyter) (0.2.3)\n",
174
+ "Collecting fqdn (from jsonschema[format-nongpl]>=4.18.0->jupyter-events>=0.11.0->jupyter-server<3,>=2.4.0->jupyterlab->jupyter)\n",
175
+ " Downloading fqdn-1.5.1-py3-none-any.whl.metadata (1.4 kB)\n",
176
+ "Collecting isoduration (from jsonschema[format-nongpl]>=4.18.0->jupyter-events>=0.11.0->jupyter-server<3,>=2.4.0->jupyterlab->jupyter)\n",
177
+ " Downloading isoduration-20.11.0-py3-none-any.whl.metadata (5.7 kB)\n",
178
+ "Collecting jsonpointer>1.13 (from jsonschema[format-nongpl]>=4.18.0->jupyter-events>=0.11.0->jupyter-server<3,>=2.4.0->jupyterlab->jupyter)\n",
179
+ " Downloading jsonpointer-3.0.0-py2.py3-none-any.whl.metadata (2.3 kB)\n",
180
+ "Collecting uri-template (from jsonschema[format-nongpl]>=4.18.0->jupyter-events>=0.11.0->jupyter-server<3,>=2.4.0->jupyterlab->jupyter)\n",
181
+ " Downloading uri_template-1.3.0-py3-none-any.whl.metadata (8.8 kB)\n",
182
+ "Collecting webcolors>=24.6.0 (from jsonschema[format-nongpl]>=4.18.0->jupyter-events>=0.11.0->jupyter-server<3,>=2.4.0->jupyterlab->jupyter)\n",
183
+ " Downloading webcolors-24.11.1-py3-none-any.whl.metadata (2.2 kB)\n",
184
+ "Collecting cffi>=1.0.1 (from argon2-cffi-bindings->argon2-cffi>=21.1->jupyter-server<3,>=2.4.0->jupyterlab->jupyter)\n",
185
+ " Downloading cffi-1.17.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (1.5 kB)\n",
186
+ "Collecting pycparser (from cffi>=1.0.1->argon2-cffi-bindings->argon2-cffi>=21.1->jupyter-server<3,>=2.4.0->jupyterlab->jupyter)\n",
187
+ " Downloading pycparser-2.22-py3-none-any.whl.metadata (943 bytes)\n",
188
+ "Collecting arrow>=0.15.0 (from isoduration->jsonschema[format-nongpl]>=4.18.0->jupyter-events>=0.11.0->jupyter-server<3,>=2.4.0->jupyterlab->jupyter)\n",
189
+ " Downloading arrow-1.3.0-py3-none-any.whl.metadata (7.5 kB)\n",
190
+ "Collecting types-python-dateutil>=2.8.10 (from arrow>=0.15.0->isoduration->jsonschema[format-nongpl]>=4.18.0->jupyter-events>=0.11.0->jupyter-server<3,>=2.4.0->jupyterlab->jupyter)\n",
191
+ " Downloading types_python_dateutil-2.9.0.20241206-py3-none-any.whl.metadata (2.1 kB)\n",
192
+ "Downloading jupyter-1.1.1-py2.py3-none-any.whl (2.7 kB)\n",
193
+ "Downloading ipywidgets-8.1.5-py3-none-any.whl (139 kB)\n",
194
+ "Downloading jupyter_console-6.6.3-py3-none-any.whl (24 kB)\n",
195
+ "Downloading jupyterlab-4.3.4-py3-none-any.whl (11.7 MB)\n",
196
+ "\u001b[2K \u001b[91m━━━━━━━━━━━━━━━━━\u001b[0m\u001b[90m╺\u001b[0m\u001b[90m━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m5.0/11.7 MB\u001b[0m \u001b[31m14.9 kB/s\u001b[0m eta \u001b[36m0:07:28\u001b[0m\n",
197
+ "\u001b[?25h\u001b[31mERROR: Exception:\n",
198
+ "Traceback (most recent call last):\n",
199
+ " File \"/dscilab_dungvo/workspace/bin/envs/vllm/lib/python3.10/site-packages/pip/_vendor/urllib3/response.py\", line 438, in _error_catcher\n",
200
+ " yield\n",
201
+ " File \"/dscilab_dungvo/workspace/bin/envs/vllm/lib/python3.10/site-packages/pip/_vendor/urllib3/response.py\", line 561, in read\n",
202
+ " data = self._fp_read(amt) if not fp_closed else b\"\"\n",
203
+ " File \"/dscilab_dungvo/workspace/bin/envs/vllm/lib/python3.10/site-packages/pip/_vendor/urllib3/response.py\", line 527, in _fp_read\n",
204
+ " return self._fp.read(amt) if amt is not None else self._fp.read()\n",
205
+ " File \"/dscilab_dungvo/workspace/bin/envs/vllm/lib/python3.10/site-packages/pip/_vendor/cachecontrol/filewrapper.py\", line 98, in read\n",
206
+ " data: bytes = self.__fp.read(amt)\n",
207
+ " File \"/dscilab_dungvo/workspace/bin/envs/vllm/lib/python3.10/http/client.py\", line 466, in read\n",
208
+ " s = self.fp.read(amt)\n",
209
+ " File \"/dscilab_dungvo/workspace/bin/envs/vllm/lib/python3.10/socket.py\", line 705, in readinto\n",
210
+ " return self._sock.recv_into(b)\n",
211
+ " File \"/dscilab_dungvo/workspace/bin/envs/vllm/lib/python3.10/ssl.py\", line 1307, in recv_into\n",
212
+ " return self.read(nbytes, buffer)\n",
213
+ " File \"/dscilab_dungvo/workspace/bin/envs/vllm/lib/python3.10/ssl.py\", line 1163, in read\n",
214
+ " return self._sslobj.read(len, buffer)\n",
215
+ "TimeoutError: The read operation timed out\n",
216
+ "\n",
217
+ "During handling of the above exception, another exception occurred:\n",
218
+ "\n",
219
+ "Traceback (most recent call last):\n",
220
+ " File \"/dscilab_dungvo/workspace/bin/envs/vllm/lib/python3.10/site-packages/pip/_internal/cli/base_command.py\", line 105, in _run_wrapper\n",
221
+ " status = _inner_run()\n",
222
+ " File \"/dscilab_dungvo/workspace/bin/envs/vllm/lib/python3.10/site-packages/pip/_internal/cli/base_command.py\", line 96, in _inner_run\n",
223
+ " return self.run(options, args)\n",
224
+ " File \"/dscilab_dungvo/workspace/bin/envs/vllm/lib/python3.10/site-packages/pip/_internal/cli/req_command.py\", line 67, in wrapper\n",
225
+ " return func(self, options, args)\n",
226
+ " File \"/dscilab_dungvo/workspace/bin/envs/vllm/lib/python3.10/site-packages/pip/_internal/commands/install.py\", line 379, in run\n",
227
+ " requirement_set = resolver.resolve(\n",
228
+ " File \"/dscilab_dungvo/workspace/bin/envs/vllm/lib/python3.10/site-packages/pip/_internal/resolution/resolvelib/resolver.py\", line 179, in resolve\n",
229
+ " self.factory.preparer.prepare_linked_requirements_more(reqs)\n",
230
+ " File \"/dscilab_dungvo/workspace/bin/envs/vllm/lib/python3.10/site-packages/pip/_internal/operations/prepare.py\", line 554, in prepare_linked_requirements_more\n",
231
+ " self._complete_partial_requirements(\n",
232
+ " File \"/dscilab_dungvo/workspace/bin/envs/vllm/lib/python3.10/site-packages/pip/_internal/operations/prepare.py\", line 469, in _complete_partial_requirements\n",
233
+ " for link, (filepath, _) in batch_download:\n",
234
+ " File \"/dscilab_dungvo/workspace/bin/envs/vllm/lib/python3.10/site-packages/pip/_internal/network/download.py\", line 184, in __call__\n",
235
+ " for chunk in chunks:\n",
236
+ " File \"/dscilab_dungvo/workspace/bin/envs/vllm/lib/python3.10/site-packages/pip/_internal/cli/progress_bars.py\", line 55, in _rich_progress_bar\n",
237
+ " for chunk in iterable:\n",
238
+ " File \"/dscilab_dungvo/workspace/bin/envs/vllm/lib/python3.10/site-packages/pip/_internal/network/utils.py\", line 65, in response_chunks\n",
239
+ " for chunk in response.raw.stream(\n",
240
+ " File \"/dscilab_dungvo/workspace/bin/envs/vllm/lib/python3.10/site-packages/pip/_vendor/urllib3/response.py\", line 622, in stream\n",
241
+ " data = self.read(amt=amt, decode_content=decode_content)\n",
242
+ " File \"/dscilab_dungvo/workspace/bin/envs/vllm/lib/python3.10/site-packages/pip/_vendor/urllib3/response.py\", line 560, in read\n",
243
+ " with self._error_catcher():\n",
244
+ " File \"/dscilab_dungvo/workspace/bin/envs/vllm/lib/python3.10/contextlib.py\", line 153, in __exit__\n",
245
+ " self.gen.throw(typ, value, traceback)\n",
246
+ " File \"/dscilab_dungvo/workspace/bin/envs/vllm/lib/python3.10/site-packages/pip/_vendor/urllib3/response.py\", line 443, in _error_catcher\n",
247
+ " raise ReadTimeoutError(self._pool, None, \"Read timed out.\")\n",
248
+ "pip._vendor.urllib3.exceptions.ReadTimeoutError: HTTPSConnectionPool(host='files.pythonhosted.org', port=443): Read timed out.\u001b[0m\u001b[31m\n",
249
+ "\u001b[0m"
250
+ ]
251
+ },
252
+ {
253
+ "name": "stderr",
254
+ "output_type": "stream",
255
+ "text": [
256
+ "--2024-12-25 08:56:46-- https://docs.vllm.ai/en/stable/getting_started/quickstart.html\n",
257
+ "Resolving docs.vllm.ai (docs.vllm.ai)... 104.21.88.245, 172.67.154.127, 2606:4700:3030::6815:58f5, ...\n",
258
+ "Connecting to docs.vllm.ai (docs.vllm.ai)|104.21.88.245|:443... connected.\n",
259
+ "HTTP request sent, awaiting response... 200 OK\n",
260
+ "Length: unspecified [text/html]\n",
261
+ "Saving to: ‘downloaded_page.html’\n",
262
+ "\n",
263
+ " 0K .......... .......... .......... .......... .......... 75.9M\n",
264
+ " 50K 754G=0.001s\n",
265
+ "\n",
266
+ "2024-12-25 08:56:47 (76.5 MB/s) - ‘downloaded_page.html’ saved [51605]\n",
267
+ "\n",
268
+ "[NbConvertApp] Converting notebook downloaded_page.html to notebook\n",
269
+ "Traceback (most recent call last):\n",
270
+ " File \"/usr/local/lib/python3.11/dist-packages/nbformat/reader.py\", line 20, in parse_json\n",
271
+ " nb_dict = json.loads(s, **kwargs)\n",
272
+ " ^^^^^^^^^^^^^^^^^^^^^^^\n",
273
+ " File \"/usr/lib/python3.11/json/__init__.py\", line 346, in loads\n",
274
+ " return _default_decoder.decode(s)\n",
275
+ " ^^^^^^^^^^^^^^^^^^^^^^^^^^\n",
276
+ " File \"/usr/lib/python3.11/json/decoder.py\", line 337, in decode\n",
277
+ " obj, end = self.raw_decode(s, idx=_w(s, 0).end())\n",
278
+ " ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n",
279
+ " File \"/usr/lib/python3.11/json/decoder.py\", line 355, in raw_decode\n",
280
+ " raise JSONDecodeError(\"Expecting value\", s, err.value) from None\n",
281
+ "json.decoder.JSONDecodeError: Expecting value: line 3 column 1 (char 2)\n",
282
+ "\n",
283
+ "The above exception was the direct cause of the following exception:\n",
284
+ "\n",
285
+ "Traceback (most recent call last):\n",
286
+ " File \"/usr/local/bin/jupyter-nbconvert\", line 8, in <module>\n",
287
+ " sys.exit(main())\n",
288
+ " ^^^^^^\n",
289
+ " File \"/usr/local/lib/python3.11/dist-packages/jupyter_core/application.py\", line 280, in launch_instance\n",
290
+ " super().launch_instance(argv=argv, **kwargs)\n",
291
+ " File \"/usr/local/lib/python3.11/dist-packages/traitlets/config/application.py\", line 1053, in launch_instance\n",
292
+ " app.start()\n",
293
+ " File \"/usr/local/lib/python3.11/dist-packages/nbconvert/nbconvertapp.py\", line 412, in start\n",
294
+ " self.convert_notebooks()\n",
295
+ " File \"/usr/local/lib/python3.11/dist-packages/nbconvert/nbconvertapp.py\", line 590, in convert_notebooks\n",
296
+ " self.convert_single_notebook(notebook_filename)\n",
297
+ " File \"/usr/local/lib/python3.11/dist-packages/nbconvert/nbconvertapp.py\", line 556, in convert_single_notebook\n",
298
+ " output, resources = self.export_single_notebook(\n",
299
+ " ^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n",
300
+ " File \"/usr/local/lib/python3.11/dist-packages/nbconvert/nbconvertapp.py\", line 479, in export_single_notebook\n",
301
+ " output, resources = self.exporter.from_filename(\n",
302
+ " ^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n",
303
+ " File \"/usr/local/lib/python3.11/dist-packages/nbconvert/exporters/exporter.py\", line 201, in from_filename\n",
304
+ " return self.from_file(f, resources=resources, **kw)\n",
305
+ " ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n",
306
+ " File \"/usr/local/lib/python3.11/dist-packages/nbconvert/exporters/exporter.py\", line 221, in from_file\n",
307
+ " nbformat.read(file_stream, as_version=4), resources=resources, **kw\n",
308
+ " ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n",
309
+ " File \"/usr/local/lib/python3.11/dist-packages/nbformat/__init__.py\", line 171, in read\n",
310
+ " return reads(buf, as_version, capture_validation_error, **kwargs)\n",
311
+ " ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n",
312
+ " File \"/usr/local/lib/python3.11/dist-packages/nbformat/__init__.py\", line 89, in reads\n",
313
+ " nb = reader.reads(s, **kwargs)\n",
314
+ " ^^^^^^^^^^^^^^^^^^^^^^^^^\n",
315
+ " File \"/usr/local/lib/python3.11/dist-packages/nbformat/reader.py\", line 76, in reads\n",
316
+ " nb_dict = parse_json(s, **kwargs)\n",
317
+ " ^^^^^^^^^^^^^^^^^^^^^^^\n",
318
+ " File \"/usr/local/lib/python3.11/dist-packages/nbformat/reader.py\", line 26, in parse_json\n",
319
+ " raise NotJSONError(message) from e\n",
320
+ "nbformat.reader.NotJSONError: Notebook does not appear to be JSON: '\\n\\n<!DOCTYPE html>\\n\\n\\n<html lang=\"en...\n"
321
+ ]
322
+ },
323
+ {
324
+ "ename": "CalledProcessError",
325
+ "evalue": "Command '['jupyter', 'nbconvert', '--to', 'notebook', '--output', 'quickstart_notebook.ipynb', 'downloaded_page.html']' returned non-zero exit status 1.",
326
+ "output_type": "error",
327
+ "traceback": [
328
+ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
329
+ "\u001b[0;31mCalledProcessError\u001b[0m Traceback (most recent call last)",
330
+ "Cell \u001b[0;32mIn[5], line 16\u001b[0m\n\u001b[1;32m 14\u001b[0m url \u001b[38;5;241m=\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mhttps://docs.vllm.ai/en/stable/getting_started/quickstart.html\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 15\u001b[0m output_notebook \u001b[38;5;241m=\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mquickstart_notebook.ipynb\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m---> 16\u001b[0m \u001b[43mdownload_html_and_convert\u001b[49m\u001b[43m(\u001b[49m\u001b[43murl\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43moutput_notebook\u001b[49m\u001b[43m)\u001b[49m\n",
331
+ "Cell \u001b[0;32mIn[5], line 10\u001b[0m, in \u001b[0;36mdownload_html_and_convert\u001b[0;34m(url, output_notebook)\u001b[0m\n\u001b[1;32m 7\u001b[0m subprocess\u001b[38;5;241m.\u001b[39mrun([\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mwget\u001b[39m\u001b[38;5;124m'\u001b[39m, url, \u001b[38;5;124m'\u001b[39m\u001b[38;5;124m-O\u001b[39m\u001b[38;5;124m'\u001b[39m, \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mdownloaded_page.html\u001b[39m\u001b[38;5;124m'\u001b[39m], check\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mTrue\u001b[39;00m)\n\u001b[1;32m 9\u001b[0m \u001b[38;5;66;03m# Convert HTML to notebook\u001b[39;00m\n\u001b[0;32m---> 10\u001b[0m \u001b[43msubprocess\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mrun\u001b[49m\u001b[43m(\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mjupyter\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mnbconvert\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43m--to\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mnotebook\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43m--output\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43moutput_notebook\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mdownloaded_page.html\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m]\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mcheck\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mTrue\u001b[39;49;00m\u001b[43m)\u001b[49m\n",
332
+ "File \u001b[0;32m/dscilab_dungvo/workspace/bin/envs/vllm/lib/python3.10/subprocess.py:526\u001b[0m, in \u001b[0;36mrun\u001b[0;34m(input, capture_output, timeout, check, *popenargs, **kwargs)\u001b[0m\n\u001b[1;32m 524\u001b[0m retcode \u001b[38;5;241m=\u001b[39m process\u001b[38;5;241m.\u001b[39mpoll()\n\u001b[1;32m 525\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m check \u001b[38;5;129;01mand\u001b[39;00m retcode:\n\u001b[0;32m--> 526\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m CalledProcessError(retcode, process\u001b[38;5;241m.\u001b[39margs,\n\u001b[1;32m 527\u001b[0m output\u001b[38;5;241m=\u001b[39mstdout, stderr\u001b[38;5;241m=\u001b[39mstderr)\n\u001b[1;32m 528\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m CompletedProcess(process\u001b[38;5;241m.\u001b[39margs, retcode, stdout, stderr)\n",
333
+ "\u001b[0;31mCalledProcessError\u001b[0m: Command '['jupyter', 'nbconvert', '--to', 'notebook', '--output', 'quickstart_notebook.ipynb', 'downloaded_page.html']' returned non-zero exit status 1."
334
+ ]
335
+ }
336
+ ],
337
+ "source": [
338
+ "# https://docs.vllm.ai/en/stable/getting_started/quickstart.rst\n",
339
+ "# # !pip install jupyter\n",
340
+ "# import subprocess\n",
341
+ "\n",
342
+ "# def download_html_and_convert(url, output_notebook):\n",
343
+ "# # !pip install jupyter\n",
344
+ "# subprocess.run(['pip', 'install', 'jupyter'], check=True)\n",
345
+ "\n",
346
+ "# # Download HTML\n",
347
+ "# subprocess.run(['wget', url, '-O', 'downloaded_page.html'], check=True)\n",
348
+ " \n",
349
+ "# # Convert HTML to notebook\n",
350
+ "# subprocess.run(['jupyter', 'nbconvert', '--to', 'notebook', '--output', output_notebook, 'downloaded_page.html'], check=True)\n",
351
+ "\n",
352
+ "# # Example usage\n",
353
+ "# if __name__ == \"__main__\":\n",
354
+ "# url = \"https://docs.vllm.ai/en/stable/getting_started/quickstart.html\"\n",
355
+ "# output_notebook = \"quickstart_notebook.ipynb\"\n",
356
+ "# download_html_and_convert(url, output_notebook)"
357
+ ]
358
+ },
359
+ {
360
+ "cell_type": "code",
361
+ "execution_count": null,
362
+ "metadata": {},
363
+ "outputs": [],
364
+ "source": []
365
+ },
366
+ {
367
+ "cell_type": "code",
368
+ "execution_count": null,
369
+ "metadata": {},
370
+ "outputs": [],
371
+ "source": [
372
+ "prompts = [\n",
373
+ " \"Hello, my name is\",\n",
374
+ " \"The president of the United States is\",\n",
375
+ " \"The capital of France is\",\n",
376
+ " \"The future of AI is\",\n",
377
+ "]\n",
378
+ "sampling_params = SamplingParams(temperature=0.8, top_p=0.95)"
379
+ ]
380
+ },
381
+ {
382
+ "cell_type": "code",
383
+ "execution_count": null,
384
+ "metadata": {},
385
+ "outputs": [
386
+ {
387
+ "name": "stdout",
388
+ "output_type": "stream",
389
+ "text": [
390
+ "INFO 12-25 08:56:54 __init__.py:46] No plugins found.\n",
391
+ "INFO 12-25 08:57:09 config.py:403] This model supports multiple tasks: {'embedding', 'generate'}. Defaulting to 'generate'.\n",
392
+ "INFO 12-25 08:57:09 llm_engine.py:249] Initializing an LLM engine (v0.6.4.post2.dev227+gd2bd88b1) with config: model='facebook/opt-125m', speculative_config=None, tokenizer='facebook/opt-125m', skip_tokenizer_init=False, tokenizer_mode=auto, revision=None, override_neuron_config=None, tokenizer_revision=None, trust_remote_code=False, dtype=torch.float16, max_seq_len=2048, download_dir=None, load_format=LoadFormat.AUTO, tensor_parallel_size=1, pipeline_parallel_size=1, disable_custom_all_reduce=False, quantization=None, enforce_eager=False, kv_cache_dtype=auto, quantization_param_path=None, device_config=cuda, decoding_config=DecodingConfig(guided_decoding_backend='xgrammar'), observability_config=ObservabilityConfig(otlp_traces_endpoint=None, collect_model_forward_time=False, collect_model_execute_time=False), seed=0, served_model_name=facebook/opt-125m, num_scheduler_steps=1, chunked_prefill_enabled=False multi_step_stream_outputs=True, enable_prefix_caching=False, use_async_output_proc=True, use_cached_outputs=False, mm_processor_kwargs=None, pooler_config=None,compilation_config=CompilationConfig(level=0, backend='', custom_ops=[], splitting_ops=['vllm.unified_attention', 'vllm.unified_attention_with_output'], use_inductor=True, inductor_specialize_for_cudagraph_no_more_than=None, inductor_compile_sizes=[], inductor_compile_config={}, inductor_passes={}, use_cudagraph=False, cudagraph_num_of_warmups=0, cudagraph_capture_sizes=None, cudagraph_copy_inputs=False, pass_config=PassConfig(dump_graph_stages=[], dump_graph_dir=PosixPath('.'), enable_fusion=True, enable_reshape=True), compile_sizes=[], capture_sizes=[256, 248, 240, 232, 224, 216, 208, 200, 192, 184, 176, 168, 160, 152, 144, 136, 128, 120, 112, 104, 96, 88, 80, 72, 64, 56, 48, 40, 32, 24, 16, 8, 4, 2, 1], enabled_custom_ops=Counter(), disabled_custom_ops=Counter(), static_forward_context={})\n",
393
+ "INFO 12-25 08:57:17 selector.py:120] Using Flash Attention backend.\n",
394
+ "INFO 12-25 08:57:18 model_runner.py:1089] Starting to load model facebook/opt-125m...\n",
395
+ "INFO 12-25 08:57:18 weight_utils.py:243] Using model weights format ['*.bin']\n"
396
+ ]
397
+ }
398
+ ],
399
+ "source": [
400
+ "llm = LLM(model=\"facebook/opt-125m\")"
401
+ ]
402
+ },
403
+ {
404
+ "cell_type": "code",
405
+ "execution_count": null,
406
+ "metadata": {},
407
+ "outputs": [],
408
+ "source": []
409
+ }
410
+ ],
411
+ "metadata": {
412
+ "kernelspec": {
413
+ "display_name": "vllm",
414
+ "language": "python",
415
+ "name": "python3"
416
+ },
417
+ "language_info": {
418
+ "codemirror_mode": {
419
+ "name": "ipython",
420
+ "version": 3
421
+ },
422
+ "file_extension": ".py",
423
+ "mimetype": "text/x-python",
424
+ "name": "python",
425
+ "nbconvert_exporter": "python",
426
+ "pygments_lexer": "ipython3",
427
+ "version": "3.10.14"
428
+ }
429
+ },
430
+ "nbformat": 4,
431
+ "nbformat_minor": 2
432
+ }
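The notebook above builds the engine with `llm = LLM(model="facebook/opt-125m")` and defines `prompts` and `sampling_params`, but the final generation cell is left empty. A minimal sketch of that remaining step, mirroring the objects created in the earlier cells and the vLLM quickstart:

```python
from vllm import LLM, SamplingParams

# Re-create the objects from the notebook cells above; model and sampling
# settings mirror the notebook.
prompts = [
    "Hello, my name is",
    "The capital of France is",
]
sampling_params = SamplingParams(temperature=0.8, top_p=0.95)
llm = LLM(model="facebook/opt-125m")

# Run offline generation and print each prompt with its first completion.
outputs = llm.generate(prompts, sampling_params)
for output in outputs:
    print(f"Prompt: {output.prompt!r}")
    print(f"Generated: {output.outputs[0].text!r}")
```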
mlruns/0/meta.yaml ADDED
@@ -0,0 +1,6 @@
1
+ artifact_location: mlflow-artifacts:/0
2
+ creation_time: 1735239171092
3
+ experiment_id: '0'
4
+ last_update_time: 1735239171092
5
+ lifecycle_stage: active
6
+ name: Default
recognize-anything/.ipynb_checkpoints/README-checkpoint.md ADDED
@@ -0,0 +1,601 @@
1
+ # <font size=8> :label: Recognize Anything Model </font>
2
+
3
+ This project aims to develop a series of strong, open-source fundamental image recognition models.
4
+
5
+ [![Training Dataset](https://img.shields.io/badge/📦-Training%20Dataset-orange.svg)](#open_book-training-datasets)
6
+ [![Tag List](https://img.shields.io/badge/🏷️-4585%20Tags-green.svg)](ram/data/ram_tag_list.txt)
7
+ [![Web Demo](https://img.shields.io/badge/🤗-HuggingFace%20Space-cyan.svg)](https://huggingface.co/spaces/xinyu1205/Recognize_Anything-Tag2Text)
8
+ [![Open in Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/mhd-medfa/recognize-anything/blob/main/recognize_anything_demo.ipynb)
9
+ [![Open in Bohrium](https://cdn.dp.tech/bohrium/web/static/images/open-in-bohrium.svg)](https://bohrium.dp.tech/notebooks/63116114759)
10
+
11
+
12
+ - **Recognize Anything Plus Model (RAM++)** [[Paper](https://arxiv.org/abs/2310.15200)] <br>
13
+
14
+ RAM++ is the next generation of RAM, which can **recognize any category with high accuracy**, including **both predefined common categories and diverse open-set categories**.
15
+
16
+ - **Recognize Anything Model (RAM)** [[Paper](https://arxiv.org/abs/2306.03514)][[Demo](https://huggingface.co/spaces/xinyu1205/recognize-anything)] <br>
17
+
18
+ RAM is an image tagging model, which can **recognize any common category with high accuracy**.
19
+
20
+ RAM is accepted at **CVPR 2024 Multimodal Foundation Models Workshop**.
21
+
22
+ - **Tag2Text (ICLR 2024)** [[Paper](https://arxiv.org/abs/2303.05657)] [[Demo](https://huggingface.co/spaces/xinyu1205/recognize-anything)]<br>
23
+
24
+ Tag2Text is a vision-language model guided by tagging, which can **support tagging and comprehensive captioning simultaneously**.
25
+
26
+ Tag2Text is accepted at **ICLR 2024!** See you in Vienna!
27
+
28
+
29
+
30
+
31
+ ## :bulb: Highlight
32
+
33
+ ### **Superior Image Recognition Capability**
34
+
35
+ RAM++ outperforms existing SOTA fundamental image recognition models on common tag categories, uncommon tag categories, and human-object interaction phrases.
36
+
37
+ <p align="center">
38
+ <table class="tg">
39
+ <tr>
40
+ <td class="tg-c3ow"><img src="images/ram_plus_compare.jpg" align="center" width="700" ></td>
41
+ </tr>
42
+ </table>
43
+ <p align="center">Comparison of zero-shot image recognition performance.</p>
44
+ </p>
45
+
46
+
47
+ ### **Strong Visual Semantic Analysis**
48
+
49
+
50
+ We have combined Tag2Text and RAM with localization models (Grounding-DINO and SAM) and developed a strong visual semantic analysis pipeline in the [Grounded-SAM](https://github.com/IDEA-Research/Grounded-Segment-Anything) project.
51
+
52
+ ![](./images/ram_grounded_sam.jpg)
53
+
54
+
55
+ ## :sunrise: Model Zoo
56
+
57
+ <details>
58
+ <summary><font size="3" style="font-weight:bold;">
59
+ RAM++
60
+ </font></summary>
61
+
62
+ RAM++ is the next generation of RAM, which can recognize any category with high accuracy, including both predefined common categories and diverse open-set categories.
63
+
64
+
65
+ - **For Common Predefined Categories.** RAM++ exhibits exceptional image tagging capabilities with powerful zero-shot generalization, maintaining the same capabilities as RAM.
66
+ <!-- - RAM++ showcases impressive zero-shot performance, significantly outperforming CLIP and BLIP.
67
+ - RAM++ even surpasses the fully supervised manners (ML-Decoder).
68
+ - RAM++ exhibits competitive performance with the Google tagging API. -->
69
+ - **For Diverse Open-set Categories.** RAM++ achieves notable enhancements beyond CLIP and RAM.
70
+ <!-- - RAM++ integrate the image-tags-text triplets within a unified alignment framework.
71
+ - RAM++ pioneer the intergation of LLM's knowledge into image tagging training. -->
72
+
73
+
74
+ <p align="center">
75
+ <table class="tg">
76
+ <tr>
77
+ <td class="tg-c3ow"><img src="images/ram_plus_experiment.png" align="center" width="800" ></td>
78
+ </tr>
79
+ </table>
80
+ <p align="center">(Green indicates fully supervised learning; the others indicate zero-shot performance.)</p>
81
+ </p>
82
+
83
+
84
+ <p align="center">
85
+ <table class="tg">
86
+ <tr>
87
+ <td class="tg-c3ow"><img src="images/ram_plus_visualization.jpg" align="center" width="800" ></td>
88
+ </tr>
89
+ </table>
90
+ <p align="center">RAM++ demonstrates a significant improvement in open-set category recognition.</p>
91
+ </p>
92
+
93
+
94
+ </details>
95
+
96
+
97
+
98
+ <details>
99
+ <summary><font size="3" style="font-weight:bold;">
100
+ RAM
101
+ </font></summary>
102
+
103
+
104
+ RAM is a strong image tagging model, which can recognize any common category with high accuracy.
105
+ - **Strong and general.** RAM exhibits exceptional image tagging capabilities with powerful zero-shot generalization;
106
+ - RAM showcases impressive zero-shot performance, significantly outperforming CLIP and BLIP.
107
+ - RAM even surpasses fully supervised methods (ML-Decoder).
108
+ - RAM exhibits competitive performance with the Google tagging API.
109
+ - **Reproducible and affordable.** RAM has a low reproduction cost thanks to its open-source and annotation-free dataset;
110
+ - **Flexible and versatile.** RAM offers remarkable flexibility, catering to various application scenarios.
111
+
112
+
113
+ <p align="center">
114
+ <table class="tg">
115
+ <tr>
116
+ <td class="tg-c3ow"><img src="images/experiment_comparison.png" align="center" width="800" ></td>
117
+ </tr>
118
+ </table>
119
+ <p align="center">(Green color means fully supervised learning and Blue color means zero-shot performance.)</p>
120
+ </p>
121
+
122
+ <p align="center">
123
+ <table class="tg">
124
+ <tr>
125
+ <td class="tg-c3ow"><img src="images/tagging_results.jpg" align="center" width="800" ></td>
126
+ </tr>
127
+ </table>
128
+ </p>
129
+
130
+ RAM significantly improves the tagging ability based on the Tag2Text framework.
131
+ - **Accuracy.** RAM utilizes a **data engine** to **generate** additional annotations and **clean** incorrect ones, resulting in **higher accuracy** compared to Tag2Text.
132
+ - **Scope.** RAM upgrades the number of fixed tags from 3,400+ to **[6,400+](./ram/data/ram_tag_list.txt)** (reduced by synonym merging to 4,500+ distinct semantic tags), covering **more valuable categories**.
133
+ Moreover, RAM is equipped with **open-set capability**, making it feasible to recognize tags not seen during training.
134
+
135
+
136
+ </details>
137
+
138
+
139
+
140
+ <details>
141
+ <summary><font size="3" style="font-weight:bold;">
142
+ Tag2Text
143
+ </font></summary>
144
+
145
+
146
+ Tag2Text is an efficient and controllable vision-language model with tagging guidance.
147
+ - **Tagging.** Tag2Text recognizes **[3,400+](./ram/data/tag2text_ori_tag_list.txt)** commonly human-used categories without manual annotations.
148
+ - **Captioning.** Tag2Text integrates **tag information** into text generation as the **guiding elements**, resulting in **more controllable and comprehensive descriptions**.
149
+ - **Retrieval.** Tag2Text provides **tags** as **additional visible alignment indicators** for image-text retrieval.
150
+
151
+
152
+ <p align="center">
153
+ <table class="tg">
154
+ <tr>
155
+ <td class="tg-c3ow"><img src="images/tag2text_visualization.png" align="center" width="800" ></td>
156
+ </tr>
157
+ </table>
158
+ <p align="center">Tag2Text generates more comprehensive captions with tagging guidance.</p>
159
+ </p>
160
+
161
+ <p align="center">
162
+ <table class="tg">
163
+ <tr>
164
+ <td class="tg-c3ow"><img src="images/tag2text_retrieval_visualization.png" align="center" width="800" ></td>
165
+ </tr>
166
+ </table>
167
+ <p align="center">Tag2Text provides tags as additional visible alignment indicators.</p>
168
+ </p>
169
+
170
+
171
+ </details>
172
+
173
+ <!-- ## :sparkles: Highlight Projects with other Models
174
+ - [Tag2Text/RAM with Grounded-SAM](https://github.com/IDEA-Research/Grounded-Segment-Anything) is a strong and general pipeline for visual semantic analysis, which can automatically **recognize**, detect, and segment an image!
175
+ - [Ask-Anything](https://github.com/OpenGVLab/Ask-Anything) is a multifunctional video question answering tool. Tag2Text provides powerful tagging and captioning capabilities as a fundamental component.
176
+ - [Prompt-can-anything](https://github.com/positive666/Prompt-Can-Anything) is a gradio web library that integrates SOTA multimodal large models, including Tag2text as the core model for graphic understanding -->
177
+
178
+
179
+ <!--
180
+ ## :fire: News
181
+
182
+ - **`2023/10/30`**: We release the [Recognize Anything Model Plus Model(RAM++)](), checkpoints and inference code!
183
+ - **`2023/06/08`**: We release the [Recognize Anything Model (RAM) Tag2Text web demo 🤗](https://huggingface.co/spaces/xinyu1205/Recognize_Anything-Tag2Text), checkpoints and inference code!
184
+ - **`2023/06/07`**: We release the [Recognize Anything Model (RAM)](https://recognize-anything.github.io/), a strong image tagging model!
185
+ - **`2023/06/05`**: Tag2Text is combined with [Prompt-can-anything](https://github.com/OpenGVLab/Ask-Anything).
186
+ - **`2023/05/20`**: Tag2Text is combined with [VideoChat](https://github.com/OpenGVLab/Ask-Anything).
187
+ - **`2023/04/20`**: We marry Tag2Text with with [Grounded-SAM](https://github.com/IDEA-Research/Grounded-Segment-Anything).
188
+ - **`2023/04/10`**: Code and checkpoint is available Now!
189
+ - **`2023/03/14`**: [Tag2Text web demo 🤗](https://huggingface.co/spaces/xinyu1205/Recognize_Anything-Tag2Text) is available on Hugging Face Space! -->
190
+
191
+
192
+
193
+
194
+ <!--
195
+ ## :writing_hand: TODO
196
+
197
+ - [x] Release checkpoints.
198
+ - [x] Release inference code.
199
+ - [x] Release demo and checkpoints.
200
+ - [x] Release training codes.
201
+ - [x] Release training datasets.
202
+ - [ ] Release full training codes and scripts. -->
203
+
204
+
205
+ ## :open_book: Training Datasets
206
+
207
+ ### **Image Texts and Tags**
208
+
209
+ These annotation files come from the [Tag2Text](https://arxiv.org/abs/2303.05657) and [RAM](https://recognize-anything.github.io/). Tag2Text automatically extracts image tags from image-text pairs. RAM further augments both tags and texts via an automatic data engine.
210
+
211
+
212
+ | DataSet | Size | Images | Texts | Tags |
213
+ |----------|---------|--------|-------|-------|
214
+ | [COCO](https://huggingface.co/datasets/xinyu1205/recognize-anything-dataset/blob/main/coco_train_rmcocodev_ram.json) | 168 MB | 113K | 680K | 3.2M |
215
+ | [VG](https://huggingface.co/datasets/xinyu1205/recognize-anything-dataset/blob/main/vg_ram.json) | 55 MB | 100K | 923K | 2.7M |
216
+ | [SBU](https://huggingface.co/datasets/xinyu1205/recognize-anything-dataset/blob/main/sbu_ram.json) | 234 MB | 849K | 1.7M | 7.6M |
217
+ | [CC3M](https://huggingface.co/datasets/xinyu1205/recognize-anything-dataset/blob/main/cc3m_train_ram.json) | 766 MB | 2.8M | 5.6M | 28.2M |
218
+ | [CC3M-val](https://huggingface.co/datasets/xinyu1205/recognize-anything-dataset/blob/main/cc3m_val_ram.json) | 3.5 MB | 12K | 26K | 132K |
219
+
220
+ CC12M will be released in the next update.
221
+
222
+ ### **LLM Tag Descriptions**
223
+
224
+ These tag description files come from [RAM++](https://arxiv.org/abs/2310.15200) and are generated by calling the GPT API. You can also customize tag categories with [generate_tag_des_llm.py](generate_tag_des_llm.py).
225
+
226
+ | Tag Descriptions | Tag List |
227
+ |---------------------|----------|
228
+ | [RAM Tag List](https://huggingface.co/datasets/xinyu1205/recognize-anything-plus-model-tag-descriptions/blob/main/ram_tag_list_4585_llm_tag_descriptions.json) | [4,585](ram/data/ram_tag_list.txt) |
229
+ | [OpenImages Uncommon](./datasets/openimages_rare_200/openimages_rare_200_llm_tag_descriptions.json) | [200](datasets/openimages_rare_200/openimages_rare_200_ram_taglist.txt) |
230
+
231
+ ## :toolbox: Checkpoints
232
+ Note: you need to create a 'pretrained' folder and download these checkpoints into it.
233
+ <!-- insert a table -->
234
+ <table>
235
+ <thead>
236
+ <tr style="text-align: right;">
237
+ <th></th>
238
+ <th>Name</th>
239
+ <th>Backbone</th>
240
+ <th>Data</th>
241
+ <th>Illustration</th>
242
+ <th>Checkpoint</th>
243
+ </tr>
244
+ </thead>
245
+ <tbody>
246
+ <tr>
247
+ <th>1</th>
248
+ <td>RAM++ (14M)</td>
249
+ <td>Swin-Large</td>
250
+ <td>COCO, VG, SBU, CC3M, CC3M-val, CC12M</td>
251
+ <td>Provide strong image tagging ability for any category.</td>
252
+ <td><a href="https://huggingface.co/xinyu1205/recognize-anything-plus-model/blob/main/ram_plus_swin_large_14m.pth">Download link</a></td>
253
+ </tr>
254
+ <tr>
255
+ <th>2</th>
256
+ <td>RAM (14M)</td>
257
+ <td>Swin-Large</td>
258
+ <td>COCO, VG, SBU, CC3M, CC3M-val, CC12M</td>
259
+ <td>Provide strong image tagging ability for common category.</td>
260
+ <td><a href="https://huggingface.co/spaces/xinyu1205/Recognize_Anything-Tag2Text/blob/main/ram_swin_large_14m.pth">Download link</a></td>
261
+ </tr>
262
+ <tr>
263
+ <th>3</th>
264
+ <td>Tag2Text (14M)</td>
265
+ <td>Swin-Base</td>
266
+ <td>COCO, VG, SBU, CC3M, CC3M-val, CC12M</td>
267
+ <td>Support comprehensive captioning and tagging.</td>
268
+ <td><a href="https://huggingface.co/spaces/xinyu1205/Recognize_Anything-Tag2Text/blob/main/tag2text_swin_14m.pth">Download link</a></td>
269
+ </tr>
270
+ </tbody>
271
+ </table>
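+
+ If you prefer to fetch a checkpoint programmatically, the sketch below uses the `huggingface_hub` client (an extra dependency, installed with `pip install huggingface_hub`) to download the RAM++ weights into the 'pretrained' folder. The repo id and filename are taken from the table above; this is only one convenient option, not the required workflow.
+
+ ```python
+ # Sketch: download the RAM++ checkpoint into ./pretrained with huggingface_hub.
+ # Repo id and filename come from the checkpoint table above.
+ from huggingface_hub import hf_hub_download
+
+ ckpt_path = hf_hub_download(
+     repo_id="xinyu1205/recognize-anything-plus-model",
+     filename="ram_plus_swin_large_14m.pth",
+     local_dir="pretrained",  # saved as pretrained/ram_plus_swin_large_14m.pth
+ )
+ print(ckpt_path)
+ ```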
272
+
273
+
274
+ ## :running: Model Inference
275
+
276
+ ### **Setting Up** ###
277
+
278
+ 1. Create and activate a Conda environment:
279
+
280
+ ```bash
281
+ conda create -n recognize-anything python=3.8 -y
282
+ conda activate recognize-anything
283
+ ```
284
+
285
+ 2. Install `recognize-anything` as a package:
286
+
287
+ ```bash
288
+ pip install git+https://github.com/xinyu1205/recognize-anything.git
289
+ ```
290
+
291
+ 3. Or, for development, you may build from source:
292
+
293
+ ```bash
294
+ git clone https://github.com/xinyu1205/recognize-anything.git
295
+ cd recognize-anything
296
+ pip install -e .
297
+ ```
298
+
299
+ Then the RAM++, RAM, and Tag2Text models can be imported in other projects:
300
+
301
+ ```python
302
+ from ram.models import ram_plus, ram, tag2text
303
+ ```
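+
+ As a quick end-to-end example, the sketch below mirrors what `inference_ram_plus.py` does, but from your own Python code. It assumes the helper functions exposed by this repo's inference scripts (`get_transform`, `inference_ram`) and a checkpoint downloaded to 'pretrained'; adjust the names if your installed version differs.
+
+ ```python
+ # Minimal sketch of programmatic RAM++ tagging, following inference_ram_plus.py.
+ # Assumes: pretrained/ram_plus_swin_large_14m.pth exists and the `ram` package
+ # exposes get_transform / inference_ram as used in this repo's scripts.
+ import torch
+ from PIL import Image
+ from ram import get_transform, inference_ram as inference
+ from ram.models import ram_plus
+
+ device = "cuda" if torch.cuda.is_available() else "cpu"
+ transform = get_transform(image_size=384)
+
+ model = ram_plus(pretrained="pretrained/ram_plus_swin_large_14m.pth",
+                  image_size=384, vit="swin_l")
+ model.eval()
+ model = model.to(device)
+
+ image = transform(Image.open("images/demo/demo1.jpg")).unsqueeze(0).to(device)
+ english_tags, chinese_tags = inference(image, model)  # English and Chinese tag strings
+ print("Image Tags:", english_tags)
+ ```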
304
+
305
+ ### **RAM++ Inference** ###
306
+
307
+ Get the English and Chinese outputs of the images:
308
+
309
+ ```bash
310
+ python inference_ram_plus.py --image images/demo/demo1.jpg --pretrained pretrained/ram_plus_swin_large_14m.pth
311
+ ```
312
+
313
+
314
+ The output will look like the following:
315
+
316
+ ```
317
+ Image Tags: armchair | blanket | lamp | carpet | couch | dog | gray | green | hassock | home | lay | living room | picture frame | pillow | plant | room | wall lamp | sit | wood floor
318
+ 图像标签: 扶手椅 | 毯子/覆盖层 | 灯 | 地毯 | 沙发 | 狗 | 灰色 | 绿色 | 坐垫/搁脚凳/草丛 | 家/住宅 | 躺 | 客厅 | 相框 | 枕头 | 植物 | 房间 | 壁灯 | 坐/放置/坐落 | 木地板
319
+ ```
320
+
321
+ ### **RAM++ Inference on Unseen Categories (Open-Set)** ###
322
+
323
+ 1. Get the [OpenImages-Uncommon categories](./datasets/openimages_rare_200/openimages_rare_200_ram_taglist.txt) of the image:
324
+
325
+ We have released the LLM tag descriptions of OpenImages-Uncommon categories in [openimages_rare_200_llm_tag_descriptions](./datasets/openimages_rare_200/).
326
+
327
+ <pre>
328
+ python inference_ram_plus_openset.py --image images/openset_example.jpg \
329
+ --pretrained pretrained/ram_plus_swin_large_14m.pth \
330
+ --llm_tag_des datasets/openimages_rare_200/openimages_rare_200_llm_tag_descriptions.json
331
+ </pre>
332
+
333
+ The output will look like the following:
334
+ ```
335
+ Image Tags: Close-up | Compact car | Go-kart | Horse racing | Sport utility vehicle | Touring car
336
+ ```
337
+
338
+ 2. You can also customize any tag categories for recognition through tag descriptions:
339
+
340
+ Modify the [categories](./generate_tag_des_llm.py#L56), and call the GPT API to generate the corresponding tag descriptions:
341
+
342
+ <pre>
343
+ python generate_tag_des_llm.py \
344
+ --openai_api_key 'your openai api key' \
345
+ --output_file_path datasets/openimages_rare_200/openimages_rare_200_llm_tag_descriptions.json
346
+ </pre>
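+
+ After generation, it can be useful to sanity-check the output file before plugging it into open-set inference. The sketch below only assumes that the output is valid JSON (as implied by `--output_file_path` above); inspect one entry to see the exact structure your version produces.
+
+ ```python
+ # Sketch: quick sanity check of the generated LLM tag description file.
+ # Only assumes the file is valid JSON; the inner structure may vary by version.
+ import json
+
+ path = "datasets/openimages_rare_200/openimages_rare_200_llm_tag_descriptions.json"
+ with open(path, "r", encoding="utf-8") as f:
+     tag_descriptions = json.load(f)
+
+ print(f"{len(tag_descriptions)} entries loaded")
+ # Peek at the first entry to see how the descriptions are stored for each tag.
+ first = tag_descriptions[0] if isinstance(tag_descriptions, list) \
+     else next(iter(tag_descriptions.items()))
+ print(first)
+ ```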
347
+
348
+ <details>
349
+ <summary><font size="4" style="font-weight:bold;">
350
+ RAM Inference
351
+ </font></summary>
352
+
353
+ Get the English and Chinese outputs of the images:
354
+
355
+ <pre>
356
+ python inference_ram.py --image images/demo/demo1.jpg \
357
+ --pretrained pretrained/ram_swin_large_14m.pth
358
+ </pre>
359
+
360
+ The output will look like the following:
361
+
362
+ ```
363
+ Image Tags: armchair | blanket | lamp | carpet | couch | dog | floor | furniture | gray | green | living room | picture frame | pillow | plant | room | sit | stool | wood floor
364
+ 图像标签: 扶手椅 | 毯子/覆盖层 | 灯 | 地毯 | 沙发 | 狗 | 地板/地面 | 家具 | 灰色 | 绿色 | 客厅 | 相框 | 枕头 | 植物 | 房间 | 坐/放置/坐落 | 凳子 | 木地板
365
+ ```
366
+
367
+ </details>
368
+
369
+
370
+ <details>
371
+ <summary><font size="4" style="font-weight:bold;">
372
+ RAM Inference on Unseen Categories (Open-Set)
373
+ </font></summary>
374
+
375
+
376
+ First, customize the recognition categories in [build_openset_label_embedding](./ram/utils/openset_utils.py), then get the tags of the images:
377
+
378
+ <pre>
379
+ python inference_ram_openset.py --image images/openset_example.jpg \
380
+ --pretrained pretrained/ram_swin_large_14m.pth
381
+ </pre>
382
+
383
+ The output will look like the following:
384
+ ```
385
+ Image Tags: Black-and-white | Go-kart
386
+ ```
387
+
388
+
389
+ </details>
390
+
391
+ <details>
392
+ <summary><font size="4" style="font-weight:bold;">
393
+ Tag2Text Inference
394
+ </font></summary>
395
+
396
+
397
+ Get the tagging and captioning results:
398
+ <pre>
399
+ python inference_tag2text.py --image images/demo/demo1.jpg \
400
+ --pretrained pretrained/tag2text_swin_14m.pth
401
+ </pre>
402
+ Or get the tagging and specified captioning results (optional):
403
+ <pre>python inference_tag2text.py --image images/demo/demo1.jpg \
404
+ --pretrained pretrained/tag2text_swin_14m.pth \
405
+ --specified-tags "cloud,sky"</pre>
406
+
407
+ </details>
408
+
409
+ ### **Batch Inference and Evaluation** ###
410
+ We release two datasets `OpenImages-common` (214 common tag classes) and `OpenImages-rare` (200 uncommon tag classes). Copy or sym-link test images of [OpenImages v6](https://storage.googleapis.com/openimages/web/download_v6.html) to `datasets/openimages_common_214/imgs/` and `datasets/openimages_rare_200/imgs`.
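+
+ If your OpenImages v6 test images live elsewhere on disk, a small helper like the sketch below can sym-link them into the expected folder. The source directory is a placeholder you would replace with your own path; copy the files instead if symlinks are not available on your system.
+
+ ```python
+ # Sketch: sym-link locally stored OpenImages v6 test images into the layout
+ # expected by batch_inference.py. SOURCE_DIR is a placeholder path.
+ from pathlib import Path
+
+ SOURCE_DIR = Path("/data/openimages_v6/test")  # placeholder: your local copy
+ TARGET_DIR = Path("datasets/openimages_common_214/imgs")
+ TARGET_DIR.mkdir(parents=True, exist_ok=True)
+
+ for img in SOURCE_DIR.glob("*.jpg"):
+     link = TARGET_DIR / img.name
+     if not link.exists():
+         link.symlink_to(img.resolve())
+ ```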
411
+
412
+ To evaluate RAM++ on `OpenImages-common`:
413
+
414
+ ```bash
415
+ python batch_inference.py \
416
+ --model-type ram_plus \
417
+ --checkpoint pretrained/ram_plus_swin_large_14m.pth \
418
+ --dataset openimages_common_214 \
419
+ --output-dir outputs/ram_plus
420
+ ```
421
+
422
+ To evaluate RAM++ open-set capability on `OpenImages-rare`:
423
+
424
+ ```bash
425
+ python batch_inference.py \
426
+ --model-type ram_plus \
427
+ --checkpoint pretrained/ram_plus_swin_large_14m.pth \
428
+ --open-set \
429
+ --dataset openimages_rare_200 \
430
+ --output-dir outputs/ram_plus_openset
431
+ ```
432
+
433
+ To evaluate RAM on `OpenImages-common`:
434
+
435
+ ```bash
436
+ python batch_inference.py \
437
+ --model-type ram \
438
+ --checkpoint pretrained/ram_swin_large_14m.pth \
439
+ --dataset openimages_common_214 \
440
+ --output-dir outputs/ram
441
+ ```
442
+
443
+ To evaluate RAM open-set capability on `OpenImages-rare`:
444
+
445
+ ```bash
446
+ python batch_inference.py \
447
+ --model-type ram \
448
+ --checkpoint pretrained/ram_swin_large_14m.pth \
449
+ --open-set \
450
+ --dataset openimages_rare_200 \
451
+ --output-dir outputs/ram_openset
452
+ ```
453
+
454
+ To evaluate Tag2Text on `OpenImages-common`:
455
+
456
+ ```bash
457
+ python batch_inference.py \
458
+ --model-type tag2text \
459
+ --checkpoint pretrained/tag2text_swin_14m.pth \
460
+ --dataset openimages_common_214 \
461
+ --output-dir outputs/tag2text
462
+ ```
463
+
464
+ Please refer to `batch_inference.py` for more options. To get the P/R values reported in Table 3 of the RAM paper, pass `--threshold=0.86` for RAM and `--threshold=0.68` for Tag2Text.
465
+
466
+ To run batch inference on custom images, you can set up your own datasets following the two given datasets.
467
+
468
+
469
+ ## :golfing: Model Training/Finetuning
470
+
471
+ ### **RAM++** ###
472
+
473
+ 1. Download the [RAM training datasets](#open_book-training-datasets), where each json file contains a list. Each item in the list is a dictionary with three key-value pairs: {'image_path': path_of_image, 'caption': text_of_image, 'union_label_id': image tags for tagging, including parsed tags and pseudo tags}. A small sketch of this annotation format is shown after the training commands below.
474
+
475
+ 2. In ram/configs/pretrain.yaml, set 'train_file' to the paths of the json files.
476
+
477
+ 3. Prepare the [pretrained Swin-Transformer](https://github.com/microsoft/Swin-Transformer), and set 'ckpt' in ram/configs/swin.
478
+
479
+ 4. Download the RAM++ frozen tag embedding file "[ram_plus_tag_embedding_class_4585_des_51.pth](https://huggingface.co/xinyu1205/recognize-anything-plus-model/blob/main/ram_plus_tag_embedding_class_4585_des_51.pth)", and place it at "ram/data/frozen_tag_embedding/ram_plus_tag_embedding_class_4585_des_51.pth".
480
+
481
+ 5. Pre-train the model using 8 A100 GPUs:
482
+
483
+ ```bash
484
+ python -m torch.distributed.run --nproc_per_node=8 pretrain.py \
485
+ --model-type ram_plus \
486
+ --config ram/configs/pretrain.yaml \
487
+ --output-dir outputs/ram_plus
488
+ ```
489
+
490
+ 6. Fine-tune the pre-trained checkpoint using 8 A100 GPUs:
491
+
492
+ ```bash
493
+ python -m torch.distributed.run --nproc_per_node=8 finetune.py \
494
+ --model-type ram_plus \
495
+ --config ram/configs/finetune.yaml \
496
+ --checkpoint outputs/ram_plus/checkpoint_04.pth \
497
+ --output-dir outputs/ram_plus_ft
498
+ ```
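+
+ For reference, the sketch below shows what one entry of the annotation json described in step 1 could look like if you assemble your own training file. The field values are purely illustrative; the actual 'union_label_id' entries should be ids from the tag vocabulary used by the model.
+
+ ```python
+ # Sketch: assembling a RAM++-style training annotation file (see step 1 above).
+ # Each item carries 'image_path', 'caption', and 'union_label_id'
+ # (parsed tags plus pseudo tags). All values below are illustrative only.
+ import json
+
+ annotations = [
+     {
+         "image_path": "images/demo/demo1.jpg",       # illustrative path
+         "caption": "a dog lying on a couch in a living room",
+         "union_label_id": [102, 345, 1877],          # illustrative tag ids
+     },
+ ]
+
+ with open("my_train_ram_plus.json", "w", encoding="utf-8") as f:
+     json.dump(annotations, f)
+ ```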
499
+
500
+
501
+ <details>
502
+ <summary><font size="4" style="font-weight:bold;">
503
+ RAM
504
+ </font></summary>
505
+
506
+ 1. Download the [RAM training datasets](#open_book-training-datasets), where each json file contains a list. Each item in the list is a dictionary with four key-value pairs: {'image_path': path_of_image, 'caption': text_of_image, 'union_label_id': image tags for tagging, including parsed tags and pseudo tags, 'parse_label_id': image tags parsed from the caption}.
507
+
508
+ 2. In ram/configs/pretrain.yaml, set 'train_file' to the paths of the json files.
509
+
510
+ 3. Prepare the [pretrained Swin-Transformer](https://github.com/microsoft/Swin-Transformer), and set 'ckpt' in ram/configs/swin.
511
+
512
+ 4. Download the RAM frozen tag embedding file "[ram_tag_embedding_class_4585.pth](https://huggingface.co/xinyu1205/recognize_anything_model/blob/main/ram_tag_embedding_class_4585.pth)", and place it at "ram/data/frozen_tag_embedding/ram_tag_embedding_class_4585.pth".
513
+
514
+ 5. Pre-train the model using 8 A100 GPUs:
515
+
516
+ ```bash
517
+ python -m torch.distributed.run --nproc_per_node=8 pretrain.py \
518
+ --model-type ram \
519
+ --config ram/configs/pretrain.yaml \
520
+ --output-dir outputs/ram
521
+ ```
522
+
523
+ 6. Fine-tune the pre-trained checkpoint using 8 A100 GPUs:
524
+
525
+ ```bash
526
+ python -m torch.distributed.run --nproc_per_node=8 finetune.py \
527
+ --model-type ram \
528
+ --config ram/configs/finetune.yaml \
529
+ --checkpoint outputs/ram/checkpoint_04.pth \
530
+ --output-dir outputs/ram_ft
531
+ ```
532
+
533
+ </details>
534
+
535
+
536
+ <details>
537
+ <summary><font size="4" style="font-weight:bold;">
538
+ Tag2Text
539
+ </font></summary>
540
+
541
+ 1. Download the [RAM training datasets](#open_book-training-datasets), where each json file contains a list. Each item in the list is a dictionary with three key-value pairs: {'image_path': path_of_image, 'caption': text_of_image, 'parse_label_id': image tags parsed from the caption}.
542
+
543
+ 2. In ram/configs/pretrain_tag2text.yaml, set 'train_file' to the paths of the json files.
544
+
545
+ 3. Prepare the [pretrained Swin-Transformer](https://github.com/microsoft/Swin-Transformer), and set 'ckpt' in ram/configs/swin.
546
+
547
+ 4. Pre-train the model using 8 A100 GPUs:
548
+
549
+ ```bash
550
+ python -m torch.distributed.run --nproc_per_node=8 pretrain.py \
551
+ --model-type tag2text \
552
+ --config ram/configs/pretrain_tag2text.yaml \
553
+ --output-dir outputs/tag2text
554
+ ```
555
+
556
+ 5. Fine-tune the pre-trained checkpoint using 8 A100 GPUs:
557
+
558
+ ```bash
559
+ python -m torch.distributed.run --nproc_per_node=8 finetune.py \
560
+ --model-type tag2text \
561
+ --config ram/configs/finetune_tag2text.yaml \
562
+ --checkpoint outputs/tag2text/checkpoint_04.pth \
563
+ --output-dir outputs/tag2text_ft
564
+ ```
565
+
566
+ </details>
567
+
568
+
569
+ ## :black_nib: Citation
570
+ If you find our work to be useful for your research, please consider citing.
571
+
572
+ ```
573
+ @article{huang2023open,
574
+ title={Open-Set Image Tagging with Multi-Grained Text Supervision},
575
+ author={Huang, Xinyu and Huang, Yi-Jie and Zhang, Youcai and Tian, Weiwei and Feng, Rui and Zhang, Yuejie and Xie, Yanchun and Li, Yaqian and Zhang, Lei},
576
+ journal={arXiv e-prints},
577
+ pages={arXiv--2310},
578
+ year={2023}
579
+ }
580
+
581
+ @article{zhang2023recognize,
582
+ title={Recognize Anything: A Strong Image Tagging Model},
583
+ author={Zhang, Youcai and Huang, Xinyu and Ma, Jinyu and Li, Zhaoyang and Luo, Zhaochuan and Xie, Yanchun and Qin, Yuzhuo and Luo, Tong and Li, Yaqian and Liu, Shilong and others},
584
+ journal={arXiv preprint arXiv:2306.03514},
585
+ year={2023}
586
+ }
587
+
588
+ @article{huang2023tag2text,
589
+ title={Tag2Text: Guiding Vision-Language Model via Image Tagging},
590
+ author={Huang, Xinyu and Zhang, Youcai and Ma, Jinyu and Tian, Weiwei and Feng, Rui and Zhang, Yuejie and Li, Yaqian and Guo, Yandong and Zhang, Lei},
591
+ journal={arXiv preprint arXiv:2303.05657},
592
+ year={2023}
593
+ }
594
+ ```
595
+
596
+ ## :hearts: Acknowledgements
597
+ This work is done with the help of the amazing code base of [BLIP](https://github.com/salesforce/BLIP), thanks very much!
598
+
599
+ We want to thank @Cheng Rui @Shilong Liu @Ren Tianhe for their help in [marrying RAM/Tag2Text with Grounded-SAM](https://github.com/IDEA-Research/Grounded-Segment-Anything).
600
+
601
+ We also want to thank [Ask-Anything](https://github.com/OpenGVLab/Ask-Anything), [Prompt-can-anything](https://github.com/positive666/Prompt-Can-Anything) for combining RAM/Tag2Text, which greatly expands the application boundaries of RAM/Tag2Text.
recognize-anything/.ipynb_checkpoints/recognize_anything_demo-checkpoint.ipynb ADDED
The diff for this file is too large to render. See raw diff
 
recognize-anything/datasets/hico/hico_600_annots.txt ADDED
The diff for this file is too large to render. See raw diff
 
recognize-anything/datasets/hico/hico_600_taglist.txt ADDED
@@ -0,0 +1,600 @@
1
+ person board airplane
2
+ person direct airplane
3
+ person exit airplane
4
+ person fly airplane
5
+ person inspect airplane
6
+ person load airplane
7
+ person ride airplane
8
+ person sit_on airplane
9
+ person wash airplane
10
+ person no_interaction airplane
11
+ person carry bicycle
12
+ person hold bicycle
13
+ person inspect bicycle
14
+ person jump bicycle
15
+ person hop_on bicycle
16
+ person park bicycle
17
+ person push bicycle
18
+ person repair bicycle
19
+ person ride bicycle
20
+ person sit_on bicycle
21
+ person straddle bicycle
22
+ person walk bicycle
23
+ person wash bicycle
24
+ person no_interaction bicycle
25
+ person chase bird
26
+ person feed bird
27
+ person hold bird
28
+ person pet bird
29
+ person release bird
30
+ person watch bird
31
+ person no_interaction bird
32
+ person board boat
33
+ person drive boat
34
+ person exit boat
35
+ person inspect boat
36
+ person jump boat
37
+ person launch boat
38
+ person repair boat
39
+ person ride boat
40
+ person row boat
41
+ person sail boat
42
+ person sit_on boat
43
+ person stand_on boat
44
+ person tie boat
45
+ person wash boat
46
+ person no_interaction boat
47
+ person carry bottle
48
+ person drink_with bottle
49
+ person hold bottle
50
+ person inspect bottle
51
+ person lick bottle
52
+ person open bottle
53
+ person pour bottle
54
+ person no_interaction bottle
55
+ person board bus
56
+ person direct bus
57
+ person drive bus
58
+ person exit bus
59
+ person inspect bus
60
+ person load bus
61
+ person ride bus
62
+ person sit_on bus
63
+ person wash bus
64
+ person wave bus
65
+ person no_interaction bus
66
+ person board car
67
+ person direct car
68
+ person drive car
69
+ person hose car
70
+ person inspect car
71
+ person jump car
72
+ person load car
73
+ person park car
74
+ person ride car
75
+ person wash car
76
+ person no_interaction car
77
+ person dry cat
78
+ person feed cat
79
+ person hold cat
80
+ person hug cat
81
+ person kiss cat
82
+ person pet cat
83
+ person scratch cat
84
+ person wash cat
85
+ person chase cat
86
+ person no_interaction cat
87
+ person carry chair
88
+ person hold chair
89
+ person lie_on chair
90
+ person sit_on chair
91
+ person stand_on chair
92
+ person no_interaction chair
93
+ person carry couch
94
+ person lie_on couch
95
+ person sit_on couch
96
+ person no_interaction couch
97
+ person feed cow
98
+ person herd cow
99
+ person hold cow
100
+ person hug cow
101
+ person kiss cow
102
+ person lasso cow
103
+ person milk cow
104
+ person pet cow
105
+ person ride cow
106
+ person walk cow
107
+ person no_interaction cow
108
+ person clean dining_table
109
+ person eat_at dining_table
110
+ person sit_at dining_table
111
+ person no_interaction dining_table
112
+ person carry dog
113
+ person dry dog
114
+ person feed dog
115
+ person groom dog
116
+ person hold dog
117
+ person hose dog
118
+ person hug dog
119
+ person inspect dog
120
+ person kiss dog
121
+ person pet dog
122
+ person run dog
123
+ person scratch dog
124
+ person straddle dog
125
+ person train dog
126
+ person walk dog
127
+ person wash dog
128
+ person chase dog
129
+ person no_interaction dog
130
+ person feed horse
131
+ person groom horse
132
+ person hold horse
133
+ person hug horse
134
+ person jump horse
135
+ person kiss horse
136
+ person load horse
137
+ person hop_on horse
138
+ person pet horse
139
+ person race horse
140
+ person ride horse
141
+ person run horse
142
+ person straddle horse
143
+ person train horse
144
+ person walk horse
145
+ person wash horse
146
+ person no_interaction horse
147
+ person hold motorcycle
148
+ person inspect motorcycle
149
+ person jump motorcycle
150
+ person hop_on motorcycle
151
+ person park motorcycle
152
+ person push motorcycle
153
+ person race motorcycle
154
+ person ride motorcycle
155
+ person sit_on motorcycle
156
+ person straddle motorcycle
157
+ person turn motorcycle
158
+ person walk motorcycle
159
+ person wash motorcycle
160
+ person no_interaction motorcycle
161
+ person carry person
162
+ person greet person
163
+ person hold person
164
+ person hug person
165
+ person kiss person
166
+ person stab person
167
+ person tag person
168
+ person teach person
169
+ person lick person
170
+ person no_interaction person
171
+ person carry potted_plant
172
+ person hold potted_plant
173
+ person hose potted_plant
174
+ person no_interaction potted_plant
175
+ person carry sheep
176
+ person feed sheep
177
+ person herd sheep
178
+ person hold sheep
179
+ person hug sheep
180
+ person kiss sheep
181
+ person pet sheep
182
+ person ride sheep
183
+ person shear sheep
184
+ person walk sheep
185
+ person wash sheep
186
+ person no_interaction sheep
187
+ person board train
188
+ person drive train
189
+ person exit train
190
+ person load train
191
+ person ride train
192
+ person sit_on train
193
+ person wash train
194
+ person no_interaction train
195
+ person control tv
196
+ person repair tv
197
+ person watch tv
198
+ person no_interaction tv
199
+ person buy apple
200
+ person cut apple
201
+ person eat apple
202
+ person hold apple
203
+ person inspect apple
204
+ person peel apple
205
+ person pick apple
206
+ person smell apple
207
+ person wash apple
208
+ person no_interaction apple
209
+ person carry backpack
210
+ person hold backpack
211
+ person inspect backpack
212
+ person open backpack
213
+ person wear backpack
214
+ person no_interaction backpack
215
+ person buy banana
216
+ person carry banana
217
+ person cut banana
218
+ person eat banana
219
+ person hold banana
220
+ person inspect banana
221
+ person peel banana
222
+ person pick banana
223
+ person smell banana
224
+ person no_interaction banana
225
+ person break baseball_bat
226
+ person carry baseball_bat
227
+ person hold baseball_bat
228
+ person sign baseball_bat
229
+ person swing baseball_bat
230
+ person throw baseball_bat
231
+ person wield baseball_bat
232
+ person no_interaction baseball_bat
233
+ person hold baseball_glove
234
+ person wear baseball_glove
235
+ person no_interaction baseball_glove
236
+ person feed bear
237
+ person hunt bear
238
+ person watch bear
239
+ person no_interaction bear
240
+ person clean bed
241
+ person lie_on bed
242
+ person sit_on bed
243
+ person no_interaction bed
244
+ person inspect bench
245
+ person lie_on bench
246
+ person sit_on bench
247
+ person no_interaction bench
248
+ person carry book
249
+ person hold book
250
+ person open book
251
+ person read book
252
+ person no_interaction book
253
+ person hold bowl
254
+ person stir bowl
255
+ person wash bowl
256
+ person lick bowl
257
+ person no_interaction bowl
258
+ person cut broccoli
259
+ person eat broccoli
260
+ person hold broccoli
261
+ person smell broccoli
262
+ person stir broccoli
263
+ person wash broccoli
264
+ person no_interaction broccoli
265
+ person blow cake
266
+ person carry cake
267
+ person cut cake
268
+ person eat cake
269
+ person hold cake
270
+ person light cake
271
+ person make cake
272
+ person pick_up cake
273
+ person no_interaction cake
274
+ person carry carrot
275
+ person cook carrot
276
+ person cut carrot
277
+ person eat carrot
278
+ person hold carrot
279
+ person peel carrot
280
+ person smell carrot
281
+ person stir carrot
282
+ person wash carrot
283
+ person no_interaction carrot
284
+ person carry cell_phone
285
+ person hold cell_phone
286
+ person read cell_phone
287
+ person repair cell_phone
288
+ person talk_on cell_phone
289
+ person text_on cell_phone
290
+ person no_interaction cell_phone
291
+ person check clock
292
+ person hold clock
293
+ person repair clock
294
+ person set clock
295
+ person no_interaction clock
296
+ person carry cup
297
+ person drink_with cup
298
+ person hold cup
299
+ person inspect cup
300
+ person pour cup
301
+ person sip cup
302
+ person smell cup
303
+ person fill cup
304
+ person wash cup
305
+ person no_interaction cup
306
+ person buy donut
307
+ person carry donut
308
+ person eat donut
309
+ person hold donut
310
+ person make donut
311
+ person pick_up donut
312
+ person smell donut
313
+ person no_interaction donut
314
+ person feed elephant
315
+ person hold elephant
316
+ person hose elephant
317
+ person hug elephant
318
+ person kiss elephant
319
+ person hop_on elephant
320
+ person pet elephant
321
+ person ride elephant
322
+ person walk elephant
323
+ person wash elephant
324
+ person watch elephant
325
+ person no_interaction elephant
326
+ person hug fire_hydrant
327
+ person inspect fire_hydrant
328
+ person open fire_hydrant
329
+ person paint fire_hydrant
330
+ person no_interaction fire_hydrant
331
+ person hold fork
332
+ person lift fork
333
+ person stick fork
334
+ person lick fork
335
+ person wash fork
336
+ person no_interaction fork
337
+ person block frisbee
338
+ person catch frisbee
339
+ person hold frisbee
340
+ person spin frisbee
341
+ person throw frisbee
342
+ person no_interaction frisbee
343
+ person feed giraffe
344
+ person kiss giraffe
345
+ person pet giraffe
346
+ person ride giraffe
347
+ person watch giraffe
348
+ person no_interaction giraffe
349
+ person hold hair_drier
350
+ person operate hair_drier
351
+ person repair hair_drier
352
+ person no_interaction hair_drier
353
+ person carry handbag
354
+ person hold handbag
355
+ person inspect handbag
356
+ person no_interaction handbag
357
+ person carry hot_dog
358
+ person cook hot_dog
359
+ person cut hot_dog
360
+ person eat hot_dog
361
+ person hold hot_dog
362
+ person make hot_dog
363
+ person no_interaction hot_dog
364
+ person carry keyboard
365
+ person clean keyboard
366
+ person hold keyboard
367
+ person type_on keyboard
368
+ person no_interaction keyboard
369
+ person assemble kite
370
+ person carry kite
371
+ person fly kite
372
+ person hold kite
373
+ person inspect kite
374
+ person launch kite
375
+ person pull kite
376
+ person no_interaction kite
377
+ person cut_with knife
378
+ person hold knife
379
+ person stick knife
380
+ person wash knife
381
+ person wield knife
382
+ person lick knife
383
+ person no_interaction knife
384
+ person hold laptop
385
+ person open laptop
386
+ person read laptop
387
+ person repair laptop
388
+ person type_on laptop
389
+ person no_interaction laptop
390
+ person clean microwave
391
+ person open microwave
392
+ person operate microwave
393
+ person no_interaction microwave
394
+ person control mouse
395
+ person hold mouse
396
+ person repair mouse
397
+ person no_interaction mouse
398
+ person buy orange
399
+ person cut orange
400
+ person eat orange
401
+ person hold orange
402
+ person inspect orange
403
+ person peel orange
404
+ person pick orange
405
+ person squeeze orange
406
+ person wash orange
407
+ person no_interaction orange
408
+ person clean oven
409
+ person hold oven
410
+ person inspect oven
411
+ person open oven
412
+ person repair oven
413
+ person operate oven
414
+ person no_interaction oven
415
+ person check parking_meter
416
+ person pay parking_meter
417
+ person repair parking_meter
418
+ person no_interaction parking_meter
419
+ person buy pizza
420
+ person carry pizza
421
+ person cook pizza
422
+ person cut pizza
423
+ person eat pizza
424
+ person hold pizza
425
+ person make pizza
426
+ person pick_up pizza
427
+ person slide pizza
428
+ person smell pizza
429
+ person no_interaction pizza
430
+ person clean refrigerator
431
+ person hold refrigerator
432
+ person move refrigerator
433
+ person open refrigerator
434
+ person no_interaction refrigerator
435
+ person hold remote
436
+ person point remote
437
+ person swing remote
438
+ person no_interaction remote
439
+ person carry sandwich
440
+ person cook sandwich
441
+ person cut sandwich
442
+ person eat sandwich
443
+ person hold sandwich
444
+ person make sandwich
445
+ person no_interaction sandwich
446
+ person cut_with scissors
447
+ person hold scissors
448
+ person open scissors
449
+ person no_interaction scissors
450
+ person clean sink
451
+ person repair sink
452
+ person wash sink
453
+ person no_interaction sink
454
+ person carry skateboard
455
+ person flip skateboard
456
+ person grind skateboard
457
+ person hold skateboard
458
+ person jump skateboard
459
+ person pick_up skateboard
460
+ person ride skateboard
461
+ person sit_on skateboard
462
+ person stand_on skateboard
463
+ person no_interaction skateboard
464
+ person adjust skis
465
+ person carry skis
466
+ person hold skis
467
+ person inspect skis
468
+ person jump skis
469
+ person pick_up skis
470
+ person repair skis
471
+ person ride skis
472
+ person stand_on skis
473
+ person wear skis
474
+ person no_interaction skis
475
+ person adjust snowboard
476
+ person carry snowboard
477
+ person grind snowboard
478
+ person hold snowboard
479
+ person jump snowboard
480
+ person ride snowboard
481
+ person stand_on snowboard
482
+ person wear snowboard
483
+ person no_interaction snowboard
484
+ person hold spoon
485
+ person lick spoon
486
+ person wash spoon
487
+ person sip spoon
488
+ person no_interaction spoon
489
+ person block sports_ball
490
+ person carry sports_ball
491
+ person catch sports_ball
492
+ person dribble sports_ball
493
+ person hit sports_ball
494
+ person hold sports_ball
495
+ person inspect sports_ball
496
+ person kick sports_ball
497
+ person pick_up sports_ball
498
+ person serve sports_ball
499
+ person sign sports_ball
500
+ person spin sports_ball
501
+ person throw sports_ball
502
+ person no_interaction sports_ball
503
+ person hold stop_sign
504
+ person stand_under stop_sign
505
+ person stop_at stop_sign
506
+ person no_interaction stop_sign
507
+ person carry suitcase
508
+ person drag suitcase
509
+ person hold suitcase
510
+ person hug suitcase
511
+ person load suitcase
512
+ person open suitcase
513
+ person pack suitcase
514
+ person pick_up suitcase
515
+ person zip suitcase
516
+ person no_interaction suitcase
517
+ person carry surfboard
518
+ person drag surfboard
519
+ person hold surfboard
520
+ person inspect surfboard
521
+ person jump surfboard
522
+ person lie_on surfboard
523
+ person load surfboard
524
+ person ride surfboard
525
+ person stand_on surfboard
526
+ person sit_on surfboard
527
+ person wash surfboard
528
+ person no_interaction surfboard
529
+ person carry teddy_bear
530
+ person hold teddy_bear
531
+ person hug teddy_bear
532
+ person kiss teddy_bear
533
+ person no_interaction teddy_bear
534
+ person carry tennis_racket
535
+ person hold tennis_racket
536
+ person inspect tennis_racket
537
+ person swing tennis_racket
538
+ person no_interaction tennis_racket
539
+ person adjust tie
540
+ person cut tie
541
+ person hold tie
542
+ person inspect tie
543
+ person pull tie
544
+ person tie tie
545
+ person wear tie
546
+ person no_interaction tie
547
+ person hold toaster
548
+ person operate toaster
549
+ person repair toaster
550
+ person no_interaction toaster
551
+ person clean toilet
552
+ person flush toilet
553
+ person open toilet
554
+ person repair toilet
555
+ person sit_on toilet
556
+ person stand_on toilet
557
+ person wash toilet
558
+ person no_interaction toilet
559
+ person brush_with toothbrush
560
+ person hold toothbrush
561
+ person wash toothbrush
562
+ person no_interaction toothbrush
563
+ person install traffic_light
564
+ person repair traffic_light
565
+ person stand_under traffic_light
566
+ person stop_at traffic_light
567
+ person no_interaction traffic_light
568
+ person direct truck
569
+ person drive truck
570
+ person inspect truck
571
+ person load truck
572
+ person repair truck
573
+ person ride truck
574
+ person sit_on truck
575
+ person wash truck
576
+ person no_interaction truck
577
+ person carry umbrella
578
+ person hold umbrella
579
+ person lose umbrella
580
+ person open umbrella
581
+ person repair umbrella
582
+ person set umbrella
583
+ person stand_under umbrella
584
+ person no_interaction umbrella
585
+ person hold vase
586
+ person make vase
587
+ person paint vase
588
+ person no_interaction vase
589
+ person fill wine_glass
590
+ person hold wine_glass
591
+ person sip wine_glass
592
+ person toast wine_glass
593
+ person lick wine_glass
594
+ person wash wine_glass
595
+ person no_interaction wine_glass
596
+ person feed zebra
597
+ person hold zebra
598
+ person pet zebra
599
+ person watch zebra
600
+ person no_interaction zebra
recognize-anything/datasets/imagenet_multi/imagenet_multi_1000_annots.txt ADDED
The diff for this file is too large to render. See raw diff
 
recognize-anything/datasets/imagenet_multi/imagenet_multi_1000_taglist.txt ADDED
@@ -0,0 +1,1000 @@
1
+ tench
2
+ goldfish
3
+ great white shark
4
+ tiger shark
5
+ hammerhead shark
6
+ electric ray
7
+ stingray
8
+ rooster
9
+ hen
10
+ ostrich
11
+ brambling
12
+ goldfinch
13
+ house finch
14
+ junco
15
+ indigo bunting
16
+ American robin
17
+ bulbul
18
+ jay
19
+ magpie
20
+ chickadee
21
+ American dipper
22
+ kite (bird of prey)
23
+ bald eagle
24
+ vulture
25
+ great grey owl
26
+ fire salamander
27
+ smooth newt
28
+ newt
29
+ spotted salamander
30
+ axolotl
31
+ American bullfrog
32
+ tree frog
33
+ tailed frog
34
+ loggerhead sea turtle
35
+ leatherback sea turtle
36
+ mud turtle
37
+ terrapin
38
+ box turtle
39
+ banded gecko
40
+ green iguana
41
+ Carolina anole
42
+ desert grassland whiptail lizard
43
+ agama
44
+ frilled-necked lizard
45
+ alligator lizard
46
+ Gila monster
47
+ European green lizard
48
+ chameleon
49
+ Komodo dragon
50
+ Nile crocodile
51
+ American alligator
52
+ triceratops
53
+ worm snake
54
+ ring-necked snake
55
+ eastern hog-nosed snake
56
+ smooth green snake
57
+ kingsnake
58
+ garter snake
59
+ water snake
60
+ vine snake
61
+ night snake
62
+ boa constrictor
63
+ African rock python
64
+ Indian cobra
65
+ green mamba
66
+ sea snake
67
+ Saharan horned viper
68
+ eastern diamondback rattlesnake
69
+ sidewinder rattlesnake
70
+ trilobite
71
+ harvestman
72
+ scorpion
73
+ yellow garden spider
74
+ barn spider
75
+ European garden spider
76
+ southern black widow
77
+ tarantula
78
+ wolf spider
79
+ tick
80
+ centipede
81
+ black grouse
82
+ ptarmigan
83
+ ruffed grouse
84
+ prairie grouse
85
+ peafowl
86
+ quail
87
+ partridge
88
+ african grey parrot
89
+ macaw
90
+ sulphur-crested cockatoo
91
+ lorikeet
92
+ coucal
93
+ bee eater
94
+ hornbill
95
+ hummingbird
96
+ jacamar
97
+ toucan
98
+ duck
99
+ red-breasted merganser
100
+ goose
101
+ black swan
102
+ tusker
103
+ echidna
104
+ platypus
105
+ wallaby
106
+ koala
107
+ wombat
108
+ jellyfish
109
+ sea anemone
110
+ brain coral
111
+ flatworm
112
+ nematode
113
+ conch
114
+ snail
115
+ slug
116
+ sea slug
117
+ chiton
118
+ chambered nautilus
119
+ Dungeness crab
120
+ rock crab
121
+ fiddler crab
122
+ red king crab
123
+ American lobster
124
+ spiny lobster
125
+ crayfish
126
+ hermit crab
127
+ isopod
128
+ white stork
129
+ black stork
130
+ spoonbill
131
+ flamingo
132
+ little blue heron
133
+ great egret
134
+ bittern bird
135
+ crane bird
136
+ limpkin
137
+ common gallinule
138
+ American coot
139
+ bustard
140
+ ruddy turnstone
141
+ dunlin
142
+ common redshank
143
+ dowitcher
144
+ oystercatcher
145
+ pelican
146
+ king penguin
147
+ albatross
148
+ grey whale
149
+ killer whale
150
+ dugong
151
+ sea lion
152
+ Chihuahua
153
+ Japanese Chin
154
+ Maltese
155
+ Pekingese
156
+ Shih Tzu
157
+ King Charles Spaniel
158
+ Papillon
159
+ toy terrier
160
+ Rhodesian Ridgeback
161
+ Afghan Hound
162
+ Basset Hound
163
+ Beagle
164
+ Bloodhound
165
+ Bluetick Coonhound
166
+ Black and Tan Coonhound
167
+ Treeing Walker Coonhound
168
+ English foxhound
169
+ Redbone Coonhound
170
+ borzoi
171
+ Irish Wolfhound
172
+ Italian Greyhound
173
+ Whippet
174
+ Ibizan Hound
175
+ Norwegian Elkhound
176
+ Otterhound
177
+ Saluki
178
+ Scottish Deerhound
179
+ Weimaraner
180
+ Staffordshire Bull Terrier
181
+ American Staffordshire Terrier
182
+ Bedlington Terrier
183
+ Border Terrier
184
+ Kerry Blue Terrier
185
+ Irish Terrier
186
+ Norfolk Terrier
187
+ Norwich Terrier
188
+ Yorkshire Terrier
189
+ Wire Fox Terrier
190
+ Lakeland Terrier
191
+ Sealyham Terrier
192
+ Airedale Terrier
193
+ Cairn Terrier
194
+ Australian Terrier
195
+ Dandie Dinmont Terrier
196
+ Boston Terrier
197
+ Miniature Schnauzer
198
+ Giant Schnauzer
199
+ Standard Schnauzer
200
+ Scottish Terrier
201
+ Tibetan Terrier
202
+ Australian Silky Terrier
203
+ Soft-coated Wheaten Terrier
204
+ West Highland White Terrier
205
+ Lhasa Apso
206
+ Flat-Coated Retriever
207
+ Curly-coated Retriever
208
+ Golden Retriever
209
+ Labrador Retriever
210
+ Chesapeake Bay Retriever
211
+ German Shorthaired Pointer
212
+ Vizsla
213
+ English Setter
214
+ Irish Setter
215
+ Gordon Setter
216
+ Brittany dog
217
+ Clumber Spaniel
218
+ English Springer Spaniel
219
+ Welsh Springer Spaniel
220
+ Cocker Spaniel
221
+ Sussex Spaniel
222
+ Irish Water Spaniel
223
+ Kuvasz
224
+ Schipperke
225
+ Groenendael dog
226
+ Malinois
227
+ Briard
228
+ Australian Kelpie
229
+ Komondor
230
+ Old English Sheepdog
231
+ Shetland Sheepdog
232
+ collie
233
+ Border Collie
234
+ Bouvier des Flandres dog
235
+ Rottweiler
236
+ German Shepherd Dog
237
+ Dobermann
238
+ Miniature Pinscher
239
+ Greater Swiss Mountain Dog
240
+ Bernese Mountain Dog
241
+ Appenzeller Sennenhund
242
+ Entlebucher Sennenhund
243
+ Boxer
244
+ Bullmastiff
245
+ Tibetan Mastiff
246
+ French Bulldog
247
+ Great Dane
248
+ St. Bernard
249
+ husky
250
+ Alaskan Malamute
251
+ Siberian Husky
252
+ Dalmatian
253
+ Affenpinscher
254
+ Basenji
255
+ pug
256
+ Leonberger
257
+ Newfoundland dog
258
+ Great Pyrenees dog
259
+ Samoyed
260
+ Pomeranian
261
+ Chow Chow
262
+ Keeshond
263
+ brussels griffon
264
+ Pembroke Welsh Corgi
265
+ Cardigan Welsh Corgi
266
+ Toy Poodle
267
+ Miniature Poodle
268
+ Standard Poodle
269
+ Mexican hairless dog (xoloitzcuintli)
270
+ grey wolf
271
+ Alaskan tundra wolf
272
+ red wolf or maned wolf
273
+ coyote
274
+ dingo
275
+ dhole
276
+ African wild dog
277
+ hyena
278
+ red fox
279
+ kit fox
280
+ Arctic fox
281
+ grey fox
282
+ tabby cat
283
+ tiger cat
284
+ Persian cat
285
+ Siamese cat
286
+ Egyptian Mau
287
+ cougar
288
+ lynx
289
+ leopard
290
+ snow leopard
291
+ jaguar
292
+ lion
293
+ tiger
294
+ cheetah
295
+ brown bear
296
+ American black bear
297
+ polar bear
298
+ sloth bear
299
+ mongoose
300
+ meerkat
301
+ tiger beetle
302
+ ladybug
303
+ ground beetle
304
+ longhorn beetle
305
+ leaf beetle
306
+ dung beetle
307
+ rhinoceros beetle
308
+ weevil
309
+ fly
310
+ bee
311
+ ant
312
+ grasshopper
313
+ cricket insect
314
+ stick insect
315
+ cockroach
316
+ praying mantis
317
+ cicada
318
+ leafhopper
319
+ lacewing
320
+ dragonfly
321
+ damselfly
322
+ red admiral butterfly
323
+ ringlet butterfly
324
+ monarch butterfly
325
+ small white butterfly
326
+ sulphur butterfly
327
+ gossamer-winged butterfly
328
+ starfish
329
+ sea urchin
330
+ sea cucumber
331
+ cottontail rabbit
332
+ hare
333
+ Angora rabbit
334
+ hamster
335
+ porcupine
336
+ fox squirrel
337
+ marmot
338
+ beaver
339
+ guinea pig
340
+ common sorrel horse
341
+ zebra
342
+ pig
343
+ wild boar
344
+ warthog
345
+ hippopotamus
346
+ ox
347
+ water buffalo
348
+ bison
349
+ ram (adult male sheep)
350
+ bighorn sheep
351
+ Alpine ibex
352
+ hartebeest
353
+ impala (antelope)
354
+ gazelle
355
+ arabian camel
356
+ llama
357
+ weasel
358
+ mink
359
+ European polecat
360
+ black-footed ferret
361
+ otter
362
+ skunk
363
+ badger
364
+ armadillo
365
+ three-toed sloth
366
+ orangutan
367
+ gorilla
368
+ chimpanzee
369
+ gibbon
370
+ siamang
371
+ guenon
372
+ patas monkey
373
+ baboon
374
+ macaque
375
+ langur
376
+ black-and-white colobus
377
+ proboscis monkey
378
+ marmoset
379
+ white-headed capuchin
380
+ howler monkey
381
+ titi monkey
382
+ Geoffroy's spider monkey
383
+ common squirrel monkey
384
+ ring-tailed lemur
385
+ indri
386
+ Asian elephant
387
+ African bush elephant
388
+ red panda
389
+ giant panda
390
+ snoek fish
391
+ eel
392
+ silver salmon
393
+ rock beauty fish
394
+ clownfish
395
+ sturgeon
396
+ gar fish
397
+ lionfish
398
+ pufferfish
399
+ abacus
400
+ abaya
401
+ academic gown
402
+ accordion
403
+ acoustic guitar
404
+ aircraft carrier
405
+ airliner
406
+ airship
407
+ altar
408
+ ambulance
409
+ amphibious vehicle
410
+ analog clock
411
+ apiary
412
+ apron
413
+ trash can
414
+ assault rifle
415
+ backpack
416
+ bakery
417
+ balance beam
418
+ balloon
419
+ ballpoint pen
420
+ Band-Aid
421
+ banjo
422
+ baluster / handrail
423
+ barbell
424
+ barber chair
425
+ barbershop
426
+ barn
427
+ barometer
428
+ barrel
429
+ wheelbarrow
430
+ baseball
431
+ basketball
432
+ bassinet
433
+ bassoon
434
+ swimming cap
435
+ bath towel
436
+ bathtub
437
+ station wagon
438
+ lighthouse
439
+ beaker
440
+ military hat (bearskin or shako)
441
+ beer bottle
442
+ beer glass
443
+ bell tower
444
+ baby bib
445
+ tandem bicycle
446
+ bikini
447
+ ring binder
448
+ binoculars
449
+ birdhouse
450
+ boathouse
451
+ bobsleigh
452
+ bolo tie
453
+ poke bonnet
454
+ bookcase
455
+ bookstore
456
+ bottle cap
457
+ hunting bow
458
+ bow tie
459
+ brass memorial plaque
460
+ bra
461
+ breakwater
462
+ breastplate
463
+ broom
464
+ bucket
465
+ buckle
466
+ bulletproof vest
467
+ high-speed train
468
+ butcher shop
469
+ taxicab
470
+ cauldron
471
+ candle
472
+ cannon
473
+ canoe
474
+ can opener
475
+ cardigan
476
+ car mirror
477
+ carousel
478
+ tool kit
479
+ cardboard box / carton
480
+ car wheel
481
+ automated teller machine
482
+ cassette
483
+ cassette player
484
+ castle
485
+ catamaran
486
+ CD player
487
+ cello
488
+ mobile phone
489
+ chain
490
+ chain-link fence
491
+ chain mail
492
+ chainsaw
493
+ storage chest
494
+ chiffonier
495
+ bell or wind chime
496
+ china cabinet
497
+ Christmas stocking
498
+ church
499
+ movie theater
500
+ cleaver
501
+ cliff dwelling
502
+ cloak
503
+ clogs
504
+ cocktail shaker
505
+ coffee mug
506
+ coffeemaker
507
+ spiral or coil
508
+ combination lock
509
+ computer keyboard
510
+ candy store
511
+ container ship
512
+ convertible
513
+ corkscrew
514
+ cornet
515
+ cowboy boot
516
+ cowboy hat
517
+ cradle
518
+ construction crane
519
+ crash helmet
520
+ crate
521
+ infant bed
522
+ Crock Pot
523
+ croquet ball
524
+ crutch
525
+ cuirass
526
+ dam
527
+ desk
528
+ desktop computer
529
+ rotary dial telephone
530
+ diaper
531
+ digital clock
532
+ digital watch
533
+ dining table
534
+ dishcloth
535
+ dishwasher
536
+ disc brake
537
+ dock
538
+ dog sled
539
+ dome
540
+ doormat
541
+ drilling rig
542
+ drum
543
+ drumstick
544
+ dumbbell
545
+ Dutch oven
546
+ electric fan
547
+ electric guitar
548
+ electric locomotive
549
+ entertainment center
550
+ envelope
551
+ espresso machine
552
+ face powder
553
+ feather boa
554
+ filing cabinet
555
+ fireboat
556
+ fire truck
557
+ fire screen
558
+ flagpole
559
+ flute
560
+ folding chair
561
+ football helmet
562
+ forklift
563
+ fountain
564
+ fountain pen
565
+ four-poster bed
566
+ freight car
567
+ French horn
568
+ frying pan
569
+ fur coat
570
+ garbage truck
571
+ gas mask or respirator
572
+ gas pump
573
+ goblet
574
+ go-kart
575
+ golf ball
576
+ golf cart
577
+ gondola
578
+ gong
579
+ gown
580
+ grand piano
581
+ greenhouse
582
+ radiator grille
583
+ grocery store
584
+ guillotine
585
+ hair clip
586
+ hair spray
587
+ half-track
588
+ hammer
589
+ hamper
590
+ hair dryer
591
+ hand-held computer
592
+ handkerchief
593
+ hard disk drive
594
+ harmonica
595
+ harp
596
+ combine harvester
597
+ hatchet
598
+ holster
599
+ home theater
600
+ honeycomb
601
+ hook
602
+ hoop skirt
603
+ gymnastic horizontal bar
604
+ horse-drawn vehicle
605
+ hourglass
606
+ iPod
607
+ clothes iron
608
+ carved pumpkin
609
+ jeans
610
+ jeep
611
+ T-shirt
612
+ jigsaw puzzle
613
+ rickshaw
614
+ joystick
615
+ kimono
616
+ knee pad
617
+ knot
618
+ lab coat
619
+ ladle
620
+ lampshade
621
+ laptop computer
622
+ lawn mower
623
+ lens cap
624
+ letter opener
625
+ library
626
+ lifeboat
627
+ lighter
628
+ limousine
629
+ ocean liner
630
+ lipstick
631
+ slip-on shoe
632
+ lotion
633
+ music speaker
634
+ loupe magnifying glass
635
+ sawmill
636
+ magnetic compass
637
+ messenger bag
638
+ mailbox
639
+ tights
640
+ one-piece bathing suit
641
+ manhole cover
642
+ maraca
643
+ marimba
644
+ mask
645
+ matchstick
646
+ maypole
647
+ maze
648
+ measuring cup
649
+ medicine cabinet
650
+ megalith
651
+ microphone
652
+ microwave oven
653
+ military uniform
654
+ milk can
655
+ minibus
656
+ miniskirt
657
+ minivan
658
+ missile
659
+ mitten
660
+ mixing bowl
661
+ mobile home
662
+ ford model t
663
+ modem
664
+ monastery
665
+ monitor
666
+ moped
667
+ mortar and pestle
668
+ graduation cap
669
+ mosque
670
+ mosquito net
671
+ vespa
672
+ mountain bike
673
+ tent
674
+ computer mouse
675
+ mousetrap
676
+ moving van
677
+ muzzle
678
+ metal nail
679
+ neck brace
680
+ necklace
681
+ baby pacifier
682
+ notebook computer
683
+ obelisk
684
+ oboe
685
+ ocarina
686
+ odometer
687
+ oil filter
688
+ pipe organ
689
+ oscilloscope
690
+ overskirt
691
+ bullock cart
692
+ oxygen mask
693
+ product packet / packaging
694
+ paddle
695
+ paddle wheel
696
+ padlock
697
+ paintbrush
698
+ pajamas
699
+ palace
700
+ pan flute
701
+ paper towel
702
+ parachute
703
+ parallel bars
704
+ park bench
705
+ parking meter
706
+ railroad car
707
+ patio
708
+ payphone
709
+ pedestal
710
+ pencil case
711
+ pencil sharpener
712
+ perfume
713
+ Petri dish
714
+ photocopier
715
+ plectrum
716
+ Pickelhaube
717
+ picket fence
718
+ pickup truck
719
+ pier
720
+ piggy bank
721
+ pill bottle
722
+ pillow
723
+ ping-pong ball
724
+ pinwheel
725
+ pirate ship
726
+ drink pitcher
727
+ block plane
728
+ planetarium
729
+ plastic bag
730
+ plate rack
731
+ farm plow
732
+ plunger
733
+ Polaroid camera
734
+ pole
735
+ police van
736
+ poncho
737
+ pool table
738
+ soda bottle
739
+ plant pot
740
+ potter's wheel
741
+ power drill
742
+ prayer rug
743
+ printer
744
+ prison
745
+ missile
746
+ projector
747
+ hockey puck
748
+ punching bag
749
+ purse
750
+ quill
751
+ quilt
752
+ race car
753
+ racket
754
+ radiator
755
+ radio
756
+ radio telescope
757
+ rain barrel
758
+ recreational vehicle
759
+ fishing casting reel
760
+ reflex camera
761
+ refrigerator
762
+ remote control
763
+ restaurant
764
+ revolver
765
+ rifle
766
+ rocking chair
767
+ rotisserie
768
+ eraser
769
+ rugby ball
770
+ ruler measuring stick
771
+ sneaker
772
+ safe
773
+ safety pin
774
+ salt shaker
775
+ sandal
776
+ sarong
777
+ saxophone
778
+ scabbard
779
+ weighing scale
780
+ school bus
781
+ schooner
782
+ scoreboard
783
+ CRT monitor
784
+ screw
785
+ screwdriver
786
+ seat belt
787
+ sewing machine
788
+ shield
789
+ shoe store
790
+ shoji screen / room divider
791
+ shopping basket
792
+ shopping cart
793
+ shovel
794
+ shower cap
795
+ shower curtain
796
+ ski
797
+ balaclava ski mask
798
+ sleeping bag
799
+ slide rule
800
+ sliding door
801
+ slot machine
802
+ snorkel
803
+ snowmobile
804
+ snowplow
805
+ soap dispenser
806
+ soccer ball
807
+ sock
808
+ solar thermal collector
809
+ sombrero
810
+ soup bowl
811
+ keyboard space bar
812
+ space heater
813
+ space shuttle
814
+ spatula
815
+ motorboat
816
+ spider web
817
+ spindle
818
+ sports car
819
+ spotlight
820
+ stage
821
+ steam locomotive
822
+ through arch bridge
823
+ steel drum
824
+ stethoscope
825
+ scarf
826
+ stone wall
827
+ stopwatch
828
+ stove
829
+ strainer
830
+ tram
831
+ stretcher
832
+ couch
833
+ stupa
834
+ submarine
835
+ suit
836
+ sundial
837
+ sunglasses
838
+ sunglasses
839
+ sunscreen
840
+ suspension bridge
841
+ mop
842
+ sweatshirt
843
+ swim trunks / shorts
844
+ swing
845
+ electrical switch
846
+ syringe
847
+ table lamp
848
+ tank
849
+ tape player
850
+ teapot
851
+ teddy bear
852
+ television
853
+ tennis ball
854
+ thatched roof
855
+ front curtain
856
+ thimble
857
+ threshing machine
858
+ throne
859
+ tile roof
860
+ toaster
861
+ tobacco shop
862
+ toilet seat
863
+ torch
864
+ totem pole
865
+ tow truck
866
+ toy store
867
+ tractor
868
+ semi-trailer truck
869
+ tray
870
+ trench coat
871
+ tricycle
872
+ trimaran
873
+ tripod
874
+ triumphal arch
875
+ trolleybus
876
+ trombone
877
+ hot tub
878
+ turnstile
879
+ typewriter keyboard
880
+ umbrella
881
+ unicycle
882
+ upright piano
883
+ vacuum cleaner
884
+ vase
885
+ vaulted or arched ceiling
886
+ velvet fabric
887
+ vending machine
888
+ vestment
889
+ viaduct
890
+ violin
891
+ volleyball
892
+ waffle iron
893
+ wall clock
894
+ wallet
895
+ wardrobe
896
+ military aircraft
897
+ sink
898
+ washing machine
899
+ water bottle
900
+ water jug
901
+ water tower
902
+ whiskey jug
903
+ whistle
904
+ hair wig
905
+ window screen
906
+ window shade
907
+ Windsor tie
908
+ wine bottle
909
+ airplane wing
910
+ wok
911
+ wooden spoon
912
+ wool
913
+ split-rail fence
914
+ shipwreck
915
+ sailboat
916
+ yurt
917
+ website
918
+ comic book
919
+ crossword
920
+ traffic or street sign
921
+ traffic light
922
+ dust jacket
923
+ menu
924
+ plate
925
+ guacamole
926
+ consomme
927
+ hot pot
928
+ trifle
929
+ ice cream
930
+ popsicle
931
+ baguette
932
+ bagel
933
+ pretzel
934
+ cheeseburger
935
+ hot dog
936
+ mashed potatoes
937
+ cabbage
938
+ broccoli
939
+ cauliflower
940
+ zucchini
941
+ spaghetti squash
942
+ acorn squash
943
+ butternut squash
944
+ cucumber
945
+ artichoke
946
+ bell pepper
947
+ cardoon
948
+ mushroom
949
+ Granny Smith apple
950
+ strawberry
951
+ orange
952
+ lemon
953
+ fig
954
+ pineapple
955
+ banana
956
+ jackfruit
957
+ cherimoya (custard apple)
958
+ pomegranate
959
+ hay
960
+ carbonara
961
+ chocolate syrup
962
+ dough
963
+ meatloaf
964
+ pizza
965
+ pot pie
966
+ burrito
967
+ red wine
968
+ espresso
969
+ tea cup
970
+ eggnog
971
+ mountain
972
+ bubble
973
+ cliff
974
+ coral reef
975
+ geyser
976
+ lakeshore
977
+ promontory
978
+ sandbar
979
+ beach
980
+ valley
981
+ volcano
982
+ baseball player
983
+ bridegroom
984
+ scuba diver
985
+ rapeseed
986
+ daisy
987
+ yellow lady's slipper
988
+ corn
989
+ acorn
990
+ rose hip
991
+ horse chestnut seed
992
+ coral fungus
993
+ agaric
994
+ gyromitra
995
+ stinkhorn mushroom
996
+ earth star fungus
997
+ hen of the woods mushroom
998
+ bolete
999
+ corn cob
1000
+ toilet paper
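The taglist files added in this commit are plain newline-delimited text: one tag per line, with the 1-based line number serving as the tag's class index. As a minimal sketch (not part of the repo's code), such a file can be loaded as shown below; the path is one of the files added in this commit, and the helper name is illustrative.

```python
# Minimal sketch: read a newline-delimited taglist into a Python list.
# A tag's 0-based position in the list corresponds to (line number - 1).
from pathlib import Path


def load_taglist(path: str) -> list[str]:
    """Return the tags in file order, skipping empty lines."""
    text = Path(path).read_text(encoding="utf-8")
    return [line.strip() for line in text.splitlines() if line.strip()]


tags = load_taglist(
    "recognize-anything/datasets/imagenet_multi/imagenet_multi_1000_taglist.txt"
)
print(len(tags), tags[:3])  # expected: 1000 tags for this file
```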
recognize-anything/datasets/openimages_common_214/imgs/.gitkeep ADDED
File without changes
recognize-anything/datasets/openimages_common_214/openimages_common_214_ram_annots.txt ADDED
The diff for this file is too large to render. See raw diff
 
recognize-anything/datasets/openimages_common_214/openimages_common_214_ram_taglist.txt ADDED
@@ -0,0 +1,214 @@
1
+ accident
2
+ accordion
3
+ plane
4
+ airport
5
+ antelope
6
+ apple
7
+ art gallery
8
+ eggplant
9
+ auditorium
10
+ autumn
11
+ baboon
12
+ backpack
13
+ bakery
14
+ bamboo
15
+ banana
16
+ barbecue
17
+ bed
18
+ bedroom
19
+ clock
20
+ bicycle
21
+ bikini
22
+ birthday cake
23
+ blackberry
24
+ blueberry
25
+ pig
26
+ bookcase
27
+ bridge
28
+ broccoli
29
+ bus
30
+ butterfly
31
+ calculator
32
+ calendar
33
+ camping
34
+ candle
35
+ candy
36
+ cannon
37
+ canyon
38
+ car
39
+ carousel
40
+ cat
41
+ cave
42
+ ceiling
43
+ cheese
44
+ cheetah
45
+ chef
46
+ chicken
47
+ christmas
48
+ christmas tree
49
+ clover
50
+ coral
51
+ corn
52
+ courtyard
53
+ crab
54
+ lobster
55
+ crocodile
56
+ crosswalk
57
+ crow
58
+ cucumber
59
+ cup
60
+ currency
61
+ dachshund
62
+ deer
63
+ desert
64
+ die
65
+ dinosaur
66
+ dog
67
+ dolphin
68
+ doodle
69
+ dragonfly
70
+ drum
71
+ duck
72
+ dumbbell
73
+ easter egg
74
+ egg
75
+ elephant
76
+ faucet
77
+ ferris wheel
78
+ fire
79
+ fireman
80
+ firework
81
+ flamingo
82
+ flower
83
+ football
84
+ fountain
85
+ fox
86
+ fridge
87
+ frog
88
+ ham
89
+ gas stove
90
+ giraffe
91
+ glacier
92
+ glove
93
+ goat
94
+ goose
95
+ gorilla
96
+ grape
97
+ guitar
98
+ gull
99
+ gym
100
+ halloween
101
+ hamburger
102
+ hamster
103
+ handbag
104
+ hedgehog
105
+ helicopter
106
+ horse
107
+ hummingbird
108
+ jellyfish
109
+ kangaroo
110
+ kimono
111
+ kite
112
+ ladybird
113
+ laptop
114
+ leg
115
+ mailbox
116
+ library
117
+ lightning
118
+ lily
119
+ lion
120
+ lizard
121
+ luggage
122
+ mannequin
123
+ map
124
+ mask
125
+ mattress
126
+ microphone
127
+ microwave
128
+ monkey
129
+ moon
130
+ mosque
131
+ mouse
132
+ mushroom
133
+ nebula
134
+ sea
135
+ ostrich
136
+ palm tree
137
+ paper
138
+ pasta
139
+ patient
140
+ pavilion
141
+ pear
142
+ pebble
143
+ penguin
144
+ pet
145
+ piano
146
+ picture frame
147
+ pine
148
+ pineapple
149
+ pizza
150
+ police car
151
+ pomegranate
152
+ poodle
153
+ popcorn
154
+ stamp
155
+ power station
156
+ printer
157
+ pumpkin
158
+ raccoon
159
+ rainbow
160
+ rat
161
+ restroom
162
+ ring
163
+ run
164
+ salad
165
+ sandwich
166
+ sausage
167
+ shark
168
+ sheet music
169
+ shrine
170
+ snowboard
171
+ snake
172
+ sparrow
173
+ squirrel
174
+ stage
175
+ starfish
176
+ statue
177
+ steering wheel
178
+ stream
179
+ street art
180
+ street light
181
+ submarine
182
+ suite
183
+ surfboard
184
+ sushi
185
+ swan
186
+ tattoo
187
+ teddy
188
+ tennis court
189
+ tennis racket
190
+ tiger
191
+ toast
192
+ toilet bowl
193
+ toy
194
+ tractor
195
+ train
196
+ trampoline
197
+ treadmill
198
+ truck
199
+ tunnel
200
+ turkey
201
+ vending machine
202
+ waffle
203
+ walnut
204
+ washing machine
205
+ water buffalo
206
+ waterfall
207
+ watermelon
208
+ wheat
209
+ wheelchair
210
+ windmill
211
+ winter
212
+ wolf
213
+ woodpecker
214
+ zebra
recognize-anything/datasets/openimages_common_214/openimages_common_214_tag2text_idannots.txt ADDED
The diff for this file is too large to render. See raw diff
 
recognize-anything/datasets/openimages_common_214/openimages_common_214_tag2text_tagidlist.txt ADDED
@@ -0,0 +1,214 @@
1
+ 3
2
+ 8
3
+ 16
4
+ 19
5
+ 21
6
+ 33
7
+ 44
8
+ 50
9
+ 58
10
+ 61
11
+ 71
12
+ 77
13
+ 84
14
+ 96
15
+ 117
16
+ 139
17
+ 142
18
+ 147
19
+ 180
20
+ 200
21
+ 202
22
+ 206
23
+ 244
24
+ 267
25
+ 317
26
+ 321
27
+ 347
28
+ 361
29
+ 380
30
+ 387
31
+ 398
32
+ 407
33
+ 471
34
+ 486
35
+ 489
36
+ 509
37
+ 514
38
+ 530
39
+ 568
40
+ 590
41
+ 595
42
+ 612
43
+ 622
44
+ 626
45
+ 654
46
+ 658
47
+ 664
48
+ 684
49
+ 699
50
+ 704
51
+ 717
52
+ 720
53
+ 727
54
+ 760
55
+ 773
56
+ 786
57
+ 787
58
+ 812
59
+ 814
60
+ 817
61
+ 843
62
+ 855
63
+ 856
64
+ 907
65
+ 950
66
+ 955
67
+ 957
68
+ 1023
69
+ 1042
70
+ 1056
71
+ 1066
72
+ 1091
73
+ 1094
74
+ 1108
75
+ 1141
76
+ 1148
77
+ 1152
78
+ 1168
79
+ 1174
80
+ 1187
81
+ 1231
82
+ 1235
83
+ 1246
84
+ 1276
85
+ 1277
86
+ 1305
87
+ 1308
88
+ 1344
89
+ 1359
90
+ 1362
91
+ 1393
92
+ 1394
93
+ 1410
94
+ 1411
95
+ 1468
96
+ 1504
97
+ 1524
98
+ 1536
99
+ 1540
100
+ 1542
101
+ 1546
102
+ 1553
103
+ 1572
104
+ 1574
105
+ 1606
106
+ 1610
107
+ 1615
108
+ 1655
109
+ 1672
110
+ 1680
111
+ 1682
112
+ 1687
113
+ 1691
114
+ 1692
115
+ 1711
116
+ 1712
117
+ 1713
118
+ 1719
119
+ 1727
120
+ 1733
121
+ 1761
122
+ 1770
123
+ 1782
124
+ 1784
125
+ 1786
126
+ 1803
127
+ 1812
128
+ 1816
129
+ 1820
130
+ 1829
131
+ 1831
132
+ 1841
133
+ 1845
134
+ 1878
135
+ 1882
136
+ 1931
137
+ 1940
138
+ 1944
139
+ 1947
140
+ 1974
141
+ 1975
142
+ 1977
143
+ 2009
144
+ 2031
145
+ 2035
146
+ 2052
147
+ 2065
148
+ 2110
149
+ 2113
150
+ 2138
151
+ 2149
152
+ 2154
153
+ 2157
154
+ 2174
155
+ 2178
156
+ 2184
157
+ 2185
158
+ 2202
159
+ 2222
160
+ 2233
161
+ 2291
162
+ 2301
163
+ 2302
164
+ 2317
165
+ 2320
166
+ 2351
167
+ 2354
168
+ 2373
169
+ 2383
170
+ 2393
171
+ 2403
172
+ 2413
173
+ 2415
174
+ 2417
175
+ 2423
176
+ 2449
177
+ 2454
178
+ 2455
179
+ 2472
180
+ 2494
181
+ 2495
182
+ 2528
183
+ 2541
184
+ 2543
185
+ 2553
186
+ 2563
187
+ 2589
188
+ 2603
189
+ 2654
190
+ 2656
191
+ 2658
192
+ 2676
193
+ 2690
194
+ 2693
195
+ 2700
196
+ 2708
197
+ 2720
198
+ 2721
199
+ 2729
200
+ 2732
201
+ 2734
202
+ 2756
203
+ 2786
204
+ 2792
205
+ 2801
206
+ 2821
207
+ 2851
208
+ 2887
209
+ 2906
210
+ 2909
211
+ 2924
212
+ 2929
213
+ 2966
214
+ 2980
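The tag2text_tagidlist file above holds one integer per line; these appear to be indices selecting the 214 evaluation categories out of Tag2Text's larger tag vocabulary (the IDs range up to 2980). A hedged sketch of reading it follows, assuming a full vocabulary list `full_tags` has been loaded separately (that variable is hypothetical and not defined in this commit).

```python
# Minimal sketch: load the 214 integer tag IDs added above.
from pathlib import Path


def load_tag_ids(path: str) -> list[int]:
    """Return the integer IDs in file order."""
    return [int(tok) for tok in Path(path).read_text().split()]


ids = load_tag_ids(
    "recognize-anything/datasets/openimages_common_214/"
    "openimages_common_214_tag2text_tagidlist.txt"
)
print(len(ids), ids[:5])  # expected: 214 IDs, starting 3, 8, 16, 19, 21

# Hypothetical use: pick the evaluation subset from a full Tag2Text vocabulary.
# subset = [full_tags[i] for i in ids]
```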
recognize-anything/datasets/openimages_rare_200/imgs/.gitkeep ADDED
File without changes
recognize-anything/datasets/openimages_rare_200/openimages_rare_200_llm_tag_descriptions.json ADDED
The diff for this file is too large to render. See raw diff
 
recognize-anything/datasets/openimages_rare_200/openimages_rare_200_ram_annots.txt ADDED
The diff for this file is too large to render. See raw diff
 
recognize-anything/datasets/openimages_rare_200/openimages_rare_200_ram_taglist.txt ADDED
@@ -0,0 +1,200 @@
1
+ Aerial photography
2
+ Aircraft engine
3
+ Ale
4
+ Aloe
5
+ Amphibian
6
+ Angling
7
+ Anole
8
+ Antique car
9
+ Arcade game
10
+ Arthropod
11
+ Assault rifle
12
+ Athletic shoe
13
+ Auto racing
14
+ Backlighting
15
+ Bagpipes
16
+ Ball game
17
+ Barbecue chicken
18
+ Barechested
19
+ Barquentine
20
+ Beef tenderloin
21
+ Billiard room
22
+ Billiards
23
+ Bird of prey
24
+ Black swan
25
+ Black-and-white
26
+ Blond
27
+ Boating
28
+ Bonbon
29
+ Bottled water
30
+ Bouldering
31
+ Bovine
32
+ Bratwurst
33
+ Breadboard
34
+ Briefs
35
+ Brisket
36
+ Brochette
37
+ Calabaza
38
+ Camera operator
39
+ Canola
40
+ Childbirth
41
+ Chordophone
42
+ Church bell
43
+ Classical sculpture
44
+ Close-up
45
+ Cobblestone
46
+ Coca-cola
47
+ Combat sport
48
+ Comics
49
+ Compact car
50
+ Computer speaker
51
+ Cookies and crackers
52
+ Coral reef fish
53
+ Corn on the cob
54
+ Cosmetics
55
+ Crocodilia
56
+ Digital camera
57
+ Dishware
58
+ Divemaster
59
+ Dobermann
60
+ Dog walking
61
+ Domestic rabbit
62
+ Domestic short-haired cat
63
+ Double-decker bus
64
+ Drums
65
+ Electric guitar
66
+ Electric piano
67
+ Electronic instrument
68
+ Equestrianism
69
+ Equitation
70
+ Erinaceidae
71
+ Extreme sport
72
+ Falafel
73
+ Figure skating
74
+ Filling station
75
+ Fire apparatus
76
+ Firearm
77
+ Flatbread
78
+ Floristry
79
+ Forklift truck
80
+ Freight transport
81
+ Fried food
82
+ Fried noodles
83
+ Frigate
84
+ Frozen yogurt
85
+ Frying
86
+ Full moon
87
+ Galleon
88
+ Glacial landform
89
+ Gliding
90
+ Go-kart
91
+ Goats
92
+ Grappling
93
+ Great white shark
94
+ Gumbo
95
+ Gun turret
96
+ Hair coloring
97
+ Halter
98
+ Headphones
99
+ Heavy cruiser
100
+ Herding
101
+ High-speed rail
102
+ Holding hands
103
+ Horse and buggy
104
+ Horse racing
105
+ Hound
106
+ Hunting knife
107
+ Hurdling
108
+ Inflatable
109
+ Jackfruit
110
+ Jeans
111
+ Jiaozi
112
+ Junk food
113
+ Khinkali
114
+ Kitesurfing
115
+ Lawn game
116
+ Leaf vegetable
117
+ Lechon
118
+ Lifebuoy
119
+ Locust
120
+ Lumpia
121
+ Luxury vehicle
122
+ Machine tool
123
+ Medical imaging
124
+ Melee weapon
125
+ Microcontroller
126
+ Middle ages
127
+ Military person
128
+ Military vehicle
129
+ Milky way
130
+ Miniature Poodle
131
+ Modern dance
132
+ Molluscs
133
+ Monoplane
134
+ Motorcycling
135
+ Musical theatre
136
+ Narcissus
137
+ Nest box
138
+ Newsagent's shop
139
+ Nile crocodile
140
+ Nordic skiing
141
+ Nuclear power plant
142
+ Orator
143
+ Outdoor shoe
144
+ Parachuting
145
+ Pasta salad
146
+ Peafowl
147
+ Pelmeni
148
+ Perching bird
149
+ Performance car
150
+ Personal water craft
151
+ Pit bull
152
+ Plant stem
153
+ Pork chop
154
+ Portrait photography
155
+ Primate
156
+ Procyonidae
157
+ Prosciutto
158
+ Public speaking
159
+ Racewalking
160
+ Ramen
161
+ Rear-view mirror
162
+ Residential area
163
+ Ribs
164
+ Rice ball
165
+ Road cycling
166
+ Roller skating
167
+ Roman temple
168
+ Rowing
169
+ Rural area
170
+ Sailboat racing
171
+ Scaled reptile
172
+ Scuba diving
173
+ Senior citizen
174
+ Shallot
175
+ Shinto shrine
176
+ Shooting range
177
+ Siberian husky
178
+ Sledding
179
+ Soba
180
+ Solar energy
181
+ Sport climbing
182
+ Sport utility vehicle
183
+ Steamed rice
184
+ Stemware
185
+ Sumo
186
+ Surfing Equipment
187
+ Team sport
188
+ Touring car
189
+ Toy block
190
+ Trampolining
191
+ Underwater diving
192
+ Vegetarian food
193
+ Wallaby
194
+ Water polo
195
+ Watercolor paint
196
+ Whiskers
197
+ Wind wave
198
+ Woodwind instrument
199
+ Yakitori
200
+ Zeppelin
recognize-anything/images/.ipynb_checkpoints/ram_plus_framework-checkpoint.jpg ADDED

Git LFS Details

  • SHA256: 5bf4f19389cda78a019156abddf945e192cc95cc364c74f85c8dee5264763125
  • Pointer size: 131 Bytes
  • Size of remote file: 199 kB
recognize-anything/images/.ipynb_checkpoints/ram_plus_visualization-checkpoint.jpg ADDED

Git LFS Details

  • SHA256: 24e13c7472aada4ef244f41e894d07a4d0c0b88e22ceb54b353eb34af217dece
  • Pointer size: 131 Bytes
  • Size of remote file: 404 kB
recognize-anything/images/.ipynb_checkpoints/tag2text_retrieval_visualization-checkpoint.png ADDED

Git LFS Details

  • SHA256: b9f87eed6197420f1ac525300afc75d257bccbd4825fd053e328255e82a46968
  • Pointer size: 132 Bytes
  • Size of remote file: 1.05 MB
recognize-anything/images/1641173_2291260800.jpg ADDED

Git LFS Details

  • SHA256: c562fea3659c4b112f71cfecb4a57143124b8b734e1ca96144bbdda734e494d4
  • Pointer size: 132 Bytes
  • Size of remote file: 1.81 MB
recognize-anything/images/demo/.ipynb_checkpoints/demo2-checkpoint.jpg ADDED

Git LFS Details

  • SHA256: 5c5159bf7114d08967f95475176670043115b157bf700efa34190260cd917662
  • Pointer size: 132 Bytes
  • Size of remote file: 1.03 MB
recognize-anything/images/demo/.ipynb_checkpoints/demo4-checkpoint.jpg ADDED

Git LFS Details

  • SHA256: 5c71251326fb9ece01b5ce6334869861b3fce82eeb5cae45977e78e6332f4170
  • Pointer size: 131 Bytes
  • Size of remote file: 165 kB
recognize-anything/images/demo/demo1.jpg ADDED

Git LFS Details

  • SHA256: 1b2906f4058a69936df49cb6156ec4cd117a286b420e1eb14764033bf8f3c05f
  • Pointer size: 132 Bytes
  • Size of remote file: 5.7 MB
recognize-anything/images/demo/demo2.jpg ADDED

Git LFS Details

  • SHA256: 5c5159bf7114d08967f95475176670043115b157bf700efa34190260cd917662
  • Pointer size: 132 Bytes
  • Size of remote file: 1.03 MB
recognize-anything/images/demo/demo3.jpg ADDED

Git LFS Details

  • SHA256: c562fea3659c4b112f71cfecb4a57143124b8b734e1ca96144bbdda734e494d4
  • Pointer size: 132 Bytes
  • Size of remote file: 1.81 MB
recognize-anything/images/demo/demo4.jpg ADDED

Git LFS Details

  • SHA256: 5c71251326fb9ece01b5ce6334869861b3fce82eeb5cae45977e78e6332f4170
  • Pointer size: 131 Bytes
  • Size of remote file: 165 kB
recognize-anything/images/experiment_comparison.png ADDED

Git LFS Details

  • SHA256: 4146dfb4a00c973d012436073fa166af50a5c8668312246f2648447914e27506
  • Pointer size: 131 Bytes
  • Size of remote file: 158 kB
recognize-anything/images/localization_and_recognition.jpg ADDED

Git LFS Details

  • SHA256: 80dea86f6cc3773300070a9b817042985651273228008b12a3f1cf285426d1b6
  • Pointer size: 131 Bytes
  • Size of remote file: 222 kB
recognize-anything/images/openset_example.jpg ADDED

Git LFS Details

  • SHA256: 257c8d969c625f1f6727ffc8a5d5e70ad8f00f679c1deedabe0830edd2c24d01
  • Pointer size: 131 Bytes
  • Size of remote file: 215 kB
recognize-anything/images/ram_grounded_sam.jpg ADDED

Git LFS Details

  • SHA256: 9b288d96d34420df062bdc07393947d47a7d8ee163d07decde646f7898be8d61
  • Pointer size: 131 Bytes
  • Size of remote file: 544 kB
recognize-anything/images/ram_plus_compare.jpg ADDED

Git LFS Details

  • SHA256: 3114244c0698559c5df8ff51fb00c754ae13a28a7c04d610edbd87bc11f88b88
  • Pointer size: 131 Bytes
  • Size of remote file: 125 kB