Upload folder using huggingface_hub
Browse files- README.md +113 -45
- config.json +1 -1
- configuration_intern_vit.py +1 -1
- configuration_internvl_chat.py +1 -1
- conversation.py +7 -4
- modeling_intern_vit.py +1 -1
README.md
CHANGED
@@ -11,7 +11,7 @@ pipeline_tag: image-text-to-text
|
|
11 |
|
12 |
## Introduction
|
13 |
|
14 |
-
We are excited to announce the release of InternVL 2.0, the latest addition to the InternVL series of multimodal large language models. InternVL 2.0 features a variety of **instruction-tuned models**, ranging from
|
15 |
|
16 |
Compared to the state-of-the-art open-source multimodal large language models, InternVL 2.0 surpasses most open-source models. It demonstrates competitive performance on par with proprietary commercial models across various capabilities, including document and chart comprehension, infographics QA, scene text understanding and OCR tasks, scientific and mathematical problem solving, as well as cultural understanding and integrated multimodal capabilities.
|
17 |
|
@@ -60,8 +60,8 @@ InternVL 2.0 is a multimodal large language model series, featuring models of va
|
|
60 |
| Model Size | - | 34B | 34B | 25.5B | 40B |
|
61 |
| | | | | | |
|
62 |
| MVBench | - | - | - | 67.5 | 72.5 |
|
63 |
-
| Video-MME<br>wo subs | 59.9 | 59.0 | 52.0 |
|
64 |
-
| Video-MME<br>w/ subs | 63.3 | 59.4 | 54.9 |
|
65 |
|
66 |
- We evaluate our models on MVBench by extracting 16 frames from each video; each frame is resized to a 448x448 image.
|
67 |
|
@@ -71,6 +71,8 @@ Limitations: Although we have made efforts to ensure the safety of the model dur
|
|
71 |
|
72 |
We provide example code to run InternVL2-40B using `transformers`.
|
73 |
|
|
|
|
|
74 |
> Please use transformers==4.37.2 to ensure the model works normally.
|
75 |
|
76 |
```python
|
@@ -162,21 +164,15 @@ def load_image(image_file, input_size=448, max_num=6):
|
|
162 |
|
163 |
|
164 |
path = 'OpenGVLab/InternVL2-40B'
|
165 |
-
#
|
|
|
|
|
166 |
model = AutoModel.from_pretrained(
|
167 |
path,
|
168 |
torch_dtype=torch.bfloat16,
|
169 |
low_cpu_mem_usage=True,
|
170 |
-
trust_remote_code=True
|
171 |
-
|
172 |
-
# import os
|
173 |
-
# os.environ["CUDA_LAUNCH_BLOCKING"] = "1"
|
174 |
-
# model = AutoModel.from_pretrained(
|
175 |
-
# path,
|
176 |
-
# torch_dtype=torch.bfloat16,
|
177 |
-
# low_cpu_mem_usage=True,
|
178 |
-
# trust_remote_code=True,
|
179 |
-
# device_map='auto').eval()
|
180 |
|
181 |
tokenizer = AutoTokenizer.from_pretrained(path, trust_remote_code=True)
|
182 |
# set the max number of tiles in `max_num`
|
@@ -340,9 +336,9 @@ from lmdeploy import pipeline, TurbomindEngineConfig, ChatTemplateConfig
|
|
340 |
from lmdeploy.vl import load_image
|
341 |
|
342 |
model = 'OpenGVLab/InternVL2-40B'
|
343 |
-
system_prompt = '我是书生·万象,英文名是InternVL
|
344 |
image = load_image('https://raw.githubusercontent.com/open-mmlab/mmdeploy/main/tests/data/tiger.jpeg')
|
345 |
-
chat_template_config = ChatTemplateConfig('
|
346 |
chat_template_config.meta_instruction = system_prompt
|
347 |
pipe = pipeline(model, chat_template_config=chat_template_config,
|
348 |
backend_config=TurbomindEngineConfig(session_len=8192))
|
@@ -356,14 +352,16 @@ If `ImportError` occurs while executing this case, please install the required d
|
|
356 |
|
357 |
When dealing with multiple images, you can put them all in one list. Keep in mind that multiple images will lead to a higher number of input tokens, and as a result, the size of the context window typically needs to be increased.
|
358 |
|
|
|
|
|
359 |
```python
|
360 |
from lmdeploy import pipeline, TurbomindEngineConfig, ChatTemplateConfig
|
361 |
from lmdeploy.vl import load_image
|
362 |
from lmdeploy.vl.constants import IMAGE_TOKEN
|
363 |
|
364 |
model = 'OpenGVLab/InternVL2-40B'
|
365 |
-
system_prompt = '我是书生·万象,英文名是InternVL
|
366 |
-
chat_template_config = ChatTemplateConfig('
|
367 |
chat_template_config.meta_instruction = system_prompt
|
368 |
pipe = pipeline(model, chat_template_config=chat_template_config,
|
369 |
backend_config=TurbomindEngineConfig(session_len=8192))
|
@@ -388,8 +386,8 @@ from lmdeploy import pipeline, TurbomindEngineConfig, ChatTemplateConfig
|
|
388 |
from lmdeploy.vl import load_image
|
389 |
|
390 |
model = 'OpenGVLab/InternVL2-40B'
|
391 |
-
system_prompt = '我是书生·万象,英文名是InternVL
|
392 |
-
chat_template_config = ChatTemplateConfig('
|
393 |
chat_template_config.meta_instruction = system_prompt
|
394 |
pipe = pipeline(model, chat_template_config=chat_template_config,
|
395 |
backend_config=TurbomindEngineConfig(session_len=8192))
|
@@ -412,8 +410,8 @@ from lmdeploy import pipeline, TurbomindEngineConfig, ChatTemplateConfig, Genera
|
|
412 |
from lmdeploy.vl import load_image
|
413 |
|
414 |
model = 'OpenGVLab/InternVL2-40B'
|
415 |
-
system_prompt = '我是书生·万象,英文名是InternVL
|
416 |
-
chat_template_config = ChatTemplateConfig('
|
417 |
chat_template_config.meta_instruction = system_prompt
|
418 |
pipe = pipeline(model, chat_template_config=chat_template_config,
|
419 |
backend_config=TurbomindEngineConfig(session_len=8192))
|
@@ -428,12 +426,12 @@ print(sess.response.text)
|
|
428 |
|
429 |
#### Service
|
430 |
|
431 |
-
|
432 |
|
433 |
```json
|
434 |
{
|
435 |
"model_name":"internvl-zh-hermes2",
|
436 |
-
"meta_instruction":"我是书生·万象,英文名是InternVL
|
437 |
"stop_words":["<|im_start|>", "<|im_end|>"]
|
438 |
}
|
439 |
```
|
@@ -441,16 +439,50 @@ For lmdeploy v0.5.0, please configure the chat template config first. Create the
|
|
441 |
LMDeploy's `api_server` enables models to be easily packed into services with a single command. The provided RESTful APIs are compatible with OpenAI's interfaces. Below is an example of service startup:
|
442 |
|
443 |
```shell
|
444 |
-
lmdeploy serve api_server OpenGVLab/InternVL2-40B --backend turbomind --chat-template chat_template.json
|
445 |
```
|
446 |
|
447 |
-
|
448 |
|
449 |
```shell
|
450 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
451 |
```
|
452 |
|
453 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
454 |
|
455 |
## License
|
456 |
|
@@ -477,7 +509,7 @@ If you find this project useful in your research, please consider citing:
|
|
477 |
|
478 |
## 简介
|
479 |
|
480 |
-
我们很高兴宣布 InternVL 2.0 的发布,这是 InternVL 系列多模态大语言模型的最新版本。InternVL 2.0 提供了多种**指令微调**的模型,参数从
|
481 |
|
482 |
与最先进的开源多模态大语言模型相比,InternVL 2.0 超越了大多数开源模型。它在各种能力上表现出与闭源商业模型相媲美的竞争力,包括文档和图表理解、信息图表问答、场景文本理解和 OCR 任务、科学和数学问题解决,以及文化理解和综合多模态能力。
|
483 |
|
@@ -526,8 +558,8 @@ InternVL 2.0 是一个多模态大语言模型系列,包含各种规模的模
|
|
526 |
| 模型大小 | - | 34B | 34B | 25.5B | 40B |
|
527 |
| | | | | | |
|
528 |
| MVBench | - | - | - | 67.5 | 72.5 |
|
529 |
-
| Video-MME<br>wo subs | 59.9 | 59.0 | 52.0 |
|
530 |
-
| Video-MME<br>w/ subs | 63.3 | 59.4 | 54.9 |
|
531 |
|
532 |
- 我们通过从每个视频中提取16帧来评估我们的模型在MVBench上的性能,每个视频帧被调整为448x448的图像。
|
533 |
|
@@ -537,6 +569,8 @@ InternVL 2.0 是一个多模态大语言模型系列,包含各种规模的模
|
|
537 |
|
538 |
我们提供了一个示例代码,用于使用 `transformers` 运行 InternVL2-40B。
|
539 |
|
|
|
|
|
540 |
> 请使用 transformers==4.37.2 以确保模型正常运行。
|
541 |
|
542 |
示例代码请[点击这里](#quick-start)。
|
@@ -560,9 +594,9 @@ from lmdeploy import pipeline, TurbomindEngineConfig, ChatTemplateConfig
|
|
560 |
from lmdeploy.vl import load_image
|
561 |
|
562 |
model = 'OpenGVLab/InternVL2-40B'
|
563 |
-
system_prompt = '我是书生·万象,英文名是InternVL
|
564 |
image = load_image('https://raw.githubusercontent.com/open-mmlab/mmdeploy/main/tests/data/tiger.jpeg')
|
565 |
-
chat_template_config = ChatTemplateConfig('
|
566 |
chat_template_config.meta_instruction = system_prompt
|
567 |
pipe = pipeline(model, chat_template_config=chat_template_config,
|
568 |
backend_config=TurbomindEngineConfig(session_len=8192))
|
@@ -582,8 +616,8 @@ from lmdeploy.vl import load_image
|
|
582 |
from lmdeploy.vl.constants import IMAGE_TOKEN
|
583 |
|
584 |
model = 'OpenGVLab/InternVL2-40B'
|
585 |
-
system_prompt = '我是书生·万象,英文名是InternVL
|
586 |
-
chat_template_config = ChatTemplateConfig('
|
587 |
chat_template_config.meta_instruction = system_prompt
|
588 |
pipe = pipeline(model, chat_template_config=chat_template_config,
|
589 |
backend_config=TurbomindEngineConfig(session_len=8192))
|
@@ -607,8 +641,8 @@ from lmdeploy import pipeline, TurbomindEngineConfig, ChatTemplateConfig
|
|
607 |
from lmdeploy.vl import load_image
|
608 |
|
609 |
model = 'OpenGVLab/InternVL2-40B'
|
610 |
-
system_prompt = '我是书生·万象,英文名是InternVL
|
611 |
-
chat_template_config = ChatTemplateConfig('
|
612 |
chat_template_config.meta_instruction = system_prompt
|
613 |
pipe = pipeline(model, chat_template_config=chat_template_config,
|
614 |
backend_config=TurbomindEngineConfig(session_len=8192))
|
@@ -631,8 +665,8 @@ from lmdeploy import pipeline, TurbomindEngineConfig, ChatTemplateConfig, Genera
|
|
631 |
from lmdeploy.vl import load_image
|
632 |
|
633 |
model = 'OpenGVLab/InternVL2-40B'
|
634 |
-
system_prompt = '我是书生·万象,英文名是InternVL
|
635 |
-
chat_template_config = ChatTemplateConfig('
|
636 |
chat_template_config.meta_instruction = system_prompt
|
637 |
pipe = pipeline(model, chat_template_config=chat_template_config,
|
638 |
backend_config=TurbomindEngineConfig(session_len=8192))
|
@@ -647,12 +681,12 @@ print(sess.response.text)
|
|
647 |
|
648 |
#### API部署
|
649 |
|
650 |
-
|
651 |
|
652 |
```json
|
653 |
{
|
654 |
"model_name":"internvl-zh-hermes2",
|
655 |
-
"meta_instruction":"我是书生·万象,英文名是InternVL
|
656 |
"stop_words":["<|im_start|>", "<|im_end|>"]
|
657 |
}
|
658 |
```
|
@@ -660,16 +694,50 @@ print(sess.response.text)
|
|
660 |
LMDeploy 的 `api_server` 使模型能够通过一个命令轻松打包成服务。提供的 RESTful API 与 OpenAI 的接口兼容。以下是服务启动的示例:
|
661 |
|
662 |
```shell
|
663 |
-
lmdeploy serve api_server OpenGVLab/InternVL2-40B --backend turbomind --chat-template chat_template.json
|
664 |
```
|
665 |
|
666 |
-
|
667 |
|
668 |
```shell
|
669 |
-
|
670 |
```
|
671 |
|
672 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
673 |
|
674 |
## 开源许可证
|
675 |
|
|
|
11 |
|
12 |
## Introduction
|
13 |
|
14 |
+
We are excited to announce the release of InternVL 2.0, the latest addition to the InternVL series of multimodal large language models. InternVL 2.0 features a variety of **instruction-tuned models**, ranging from 1 billion to 108 billion parameters. This repository contains the instruction-tuned InternVL2-40B model.
|
15 |
|
16 |
Compared to the state-of-the-art open-source multimodal large language models, InternVL 2.0 surpasses most open-source models. It demonstrates competitive performance on par with proprietary commercial models across various capabilities, including document and chart comprehension, infographics QA, scene text understanding and OCR tasks, scientific and mathematical problem solving, as well as cultural understanding and integrated multimodal capabilities.
|
17 |
|
|
|
60 |
| Model Size | - | 34B | 34B | 25.5B | 40B |
|
61 |
| | | | | | |
|
62 |
| MVBench | - | - | - | 67.5 | 72.5 |
|
63 |
+
| Video-MME<br>wo subs | 59.9 | 59.0 | 52.0 | TODO | TODO |
|
64 |
+
| Video-MME<br>w/ subs | 63.3 | 59.4 | 54.9 | TODO | TODO |
|
65 |
|
66 |
- We evaluate our models on MVBench by extracting 16 frames from each video; each frame is resized to a 448x448 image.
|
67 |
|
|
|
71 |
|
72 |
We provide example code to run InternVL2-40B using `transformers`.
|
73 |
|
74 |
+
We also welcome you to try the InternVL2 series models in our [online demo](https://internvl.opengvlab.com/). Currently, because GPU resources with public IP addresses are limited, we can only deploy models of up to 26B parameters. We will expand capacity soon and deploy larger models to the online demo.
|
75 |
+
|
76 |
> Please use transformers==4.37.2 to ensure the model works normally.
|
77 |
|
78 |
```python
|
|
|
164 |
|
165 |
|
166 |
path = 'OpenGVLab/InternVL2-40B'
|
167 |
+
# You need to set device_map='auto' to use multiple GPUs for inference.
|
168 |
+
import os
|
169 |
+
os.environ["CUDA_LAUNCH_BLOCKING"] = "1"
|
170 |
model = AutoModel.from_pretrained(
|
171 |
path,
|
172 |
torch_dtype=torch.bfloat16,
|
173 |
low_cpu_mem_usage=True,
|
174 |
+
trust_remote_code=True,
|
175 |
+
device_map='auto').eval()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
176 |
|
177 |
tokenizer = AutoTokenizer.from_pretrained(path, trust_remote_code=True)
|
178 |
# set the max number of tiles in `max_num`
|
|
|
336 |
from lmdeploy.vl import load_image
|
337 |
|
338 |
model = 'OpenGVLab/InternVL2-40B'
|
339 |
+
system_prompt = '我是书生·万象,英文名是InternVL,是由上海人工智能实验室及多家合作单位联合开发的多模态大语言模型。'
|
340 |
image = load_image('https://raw.githubusercontent.com/open-mmlab/mmdeploy/main/tests/data/tiger.jpeg')
|
341 |
+
chat_template_config = ChatTemplateConfig('internvl-zh-hermes2')
|
342 |
chat_template_config.meta_instruction = system_prompt
|
343 |
pipe = pipeline(model, chat_template_config=chat_template_config,
|
344 |
backend_config=TurbomindEngineConfig(session_len=8192))
|
|
|
352 |
|
353 |
When dealing with multiple images, you can put them all in one list. Keep in mind that multiple images will lead to a higher number of input tokens, and as a result, the size of the context window typically needs to be increased.
|
354 |
|
355 |
+
> Warning: Due to the scarcity of multi-image conversation data, the performance on multi-image tasks may be unstable, and it may require multiple attempts to achieve satisfactory results.
|
356 |
+
|
357 |
```python
|
358 |
from lmdeploy import pipeline, TurbomindEngineConfig, ChatTemplateConfig
|
359 |
from lmdeploy.vl import load_image
|
360 |
from lmdeploy.vl.constants import IMAGE_TOKEN
|
361 |
|
362 |
model = 'OpenGVLab/InternVL2-40B'
|
363 |
+
system_prompt = '我是书生·万象,英文名是InternVL,是由上海人工智能实验室及多家合作单位联合开发的多模态大语言模型。'
|
364 |
+
chat_template_config = ChatTemplateConfig('internvl-zh-hermes2')
|
365 |
chat_template_config.meta_instruction = system_prompt
|
366 |
pipe = pipeline(model, chat_template_config=chat_template_config,
|
367 |
backend_config=TurbomindEngineConfig(session_len=8192))
|
|
|
386 |
from lmdeploy.vl import load_image
|
387 |
|
388 |
model = 'OpenGVLab/InternVL2-40B'
|
389 |
+
system_prompt = '我是书生·万象,英文名是InternVL,是由上海人工智能实验室及多家合作单位联合开发的多模态大语言模型。'
|
390 |
+
chat_template_config = ChatTemplateConfig('internvl-zh-hermes2')
|
391 |
chat_template_config.meta_instruction = system_prompt
|
392 |
pipe = pipeline(model, chat_template_config=chat_template_config,
|
393 |
backend_config=TurbomindEngineConfig(session_len=8192))
|
|
|
410 |
from lmdeploy.vl import load_image
|
411 |
|
412 |
model = 'OpenGVLab/InternVL2-40B'
|
413 |
+
system_prompt = '我是书生·万象,英文名是InternVL,是由上海人工智能实验室及多家合作单位联合开发的多模态大语言模型。'
|
414 |
+
chat_template_config = ChatTemplateConfig('internvl-zh-hermes2')
|
415 |
chat_template_config.meta_instruction = system_prompt
|
416 |
pipe = pipeline(model, chat_template_config=chat_template_config,
|
417 |
backend_config=TurbomindEngineConfig(session_len=8192))
|
|
|
426 |
|
427 |
#### Service
|
428 |
|
429 |
+
To deploy InternVL2 as an API, please configure the chat template config first. Create the following JSON file `chat_template.json`.
|
430 |
|
431 |
```json
|
432 |
{
|
433 |
"model_name":"internvl-zh-hermes2",
|
434 |
+
"meta_instruction":"我是书生·万象,英文名是InternVL,是由上海人工智能实验室及多家合作单位联合开发的多模态大语言模型。",
|
435 |
"stop_words":["<|im_start|>", "<|im_end|>"]
|
436 |
}
|
437 |
```
|
|
|
439 |
LMDeploy's `api_server` enables models to be easily packed into services with a single command. The provided RESTful APIs are compatible with OpenAI's interfaces. Below is an example of service startup:
|
440 |
|
441 |
```shell
|
442 |
+
lmdeploy serve api_server OpenGVLab/InternVL2-40B --model-name InternVL2-40B --backend turbomind --server-port 23333 --chat-template chat_template.json
|
443 |
```
|
444 |
|
445 |
+
To use the OpenAI-style interface, you need to install OpenAI:
|
446 |
|
447 |
```shell
|
448 |
+
pip install openai
|
449 |
+
```
|
450 |
+
|
451 |
+
Then, use the code below to make the API call:
|
452 |
+
|
453 |
+
```python
|
454 |
+
from openai import OpenAI
|
455 |
+
|
456 |
+
client = OpenAI(api_key='YOUR_API_KEY', base_url='http://0.0.0.0:23333/v1')
|
457 |
+
model_name = client.models.list().data[0].id
|
458 |
+
response = client.chat.completions.create(
|
459 |
+
model="InternVL2-40B",
|
460 |
+
messages=[{
|
461 |
+
'role':
|
462 |
+
'user',
|
463 |
+
'content': [{
|
464 |
+
'type': 'text',
|
465 |
+
'text': 'describe this image',
|
466 |
+
}, {
|
467 |
+
'type': 'image_url',
|
468 |
+
'image_url': {
|
469 |
+
'url':
|
470 |
+
'https://modelscope.oss-cn-beijing.aliyuncs.com/resource/tiger.jpeg',
|
471 |
+
},
|
472 |
+
}],
|
473 |
+
}],
|
474 |
+
temperature=0.8,
|
475 |
+
top_p=0.8)
|
476 |
+
print(response)
|
477 |
```
|
478 |
|
479 |
+
### vLLM
|
480 |
+
|
481 |
+
TODO
|
482 |
+
|
483 |
+
### Ollama
|
484 |
+
|
485 |
+
TODO
|
486 |
|
487 |
## License
|
488 |
|
|
|
509 |
|
510 |
## 简介
|
511 |
|
512 |
+
我们很高兴宣布 InternVL 2.0 的发布,这是 InternVL 系列多模态大语言模型的最新版本。InternVL 2.0 提供了多种**指令微调**的模型,参数从 10 亿到 1080 亿不等。此仓库包含经过指令微调的 InternVL2-40B 模型。
|
513 |
|
514 |
与最先进的开源多模态大语言模型相比,InternVL 2.0 超越了大多数开源模型。它在各种能力上表现出与闭源商业模型相媲美的竞争力,包括文档和图表理解、信息图表问答、场景文本理解和 OCR 任务、科学和数学问题解决,以及文化理解和综合多模态能力。
|
515 |
|
|
|
558 |
| 模型大小 | - | 34B | 34B | 25.5B | 40B |
|
559 |
| | | | | | |
|
560 |
| MVBench | - | - | - | 67.5 | 72.5 |
|
561 |
+
| Video-MME<br>wo subs | 59.9 | 59.0 | 52.0 | TODO | TODO |
|
562 |
+
| Video-MME<br>w/ subs | 63.3 | 59.4 | 54.9 | TODO | TODO |
|
563 |
|
564 |
- 我们通过从每个视频中提取16帧来评估我们的模型在MVBench上的性能,每个视频帧被调整为448x448的图像。
|
565 |
|
|
|
569 |
|
570 |
我们提供了一个示例代码,用于使用 `transformers` 运行 InternVL2-40B。
|
571 |
|
572 |
+
我们也欢迎你在我们的[在线demo](https://internvl.opengvlab.com/)中体验InternVL2的系列模型。目前,由于具备公网IP地址的GPU资源有限,我们只能部署最大到26B的模型。我们会在不久之后进行扩容,把更大的模型部署到在线demo上,敬请期待。
|
573 |
+
|
574 |
> 请使用 transformers==4.37.2 以确保模型正常运行。
|
575 |
|
576 |
示例代码请[点击这里](#quick-start)。
|
|
|
594 |
from lmdeploy.vl import load_image
|
595 |
|
596 |
model = 'OpenGVLab/InternVL2-40B'
|
597 |
+
system_prompt = '我是书生·万象,英文名是InternVL,是由上海人工智能实验室及多家合作单位联合开发的多模态大语言模型。'
|
598 |
image = load_image('https://raw.githubusercontent.com/open-mmlab/mmdeploy/main/tests/data/tiger.jpeg')
|
599 |
+
chat_template_config = ChatTemplateConfig('internvl-zh-hermes2')
|
600 |
chat_template_config.meta_instruction = system_prompt
|
601 |
pipe = pipeline(model, chat_template_config=chat_template_config,
|
602 |
backend_config=TurbomindEngineConfig(session_len=8192))
|
|
|
616 |
from lmdeploy.vl.constants import IMAGE_TOKEN
|
617 |
|
618 |
model = 'OpenGVLab/InternVL2-40B'
|
619 |
+
system_prompt = '我是书生·万象,英文名是InternVL,是由上海人工智能实验室及多家合作单位联合开发的多模态大语言模型。'
|
620 |
+
chat_template_config = ChatTemplateConfig('internvl-zh-hermes2')
|
621 |
chat_template_config.meta_instruction = system_prompt
|
622 |
pipe = pipeline(model, chat_template_config=chat_template_config,
|
623 |
backend_config=TurbomindEngineConfig(session_len=8192))
|
|
|
641 |
from lmdeploy.vl import load_image
|
642 |
|
643 |
model = 'OpenGVLab/InternVL2-40B'
|
644 |
+
system_prompt = '我是书生·万象,英文名是InternVL,是由上海人工智能实验室及多家合作单位联合开发的多模态大语言模型。'
|
645 |
+
chat_template_config = ChatTemplateConfig('internvl-zh-hermes2')
|
646 |
chat_template_config.meta_instruction = system_prompt
|
647 |
pipe = pipeline(model, chat_template_config=chat_template_config,
|
648 |
backend_config=TurbomindEngineConfig(session_len=8192))
|
|
|
665 |
from lmdeploy.vl import load_image
|
666 |
|
667 |
model = 'OpenGVLab/InternVL2-40B'
|
668 |
+
system_prompt = '我是书生·万象,英文名是InternVL,是由上海人工智能实验室及多家合作单位联合开发的多模态大语言模型。'
|
669 |
+
chat_template_config = ChatTemplateConfig('internvl-zh-hermes2')
|
670 |
chat_template_config.meta_instruction = system_prompt
|
671 |
pipe = pipeline(model, chat_template_config=chat_template_config,
|
672 |
backend_config=TurbomindEngineConfig(session_len=8192))
|
|
|
681 |
|
682 |
#### API部署
|
683 |
|
684 |
+
为了将InternVL2部署成API,请先配置聊天模板配置文件。创建如下的 JSON 文件 `chat_template.json`。
|
685 |
|
686 |
```json
|
687 |
{
|
688 |
"model_name":"internvl-zh-hermes2",
|
689 |
+
"meta_instruction":"我是书生·万象,英文名是InternVL,是由上海人工智能实验室及多家合作单位联合开发的多模态大语言模型。",
|
690 |
"stop_words":["<|im_start|>", "<|im_end|>"]
|
691 |
}
|
692 |
```
|
|
|
694 |
LMDeploy 的 `api_server` 使模型能够通过一个命令轻松打包成服务。提供的 RESTful API 与 OpenAI 的接口兼容。以下是服务启动的示例:
|
695 |
|
696 |
```shell
|
697 |
+
lmdeploy serve api_server OpenGVLab/InternVL2-40B --model-name InternVL2-40B --backend turbomind --server-port 23333 --chat-template chat_template.json
|
698 |
```
|
699 |
|
700 |
+
为了使用OpenAI风格的API接口,您需要安装OpenAI:
|
701 |
|
702 |
```shell
|
703 |
+
pip install openai
|
704 |
```
|
705 |
|
706 |
+
然后,使用下面的代码进行API调用:
|
707 |
+
|
708 |
+
```python
|
709 |
+
from openai import OpenAI
|
710 |
+
|
711 |
+
client = OpenAI(api_key='YOUR_API_KEY', base_url='http://0.0.0.0:23333/v1')
|
712 |
+
model_name = client.models.list().data[0].id
|
713 |
+
response = client.chat.completions.create(
|
714 |
+
model="InternVL2-40B",
|
715 |
+
messages=[{
|
716 |
+
'role':
|
717 |
+
'user',
|
718 |
+
'content': [{
|
719 |
+
'type': 'text',
|
720 |
+
'text': 'describe this image',
|
721 |
+
}, {
|
722 |
+
'type': 'image_url',
|
723 |
+
'image_url': {
|
724 |
+
'url':
|
725 |
+
'https://modelscope.oss-cn-beijing.aliyuncs.com/resource/tiger.jpeg',
|
726 |
+
},
|
727 |
+
}],
|
728 |
+
}],
|
729 |
+
temperature=0.8,
|
730 |
+
top_p=0.8)
|
731 |
+
print(response)
|
732 |
+
```
|
733 |
+
|
734 |
+
### vLLM
|
735 |
+
|
736 |
+
TODO
|
737 |
+
|
738 |
+
### Ollama
|
739 |
+
|
740 |
+
TODO
|
741 |
|
742 |
## 开源许可证
|
743 |
|
config.json
CHANGED
@@ -87,7 +87,7 @@
|
|
87 |
"tie_word_embeddings": false,
|
88 |
"tokenizer_class": null,
|
89 |
"top_k": 50,
|
90 |
-
"top_p":
|
91 |
"torch_dtype": "bfloat16",
|
92 |
"torchscript": false,
|
93 |
"transformers_version": "4.37.2",
|
|
|
87 |
"tie_word_embeddings": false,
|
88 |
"tokenizer_class": null,
|
89 |
"top_k": 50,
|
90 |
+
"top_p": 1.0,
|
91 |
"torch_dtype": "bfloat16",
|
92 |
"torchscript": false,
|
93 |
"transformers_version": "4.37.2",
|
configuration_intern_vit.py
CHANGED
@@ -1,6 +1,6 @@
|
|
1 |
# --------------------------------------------------------
|
2 |
# InternVL
|
3 |
-
# Copyright (c)
|
4 |
# Licensed under The MIT License [see LICENSE for details]
|
5 |
# --------------------------------------------------------
|
6 |
import os
|
|
|
1 |
# --------------------------------------------------------
|
2 |
# InternVL
|
3 |
+
# Copyright (c) 2024 OpenGVLab
|
4 |
# Licensed under The MIT License [see LICENSE for details]
|
5 |
# --------------------------------------------------------
|
6 |
import os
|
configuration_internvl_chat.py
CHANGED
@@ -1,6 +1,6 @@
|
|
1 |
# --------------------------------------------------------
|
2 |
# InternVL
|
3 |
-
# Copyright (c)
|
4 |
# Licensed under The MIT License [see LICENSE for details]
|
5 |
# --------------------------------------------------------
|
6 |
|
|
|
1 |
# --------------------------------------------------------
|
2 |
# InternVL
|
3 |
+
# Copyright (c) 2024 OpenGVLab
|
4 |
# Licensed under The MIT License [see LICENSE for details]
|
5 |
# --------------------------------------------------------
|
6 |
|
conversation.py
CHANGED
@@ -330,13 +330,16 @@ def get_conv_template(name: str) -> Conversation:
|
|
330 |
return conv_templates[name].copy()
|
331 |
|
332 |
|
333 |
-
#
|
|
|
|
|
|
|
334 |
register_conv_template(
|
335 |
Conversation(
|
336 |
name='Hermes-2',
|
337 |
system_template='<|im_start|>system\n{system_message}',
|
338 |
# note: The new system prompt was not used here to avoid changes in benchmark performance.
|
339 |
-
# system_message='我是书生·万象,英文名是InternVL
|
340 |
system_message='你是由上海人工智能实验室联合商汤科技开发的书生多模态大模型,英文名叫InternVL, 是一个有用无害的人工智能助手。',
|
341 |
roles=('<|im_start|>user\n', '<|im_start|>assistant\n'),
|
342 |
sep_style=SeparatorStyle.MPT,
|
@@ -357,7 +360,7 @@ register_conv_template(
|
|
357 |
name='internlm2-chat',
|
358 |
system_template='<|im_start|>system\n{system_message}',
|
359 |
# note: The new system prompt was not used here to avoid changes in benchmark performance.
|
360 |
-
# system_message='我是书生·万象,英文名是InternVL
|
361 |
system_message='你是由上海人工智能实验室联合商汤科技开发的书生多模态大模型,英文名叫InternVL, 是一个有用无害的人工智能助手。',
|
362 |
roles=('<|im_start|>user\n', '<|im_start|>assistant\n'),
|
363 |
sep_style=SeparatorStyle.MPT,
|
@@ -376,7 +379,7 @@ register_conv_template(
|
|
376 |
name='phi3-chat',
|
377 |
system_template='<|system|>\n{system_message}',
|
378 |
# note: The new system prompt was not used here to avoid changes in benchmark performance.
|
379 |
-
# system_message='我是书生·万象,英文名是InternVL
|
380 |
system_message='你是由上海人工智能实验室联合商汤科技开发的书生多模态大模型,英文名叫InternVL, 是一个有用无害的人工智能助手。',
|
381 |
roles=('<|user|>\n', '<|assistant|>\n'),
|
382 |
sep_style=SeparatorStyle.MPT,
|
|
|
330 |
return conv_templates[name].copy()
|
331 |
|
332 |
|
333 |
+
# Both Hermes-2 and internlm2-chat are chatml-format conversation templates. The difference
|
334 |
+
# is that during training, the preprocessing function for the Hermes-2 template doesn't add
|
335 |
+
# <s> at the beginning of the tokenized sequence, while the internlm2-chat template does.
|
336 |
+
# Therefore, they are completely equivalent during inference.
|
337 |
register_conv_template(
|
338 |
Conversation(
|
339 |
name='Hermes-2',
|
340 |
system_template='<|im_start|>system\n{system_message}',
|
341 |
# note: The new system prompt was not used here to avoid changes in benchmark performance.
|
342 |
+
# system_message='我是书生·万象,英文名是InternVL,是由上海人工智能实验室及多家合作单位联合开发的多模态大语言模型。',
|
343 |
system_message='你是由上海人工智能实验室联合商汤科技开发的书生多模态大模型,英文名叫InternVL, 是一个有用无害的人工智能助手。',
|
344 |
roles=('<|im_start|>user\n', '<|im_start|>assistant\n'),
|
345 |
sep_style=SeparatorStyle.MPT,
|
|
|
360 |
name='internlm2-chat',
|
361 |
system_template='<|im_start|>system\n{system_message}',
|
362 |
# note: The new system prompt was not used here to avoid changes in benchmark performance.
|
363 |
+
# system_message='我是书生·万象,英文名是InternVL,是由上海人工智能实验室及多家合作单位联合开发的多模态大语言模型。',
|
364 |
system_message='你是由上海人工智能实验室联合商汤科技开发的书生多模态大模型,英文名叫InternVL, 是一个有用无害的人工智能助手。',
|
365 |
roles=('<|im_start|>user\n', '<|im_start|>assistant\n'),
|
366 |
sep_style=SeparatorStyle.MPT,
|
|
|
379 |
name='phi3-chat',
|
380 |
system_template='<|system|>\n{system_message}',
|
381 |
# note: The new system prompt was not used here to avoid changes in benchmark performance.
|
382 |
+
# system_message='我是书生·万象,英文名是InternVL,是由上海人工智能实验室及多家合作单位联合开发的多模态大语言模型。',
|
383 |
system_message='你是由上海人工智能实验室联合商汤科技开发的书生多模态大模型,英文名叫InternVL, 是一个有用无害的人工智能助手。',
|
384 |
roles=('<|user|>\n', '<|assistant|>\n'),
|
385 |
sep_style=SeparatorStyle.MPT,
|
modeling_intern_vit.py
CHANGED
@@ -1,6 +1,6 @@
|
|
1 |
# --------------------------------------------------------
|
2 |
# InternVL
|
3 |
-
# Copyright (c)
|
4 |
# Licensed under The MIT License [see LICENSE for details]
|
5 |
# --------------------------------------------------------
|
6 |
from typing import Optional, Tuple, Union
|
|
|
1 |
# --------------------------------------------------------
|
2 |
# InternVL
|
3 |
+
# Copyright (c) 2024 OpenGVLab
|
4 |
# Licensed under The MIT License [see LICENSE for details]
|
5 |
# --------------------------------------------------------
|
6 |
from typing import Optional, Tuple, Union
|