Create README.md
Browse files
README.md
ADDED
@@ -0,0 +1,18 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
```python
|
2 |
+
import torch
|
3 |
+
import requests
|
4 |
+
from PIL import Image
|
5 |
+
from transformers import AutoModel, AutoProcessor, AutoImageProcessor
|
6 |
+
from transformers import Qwen2VLForConditionalGeneration
|
7 |
+
# Image preprocessor published with the ViT checkpoint; trust_remote_code
# allows code from the model repository to run during loading.
image_processor = AutoImageProcessor.from_pretrained(
    "shilinxu/Qwen2-VL-2B-ViT",
    trust_remote_code=True,
)
|
8 |
+
# Vision transformer from the same checkpoint: bfloat16 weights,
# FlashAttention-2 attention, and automatic device placement.
vit = AutoModel.from_pretrained(
    "shilinxu/Qwen2-VL-2B-ViT",
    trust_remote_code=True,
    device_map="auto",
    torch_dtype=torch.bfloat16,
    attn_implementation="flash_attention_2",
)
|
9 |
+
|
10 |
+
# Remote demo image used as the example input.
url = "https://qianwen-res.oss-cn-beijing.aliyuncs.com/Qwen-VL/assets/demo.jpeg"
|
11 |
+
# Download the demo image. The timeout keeps a stalled connection from
# hanging the script, and raise_for_status() surfaces HTTP errors here
# instead of letting PIL fail on an error page.
response = requests.get(url, stream=True, timeout=30)
response.raise_for_status()
image = Image.open(response.raw).convert("RGB")
|
12 |
+
# Batch of one: the image processor is called with a list of images.
images = [image]
|
13 |
+
|
14 |
+
# Preprocess the batch and request PyTorch tensors back.
inputs = image_processor(
    images,
    return_tensors="pt",
)
|
15 |
+
# Move the pixel tensor to the model's device and cast it to the model's dtype.
pixel_values = inputs["pixel_values"]
pixel_values = pixel_values.to(device=vit.device, dtype=vit.dtype)
|
16 |
+
# Patch-grid descriptor produced by the processor (presumably temporal,
# height, width counts per image; confirm against the model code).
# Keep it on the same device as the model and pixel_values so the forward
# pass does not mix CPU and accelerator tensors; the integer dtype is left
# unchanged.
image_grid_thw = inputs['image_grid_thw'].to(vit.device)
|
17 |
+
# Inference only: disable autograd so no graph is built and activations are
# not kept for a backward pass. Remove the context manager if gradients
# through the ViT are needed.
with torch.no_grad():
    image_embeds = vit(pixel_values, grid_thw=image_grid_thw)
|
18 |
+
```
|