Update README.md

README.md CHANGED

@@ -2,115 +2,12 @@

Removed:

language:
- en
license: mit
library_name: Tevatron
datasets:
- Tevatron/docmatix-ir
- HuggingFaceM4/Docmatix
- Tevatron/msmarco-passage-aug
---

# DSE-Phi3-Docmatix-V1

## How to Use the Model

### Load the Model and Processor

```python
import torch
from transformers import AutoProcessor, AutoModelForCausalLM

processor = AutoProcessor.from_pretrained('Tevatron/dse-phi3-docmatix-v1', trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained('Tevatron/dse-phi3-docmatix-v1', trust_remote_code=True, attn_implementation="flash_attention_2", torch_dtype=torch.bfloat16, use_cache=False).to('cuda:0')

def get_embedding(last_hidden_state: torch.Tensor, attention_mask: torch.Tensor) -> torch.Tensor:
    # Index of the last non-padding token in each sequence.
    sequence_lengths = attention_mask.sum(dim=1) - 1
    bs = last_hidden_state.shape[0]
    reps = last_hidden_state[torch.arange(bs, device=last_hidden_state.device), sequence_lengths]
    # L2-normalize so that dot products equal cosine similarities.
    reps = torch.nn.functional.normalize(reps, p=2, dim=-1)
    return reps
```
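
The helper pools the hidden state of the last non-padding token, which is the `</s>` appended to every prompt below, and L2-normalizes it, so dot products between the resulting embeddings are cosine similarities.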

### Encode Text Query

```python
queries = ["query: Where can we see Llama?</s>", "query: What is LLaMA model?</s>"]
query_inputs = processor(queries, return_tensors="pt", padding="longest", max_length=128, truncation=True).to('cuda:0')
with torch.no_grad():
    output = model(**query_inputs, return_dict=True, output_hidden_states=True)
    query_embeddings = get_embedding(output.hidden_states[-1], query_inputs["attention_mask"])
```
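
As in these examples, prefix each query with `query: ` and end it with `</s>`, so that `get_embedding` pools the embedding from that final token.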

### Encode Document Screenshot

```python
from PIL import Image
import requests
from io import BytesIO

# URLs of the images
url1 = "https://huggingface.co/Tevatron/dse-phi3-docmatix-v1/resolve/main/animal-llama.png"
url2 = "https://huggingface.co/Tevatron/dse-phi3-docmatix-v1/resolve/main/meta-llama.png"

# Download and open images
response1 = requests.get(url1)
response2 = requests.get(url2)

passage_image1 = Image.open(BytesIO(response1.content))
passage_image2 = Image.open(BytesIO(response2.content))

passage_images = [passage_image1, passage_image2]
passage_prompts = ["<|image_1|>\nWhat is shown in this image?</s>", "<|image_2|>\nWhat is shown in this image?</s>"]

# Process inputs and get embeddings
passage_inputs = processor(passage_prompts, images=passage_images, return_tensors="pt", padding="longest", max_length=4096, truncation=True).to('cuda:0')
# The processor can return these batched tensors with an extra leading dimension; drop it.
passage_inputs['input_ids'] = passage_inputs['input_ids'].squeeze(0)
passage_inputs['attention_mask'] = passage_inputs['attention_mask'].squeeze(0)
passage_inputs['image_sizes'] = passage_inputs['image_sizes'].squeeze(0)
with torch.no_grad():
    output = model(**passage_inputs, return_dict=True, output_hidden_states=True)
    doc_embeddings = get_embedding(output.hidden_states[-1], passage_inputs["attention_mask"])
```
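
The `squeeze(0)` calls drop an extra leading dimension that the Phi-3-vision processor can produce when several images are batched together; the processor fix linked at the end of this page appears to target the same batching issue.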

### Compute Similarity

```python
from torch.nn.functional import cosine_similarity
num_queries = query_embeddings.size(0)
num_passages = doc_embeddings.size(0)

for i in range(num_queries):
    query_embedding = query_embeddings[i].unsqueeze(0)
    similarities = cosine_similarity(query_embedding, doc_embeddings)
    print(f"Similarities for Query {i+1}: {similarities.cpu().float().numpy()}")
```
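
Since `get_embedding` returns L2-normalized vectors, the loop above is equivalent to a single matrix product; a minimal sketch, reusing the tensors already computed:

```python
# All query-passage cosine similarities in one call
# (valid because both embedding sets are L2-normalized).
scores = query_embeddings @ doc_embeddings.T  # shape: (num_queries, num_passages)
print(scores.cpu().float().numpy())
```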

### Encode Document Text

This DSE checkpoint was warmed up with `Tevatron/msmarco-passage-aug`, so the model can also effectively encode documents given as plain text.

```python
passage_prompts = [
    "The llama (/ˈlɑːmə/; Spanish pronunciation: [ˈʎama] or [ˈʝama]) (Lama glama) is a domesticated South American camelid, widely used as a meat and pack animal by Andean cultures since the pre-Columbian era.</s>",
    "Llama (acronym for Large Language Model Meta AI, and formerly stylized as LLaMA) is a family of autoregressive large language models (LLMs) released by Meta AI starting in February 2023.[2][3] The latest version is Llama 3.1, released in July 2024.[4]</s>"
]

passage_inputs = processor(passage_prompts, images=None, return_tensors="pt", padding="longest", max_length=4096, truncation=True).to('cuda:0')
with torch.no_grad():
    output = model(**passage_inputs, return_dict=True, output_hidden_states=True)
    doc_embeddings = get_embedding(output.hidden_states[-1], passage_inputs["attention_mask"])

for i in range(num_queries):
    query_embedding = query_embeddings[i].unsqueeze(0)
    similarities = cosine_similarity(query_embedding, doc_embeddings)
    print(f"Similarities for Query {i+1}: {similarities.cpu().float().numpy()}")
```
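
This snippet reuses `num_queries`, `query_embeddings`, and `cosine_similarity` from the earlier sections, so run those first. Text passages need no image placeholder; passing `images=None` is enough.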

### Citation

If you find this checkpoint helpful, please consider citing Phi-3, Docmatix, and our DSE work.

Added:

language:
- en
license: mit
---

# Clone of Phi-3-vision-128k-instruct

This is a clone of https://huggingface.co/microsoft/Phi-3-vision-128k-instruct with the batch processing issue fixed, based on:
https://huggingface.co/microsoft/Phi-3-vision-128k-instruct/discussions/32/files