Susant-Achary committed · Commit 1943cf0 (verified) · 1 Parent(s): 0abd65a

Update README.md

Files changed (1): README.md (+9 -9)
README.md CHANGED
@@ -6,17 +6,18 @@ tags:
 - base_model:adapter:HuggingFaceTB/SmolVLM2-500M-Video-Instruct
 - lora
 - transformers
+- finance
 model-index:
 - name: Susant-Achary/SmolVLM2-500M-Video-Instruct-VQA2
   results:
   - task:
       type: visual-question-answering
     dataset:
-      type: jinaai/table-vqa
-      name: jinaai/table-vqa
+      type: jinaai/table-vqa
+      name: jinaai/table-vqa
     metrics:
-    - type: training_loss
-      value: 0.7473664236068726
+    - type: training_loss
+      value: 0.7473664236068726
 datasets:
 - jinaai/table-vqa
 language:
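The metadata hunk above tags the repo with finance, records the final training loss, and points at jinaai/table-vqa as the training dataset. If you want to inspect that dataset, a standard datasets call is sketched below; the split name is an assumption, not something this commit specifies.

```python
from datasets import load_dataset

# Load the dataset named in the model-index metadata.
# "train" is an assumed split; check the dataset card for the actual splits.
ds = load_dataset("jinaai/table-vqa", split="train")
print(ds)      # dataset size and features
print(ds[0])   # one example record
```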
@@ -79,12 +80,12 @@ import requests
 
 # Define the base model and the fine-tuned adapter repository
 base_model_id = "HuggingFaceTB/SmolVLM2-500M-Video-Instruct"
-adapter_model_id = "Susant-Achary/SmolVLM2-500M-Video-Instruct-vqav2"
+adapter_model_id = "Susant-Achary/SmolVLM2-500M-Video-Instruct-vqav2"
 
 # Load the processor from the base model
 processor = AutoProcessor.from_pretrained(base_model_id)
 
-# Load the base model with quantization
+# Load the base model with quantization
 bnb_config = BitsAndBytesConfig(
     load_in_4bit=True,
     bnb_4bit_use_double_quant=True,
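The hunk above is cut off by the diff context window, so the 4-bit config is shown incomplete. For reference, a typical complete setup looks like the sketch below; the nf4 quant type and bfloat16 compute dtype are assumptions, not values taken from this commit.

```python
import torch
from transformers import BitsAndBytesConfig

# Sketch of a complete 4-bit config; the last two arguments are assumed,
# not taken from this commit.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,                      # store base weights in 4-bit
    bnb_4bit_use_double_quant=True,         # also quantize the quantization constants
    bnb_4bit_quant_type="nf4",              # assumed: NF4 is common for QLoRA-style fine-tunes
    bnb_4bit_compute_dtype=torch.bfloat16,  # assumed: bf16 compute for stability
)
```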
@@ -95,7 +96,6 @@ bnb_config = BitsAndBytesConfig(
 model = Idefics3ForConditionalGeneration.from_pretrained(
     base_model_id,
     quantization_config=bnb_config,
-    _attn_implementation="flash_attention_2",
     device_map="auto"
 )
 
@@ -104,8 +104,8 @@ model = PeftModel.from_pretrained(model, adapter_model_id)
 
 # Prepare an example image and question
 # You can replace this with your own image and question
-url = "https://www.researchgate.net/profile/Sarah-Parisot/publication/325862405/figure/tbl1/AS:668602864791562@1536418753263/VQA-20-standard-test-set-results-comparison-of-state-of-the-art-methods.png"
-image = Image.open(requests.get(url, stream=True).raw)
+url = "/content/VQA-20-standard-test-set-results-comparison-of-state-of-the-art-methods.png"
+image = Image.open(url)
 question = "What is in the image?"
 
 # Prepare the input for the model
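The example image now comes from a local path instead of the ResearchGate URL, which can reject hotlinked requests (and the /content/ prefix suggests a Colab runtime). A helper that accepts either form is sketched below; load_image is a hypothetical name, not from the README.

```python
import requests
from PIL import Image

def load_image(src: str) -> Image.Image:
    """Open an image from an http(s) URL or a local file path (hypothetical helper)."""
    if src.startswith(("http://", "https://")):
        return Image.open(requests.get(src, stream=True).raw)
    return Image.open(src)

image = load_image("/content/VQA-20-standard-test-set-results-comparison-of-state-of-the-art-methods.png")
```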
 
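The README snippet shown in this diff stops at preparing the model input. For context, here is a minimal sketch of the steps that typically follow, assuming the standard Idefics3/SmolVLM2 chat-template API in recent transformers releases; the message format and generation settings are assumptions, not part of this commit, and processor, model, image, and question come from the snippet above.

```python
# Assumed continuation: build a chat prompt, run generation, decode.
messages = [
    {
        "role": "user",
        "content": [
            {"type": "image"},
            {"type": "text", "text": question},
        ],
    }
]

prompt = processor.apply_chat_template(messages, add_generation_prompt=True)
inputs = processor(text=prompt, images=[image], return_tensors="pt").to(model.device)

generated_ids = model.generate(**inputs, max_new_tokens=128)  # assumed token budget
answer = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
print(answer)
```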