HenryM committed
Commit db441d7 · verified · parent 6ef3775

Upload 7 files

Files changed (8)
  1. .gitattributes +4 -0
  2. 163.png +3 -0
  3. 6.png +3 -0
  4. README.md +0 -13
  5. app.py +173 -0
  6. img_71_2.png +3 -0
  7. img_98.png +3 -0
  8. requirements.txt +7 -0
.gitattributes CHANGED
@@ -33,3 +33,7 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
  *.zip filter=lfs diff=lfs merge=lfs -text
  *.zst filter=lfs diff=lfs merge=lfs -text
  *tfevents* filter=lfs diff=lfs merge=lfs -text
+ 163.png filter=lfs diff=lfs merge=lfs -text
+ 6.png filter=lfs diff=lfs merge=lfs -text
+ img_71_2.png filter=lfs diff=lfs merge=lfs -text
+ img_98.png filter=lfs diff=lfs merge=lfs -text
163.png ADDED

Git LFS Details

  • SHA256: 6ba0e49364284b5973299c9ba0d39367ebdb3bca8848b988a47901b733d8dbc0
  • Pointer size: 131 Bytes
  • Size of remote file: 404 kB
6.png ADDED

Git LFS Details

  • SHA256: cf67586794e3bcaed7add2680fc7ba69e7c883919d41f470ef5626ab567b5106
  • Pointer size: 131 Bytes
  • Size of remote file: 208 kB
README.md CHANGED
@@ -1,13 +0,0 @@
- ---
- title: BacanoResponder
- emoji: 👁
- colorFrom: gray
- colorTo: green
- sdk: gradio
- sdk_version: 5.32.1
- app_file: app.py
- pinned: false
- short_description: BacanoResponder - VQA en Imágenes
- ---
-
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py ADDED
@@ -0,0 +1,180 @@
+ import gradio as gr
+ from PIL import Image
+ import numpy as np
+ import torch
+ from transformers import Qwen2_5_VLForConditionalGeneration, AutoProcessor
+ from qwen_vl_utils import process_vision_info
+ from peft import PeftModel
+
+ system_prompt = (
+     "A conversation between User and Assistant. The user asks a question, and the Assistant solves it. "
+     "The assistant es un experto sobre Colombia. Primero razona en mente y luego da la respuesta. "
+     "El razonamiento y la respuesta van en <think></think> y <answer></answer>."
+ )
+
+ MODEL_ID = 'Qwen/Qwen2.5-VL-3B-Instruct'
+ ADAPTER_ID = 'Factral/qwen2.5vl-3b-colombia-finetuned'
+
+ processor = AutoProcessor.from_pretrained(MODEL_ID)
+
+ # Load the base model; flash-attn is optional, so fall back to eager
+ # attention when it is not installed
+ has_gpu = torch.cuda.is_available()
+ try:
+     import flash_attn  # noqa: F401
+     attn_impl = "flash_attention_2" if has_gpu else "eager"
+ except ImportError:
+     attn_impl = "eager"
+ model = Qwen2_5_VLForConditionalGeneration.from_pretrained(
+     MODEL_ID,
+     torch_dtype=torch.bfloat16 if has_gpu else torch.float32,
+     attn_implementation=attn_impl,
+     device_map="auto"
+ )
+
+ # Load the PEFT adapter and merge it into the base weights
+ model = PeftModel.from_pretrained(model, ADAPTER_ID)
+ model = model.merge_and_unload()
+ model.eval()
+ # device_map="auto" already placed the weights; keep a handle for the inputs
+ device = torch.device("cuda" if has_gpu else "cpu")
+
+ example_imgs = [
+     ("6.png", "Shakira"),
+     ("163.png", "Tienda esquinera"),
+     ("img_71_2.png", "Comida colombiana"),
+     ("img_98.png", "Oso de anteojos"),
+ ]
+
+ def cargar_imagen(imagen_path: str) -> Image.Image:
+     return Image.open(imagen_path)
+
+ custom_css = """
+ #galeria-scroll {
+     max-height: 320px;
+     overflow-y: auto;
+     border: 1px solid #ccc;
+     padding: 8px;
+     border-radius: 8px;
+ }
+ """
+
+ with gr.Blocks(theme='lone17/kotaemon', css=custom_css) as demo:
+     gr.Markdown(
+         """
+         # 🇨🇴 Bacano Responder
+         Sube o elige una imagen, haz una pregunta y obtén una respuesta con contexto local.
+         """
+     )
+
+     with gr.Row(equal_height=True):
+         # Left column
+         with gr.Column(scale=1):
+             pregunta = gr.Textbox(
+                 label="❓ Pregunta sobre tu imagen",
+                 placeholder="¿Qué muestra esta imagen?",
+                 lines=2
+             )
+
+             # gr.Box was removed in Gradio 4; gr.Group plus the CSS above
+             # gives the same scrollable panel
+             with gr.Group(elem_id="galeria-scroll"):
+                 galeria = gr.Gallery(
+                     label="📁 Elige una imagen de ejemplo",
+                     value=[img for img, _ in example_imgs],
+                     columns=2,
+                     height=None,
+                     allow_preview=True,
+                     show_label=True,
+                 )
+
+         # Right column
+         with gr.Column(scale=1):
+             imagen_mostrada = gr.Image(
+                 label="🖼 Imagen seleccionada o subida",
+                 type="numpy",  # may arrive as numpy or PIL; converted below
+                 height=256
+             )
+
+             respuesta = gr.Textbox(
+                 label="🧠 Respuesta",
+                 interactive=False,
+                 lines=4
+             )
+
+             btn_procesar = gr.Button("🔍 Procesar")
+
+     def seleccionar_imagen(evt: gr.SelectData):
+         idx = evt.index
+         img_path = example_imgs[idx][0]
+         pil = cargar_imagen(img_path)
+         return np.array(pil)
+
+     galeria.select(fn=seleccionar_imagen, inputs=None, outputs=imagen_mostrada)
+
+     def responder(img, pregunta_text):
+         if img is None or pregunta_text.strip() == "":
+             return "Por favor sube una imagen y escribe una pregunta."
+
+         # Convert a numpy array to PIL if necessary
+         if isinstance(img, np.ndarray):
+             img_pil = Image.fromarray(img.astype('uint8'))
+         else:
+             img_pil = img  # already a PIL image
+
+         messages = [
+             {
+                 "role": "system",
+                 "content": [{"type": "text", "text": system_prompt}],
+             },
+             {
+                 "role": "user",
+                 "content": [
+                     {"type": "text", "text": pregunta_text},
+                     {"type": "image", "image": img_pil},
+                 ],
+             }
+         ]
+
+         text = processor.apply_chat_template(
+             messages,
+             tokenize=False,
+             add_generation_prompt=True
+         )
+         image_inputs, video_inputs = process_vision_info(messages)
+
+         inputs = processor(
+             text=[text],
+             images=image_inputs,
+             videos=video_inputs,
+             padding=True,
+             return_tensors="pt",
+         )
+         inputs = inputs.to(device)
+
+         with torch.no_grad():
+             generated_ids = model.generate(
+                 **inputs,
+                 max_new_tokens=512,
+                 top_p=1.0,
+                 do_sample=True,
+                 temperature=0.9
+             )
+
+         # Strip the prompt tokens so only newly generated text is decoded
+         trimmed = [
+             out_ids[len(in_ids):]
+             for in_ids, out_ids in zip(inputs.input_ids, generated_ids)
+         ]
+         respuesta_text = processor.batch_decode(
+             trimmed,
+             skip_special_tokens=True,
+             clean_up_tokenization_spaces=False
+         )
+
+         return respuesta_text[0]
+
+     btn_procesar.click(fn=responder, inputs=[imagen_mostrada, pregunta], outputs=respuesta)
+
+ if __name__ == "__main__":
+     demo.launch()
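One note on the output format: the system prompt asks the model to put its reasoning inside <think></think> and the final answer inside <answer></answer>, yet responder returns the raw decoded string, tags included. If only the answer should reach the UI, a minimal parsing sketch could be dropped in (the helper name split_think_answer is hypothetical, not part of this commit; it falls back to the whole text when the model ignores the tags):

import re

def split_think_answer(text: str) -> tuple[str, str]:
    # Extract the <think>...</think> and <answer>...</answer> spans;
    # tolerate missing tags by treating the whole string as the answer.
    think = re.search(r"<think>(.*?)</think>", text, re.DOTALL)
    answer = re.search(r"<answer>(.*?)</answer>", text, re.DOTALL)
    if answer is None:
        return "", text.strip()
    return (think.group(1).strip() if think else "", answer.group(1).strip())

reasoning, final = split_think_answer(
    "<think>Es un oso andino.</think><answer>Un oso de anteojos.</answer>"
)
print(final)  # -> Un oso de anteojos.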
img_71_2.png ADDED

Git LFS Details

  • SHA256: 315059e598dec87be10ff38a12e32240669834ae10c2fe11969310f106e9cc94
  • Pointer size: 131 Bytes
  • Size of remote file: 151 kB
img_98.png ADDED

Git LFS Details

  • SHA256: 462beb252c58274b0093266a1a6d4b75cd6005dbc665a2db7fa43620229a44c9
  • Pointer size: 131 Bytes
  • Size of remote file: 176 kB
requirements.txt ADDED
@@ -0,0 +1,7 @@
+ gradio>=4.0.0
+ torchvision
+ torch
+ pillow
+ transformers>=4.49.0
+ peft
+ qwen_vl_utils
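requirements.txt sets version floors only for gradio and transformers; everything else is unpinned, even though the app depends on a transformers release that ships Qwen2_5_VLForConditionalGeneration (Qwen2.5-VL support landed in 4.49.0). A small smoke test, offered as a sketch rather than part of the commit, catches version drift right after pip install -r requirements.txt:

import gradio as gr
import peft
import torch
import transformers

# These imports fail on transformers releases older than the Qwen2.5-VL line:
from transformers import Qwen2_5_VLForConditionalGeneration  # noqa: F401
from qwen_vl_utils import process_vision_info  # noqa: F401

print("torch", torch.__version__, "| cuda available:", torch.cuda.is_available())
print("transformers", transformers.__version__)
print("peft", peft.__version__)
print("gradio", gr.__version__)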