Spaces:

mbwanaf
/

Swahili_vqa_app

Running

App Files Community

mbwanaf committed 2 days ago

Commit

38bc9e9

verified ·

1 Parent(s): 3fa1adc

update app.py

Browse files

Files changed (1) hide show

app.py +13 -18

app.py CHANGED Viewed

@@ -1,13 +1,11 @@
 import os
-# Use /tmp for all runtime-related folders
 os.environ["STREAMLIT_HOME"] = "/tmp"
 os.environ["STREAMLIT_RUNTIME_METRICS_ENABLED"] = "false"
-os.environ["TRANSFORMERS_CACHE"] = "/tmp/hf_cache"
-os.environ["HF_HOME"] = "/tmp/huggingface"  # safer replacement for TRANSFORMERS_CACHE
 os.environ["STREAMLIT_WATCHED_MODULES"] = ""
 import streamlit as st
 import torch
@@ -17,10 +15,10 @@ import random
 from PIL import Image
 from transformers import AutoTokenizer, AutoModel, ViTModel, ViTImageProcessor
-# CPU device only
 device = torch.device("cpu")
-# Define Swahili VQA Model
 class SwahiliVQAModel(torch.nn.Module):
     def __init__(self, num_answers):
         super().__init__()
@@ -43,20 +41,19 @@ class SwahiliVQAModel(torch.nn.Module):
         fused = self.fusion(combined)
         return self.classifier(fused)
-# Load label encoder
 le = joblib.load("Vit_3895_label_encoder_best.pkl")
-# Load model weights normally — no override
 model = SwahiliVQAModel(num_answers=len(le.classes_)).to(device)
 state_dict = torch.load("Vit_3895_best_model_epoch25.pth", map_location=device)
 model.load_state_dict(state_dict)
 model.eval()
-# Load tokenizer and processor
 tokenizer = AutoTokenizer.from_pretrained("benjamin/roberta-base-wechsel-swahili")
 vit_processor = ViTImageProcessor.from_pretrained('google/vit-base-patch16-224-in21k')
-# Streamlit UI
 st.set_page_config(page_title="Swahili VQA", layout="wide")
 st.title("🦜 Swahili Visual Question Answering (VQA)")
@@ -69,13 +66,12 @@ col1, col2 = st.columns([1, 2], gap="large")
 with col1:
     if uploaded_image:
-        st.image(uploaded_image, caption="Picha Iliyopakiwa", use_container_width=True)
-        st.markdown("<div style='margin-bottom: 25px;'></div>", unsafe_allow_html=True)
 with col2:
     st.markdown("<div style='padding-top: 15px;'>", unsafe_allow_html=True)
-    question = st.text_input("💬Andika swali lako hapa:", key="question_input")
-    submit_button = st.button("📩Tuma")
     st.markdown("</div>", unsafe_allow_html=True)
     if submit_button and uploaded_image and question:
@@ -100,10 +96,9 @@ with col2:
             ]
             results = sorted(results, key=lambda x: x["confidence"], reverse=True)
-            st.subheader("Majibu Yanayowezekana:")
             max_confidence = max(result["confidence"] for result in results)
             for i, pred in enumerate(results):
                 bar_width = (pred["confidence"] / max_confidence) * 70
                 color = generate_random_color()

 import os
+# ✅ Use /tmp for all cache & runtime folders (Hugging Face safe)
 os.environ["STREAMLIT_HOME"] = "/tmp"
 os.environ["STREAMLIT_RUNTIME_METRICS_ENABLED"] = "false"
 os.environ["STREAMLIT_WATCHED_MODULES"] = ""
+os.environ["TRANSFORMERS_CACHE"] = "/tmp/hf_cache"
+os.environ["HF_HOME"] = "/tmp/huggingface"
 import streamlit as st
 import torch
 from PIL import Image
 from transformers import AutoTokenizer, AutoModel, ViTModel, ViTImageProcessor
+# Use CPU only
 device = torch.device("cpu")
+# === Define Swahili VQA Model ===
 class SwahiliVQAModel(torch.nn.Module):
     def __init__(self, num_answers):
         super().__init__()
         fused = self.fusion(combined)
         return self.classifier(fused)
+# === Load model and encoders ===
 le = joblib.load("Vit_3895_label_encoder_best.pkl")
 model = SwahiliVQAModel(num_answers=len(le.classes_)).to(device)
+# Load full state dict (already trained classifier)
 state_dict = torch.load("Vit_3895_best_model_epoch25.pth", map_location=device)
 model.load_state_dict(state_dict)
 model.eval()
 tokenizer = AutoTokenizer.from_pretrained("benjamin/roberta-base-wechsel-swahili")
 vit_processor = ViTImageProcessor.from_pretrained('google/vit-base-patch16-224-in21k')
+# === Streamlit App ===
 st.set_page_config(page_title="Swahili VQA", layout="wide")
 st.title("🦜 Swahili Visual Question Answering (VQA)")
 with col1:
     if uploaded_image:
+        st.image(uploaded_image, caption="Picha Iliyopakiwa")
 with col2:
     st.markdown("<div style='padding-top: 15px;'>", unsafe_allow_html=True)
+    question = st.text_input("💬 Andika swali lako hapa:")
+    submit_button = st.button("📩 Tuma")
     st.markdown("</div>", unsafe_allow_html=True)
     if submit_button and uploaded_image and question:
             ]
             results = sorted(results, key=lambda x: x["confidence"], reverse=True)
+            st.subheader("🔎 Majibu Yanayowezekana:")
             max_confidence = max(result["confidence"] for result in results)
             for i, pred in enumerate(results):
                 bar_width = (pred["confidence"] / max_confidence) * 70
                 color = generate_random_color()