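"""Streamlit dashboard for multilingual (English/Urdu) text summarization.

Loads Hugging Face summarization and translation pipelines with a user-supplied
API token, summarizes input text, optionally translates the summary, compares
models, and reports ROUGE scores against a reference summary.
"""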
import streamlit as st
import torch
from rouge_score import rouge_scorer
from transformers import pipeline

st.set_page_config(page_title="Multilingual Summarization Dashboard", layout="wide")

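# Custom CSS styling loaded from style.css in the app directory.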
with open("style.css") as f: |
|
st.markdown(f"<style>{f.read()}</style>", unsafe_allow_html=True) |
|
|
|
|
|
|
|
|
|
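# Sidebar: Hugging Face API token (required to load models from the Hub).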
st.sidebar.title("Hugging Face API Token") |
|
api_token = st.sidebar.text_input( |
|
"Enter your Hugging Face API token:", |
|
type="password", |
|
help="Get your token from https://huggingface.co/settings/tokens" |
|
) |
|
|
|
if not api_token: |
|
st.warning("Please enter your Hugging Face API token to enable model inference.") |
|
|
|
|
|
|
|
|
|
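# Model loading: summarization pipelines (urT5-base, mT5-small, mT5-base) and
# English↔Urdu translation pipelines, cached across reruns with st.cache_resource.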
@st.cache_resource(show_spinner=True)
def load_models(token):
    """Load and cache the summarization and translation pipelines."""
    device = 0 if torch.cuda.is_available() else -1  # first GPU when available, else CPU
    models = {}
    models['urT5-base'] = pipeline(
        "summarization",
        model="mbshr/urt5-base-finetuned",
        device=device,
        use_auth_token=token
    )
    models['mT5-small'] = pipeline(
        "summarization",
        model="google/mt5-small",
        device=device,
        use_auth_token=token
    )
    models['mT5-base'] = pipeline(
        "summarization",
        model="google/mt5-base",
        device=device,
        use_auth_token=token
    )
    models['en→ur'] = pipeline(
        "translation",
        model="Helsinki-NLP/opus-mt-en-ur",
        device=device,
        use_auth_token=token
    )
    models['ur→en'] = pipeline(
        "translation",
        model="Helsinki-NLP/opus-mt-ur-en",
        device=device,
        use_auth_token=token
    )
    return models


models = load_models(api_token) if api_token else {}

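# Sidebar: summarization and evaluation settings.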
st.sidebar.title("Settings") |
|
selected_model = st.sidebar.selectbox("Choose Summarization Model", ["urT5-base", "mT5-small", "mT5-base"]) |
|
max_length = st.sidebar.slider("Max summary length", 50, 500, 150) |
|
min_length = st.sidebar.slider("Min summary length", 10, 300, 40) |
|
target_lang = st.sidebar.selectbox("Translate summary to", ["None", "English", "Urdu"]) |
|
show_comparison = st.sidebar.checkbox("Compare models") |
|
show_rouge = st.sidebar.checkbox("Show ROUGE Score (requires reference)") |
|
|
|
|
|
|
|
|
|
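# Main page: title and input areas.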
st.title("🌐 Multilingual Summarization Dashboard (API Version)") |
|
st.write("Enter text to summarize, optionally translate, compare models, and evaluate with ROUGE.") |
|
|
|
text = st.text_area("Enter text to summarize:", height=200) |
|
reference_text = "" |
|
if show_rouge: |
|
reference_text = st.text_area("Reference summary for ROUGE evaluation:", height=100) |
|
|
|
|
|
|
|
|
|
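# Summarization workflow: validate inputs, summarize in chunks, then optionally
# translate, compare models, and compute ROUGE.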
if st.button("Generate Summary"):
    if not api_token:
        st.error("Please provide Hugging Face API token.")
    elif not text.strip():
        st.error("Please enter some text!")
    else:
        # Split long inputs into 500-character chunks and summarize each chunk.
        chunk_size = 500
        chunks = [text[i:i+chunk_size] for i in range(0, len(text), chunk_size)]
        full_summary = ""
        for chunk in chunks:
            summ = models[selected_model](chunk, max_length=max_length, min_length=min_length, do_sample=False)[0]['summary_text']
            full_summary += summ + " "

        st.subheader(f"Summary ({selected_model}):")
        st.write(full_summary)

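        # Translate the summary if a target language was selected. Translating to
        # English assumes the summary is in Urdu, and vice versa.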
        if target_lang != "None":
            try:
                if target_lang == "English":
                    translated = models['ur→en'](full_summary)[0]['translation_text']
                else:
                    translated = models['en→ur'](full_summary)[0]['translation_text']
                st.subheader(f"Summary in {target_lang}:")
                st.write(translated)
            except Exception as e:
                st.warning(f"Translation failed: {str(e)}")

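        # Run the same chunks through the other models for a side-by-side comparison.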
        if show_comparison:
            st.subheader("Comparison with other models:")
            for model_name in ["urT5-base", "mT5-small", "mT5-base"]:
                if model_name != selected_model:
                    comp_summary = ""
                    for chunk in chunks:
                        comp_summary += models[model_name](chunk, max_length=max_length, min_length=min_length, do_sample=False)[0]['summary_text'] + " "
                    st.markdown(f"**{model_name} Summary:** {comp_summary}")

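        # Score the generated summary against the user-provided reference with ROUGE.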
        if show_rouge and reference_text.strip():
            scorer = rouge_scorer.RougeScorer(['rouge1', 'rouge2', 'rougeL'], use_stemmer=True)
            scores = scorer.score(reference_text, full_summary)
            st.subheader("ROUGE Scores:")
            for k, v in scores.items():
                st.write(f"{k}: Precision: {v.precision:.3f}, Recall: {v.recall:.3f}, F1: {v.fmeasure:.3f}")