import streamlit as st
import spacy
from spacy import displacy
import pandas as pd
from collections import Counter
import plotly.express as px
from utils import analyze_text
from utils import svg_to_png
import base64
# Set page to wide mode for better visualization
st.set_page_config(layout="wide")


# Load English language model.
# @st.cache_resource ensures the (expensive) model load happens only once
# per server process, not on every script rerun.
@st.cache_resource
def load_model():
    """Load and return the spaCy medium English pipeline (en_core_web_md)."""
    return spacy.load('en_core_web_md')


nlp = load_model()
# Streamlit UI
# NOTE(review): the original HTML inside this st.markdown call was lost in
# transit (only the visible title text survived); reconstructed as a centered
# <h1> since unsafe_allow_html=True implies an HTML payload — confirm styling.
st.markdown(
    "<h1 style='text-align: center;'>English Sentences Analyzer</h1>",
    unsafe_allow_html=True,
)

# Text Input and Help side by side
col1, col2 = st.columns([3, 1])

with col1:
    text_input = st.text_area(
        "Enter English text:",
        # Default sample text demonstrating entities, money values and dates
        "The ambitious startup in Silicon Valley developed an innovative AI system last year. "
        "Google and Microsoft showed interest in acquiring the technology for $50 million.",
        height=200,
    )
    analyze_button = st.button("Analyze Text")

with col2:
    with st.expander("Quick Guide", expanded=True):
        st.markdown("""
        1. Enter your text in the input box
        2. Click "Analyze Text" to see:
           - Sentence structure visualization
           - Detailed token analysis
           - Additional analysis in expandable sections
        3. Use mouse wheel or buttons to zoom the visualization
        4. Click and drag to pan around
        """)
if analyze_button:
    if text_input:
        # Run the full NLP pipeline once; downstream sections reuse the results.
        tokens, entities, noun_chunks, stats, doc = analyze_text(nlp, text_input)

        # 1. Dependency Parse with improved visualization
        st.header("Sentence Structure Analysis")

        # Render one dependency SVG per sentence so long texts stack vertically
        sentences = list(doc.sents)
        sentence_htmls = []
        for sent in sentences:
            sent_html = displacy.render(sent, style="dep", options={
                "distance": 120,
                "arrow_stroke": 2,
                "arrow_width": 8,
                "font": "Arial",
                "bg": "#ffffff",
            })
            # Ensure proper SVG structure.
            # NOTE(review): this guard was garbled in the source; reconstructed
            # as "prepend an <svg> opening tag when missing" — confirm intent.
            if not sent_html.startswith('<svg'):
                sent_html = '<svg>' + sent_html
            sentence_htmls.append(sent_html)

        doc_html = "\n".join(sentence_htmls)

        # Convert SVG to PNG with error handling (svg_to_png returns None on failure)
        png_bytes = svg_to_png(doc_html)
        if png_bytes is None:
            st.error("Failed to generate visualization")
        else:
            png_b64 = base64.b64encode(png_bytes).decode()

            # CSS for image container.
            # NOTE(review): the original <style> body was lost in this chunk;
            # reconstructed a minimal pannable container — verify against the
            # intended design.
            st.markdown(
                """
                <style>
                #svg-container {
                    overflow: hidden;
                    border: 1px solid #ddd;
                    cursor: grab;
                }
                #svg-container img { transform-origin: 0 0; }
                </style>
                """,
                unsafe_allow_html=True,
            )

            # JavaScript for zoom and pan functionality.
            # NOTE(review): original markup/script was lost; reconstructed a
            # wheel-zoom + drag-pan wrapper around the base64 PNG. Streamlit's
            # st.markdown does not execute <script> tags — if interactivity is
            # required, this should move to st.components.v1.html; kept as
            # st.markdown to match the original call.
            js_code = f"""
            <div id="svg-container">
                <img id="parse-img" src="data:image/png;base64,{png_b64}" />
            </div>
            <script>
            const container = document.getElementById('svg-container');
            const img = document.getElementById('parse-img');
            let scale = 1, panX = 0, panY = 0, dragging = false, lastX = 0, lastY = 0;
            function apply() {{
                img.style.transform = `translate(${{panX}}px, ${{panY}}px) scale(${{scale}})`;
            }}
            container.addEventListener('wheel', (e) => {{
                e.preventDefault();
                scale = Math.max(0.2, scale + (e.deltaY < 0 ? 0.1 : -0.1));
                apply();
            }});
            container.addEventListener('mousedown', (e) => {{
                dragging = true; lastX = e.clientX; lastY = e.clientY;
            }});
            window.addEventListener('mouseup', () => {{ dragging = false; }});
            window.addEventListener('mousemove', (e) => {{
                if (!dragging) return;
                panX += e.clientX - lastX; panY += e.clientY - lastY;
                lastX = e.clientX; lastY = e.clientY;
                apply();
            }});
            </script>
            """
            st.markdown(js_code, unsafe_allow_html=True)

        # Add caption
        col1, col2 = st.columns([3, 1])
        with col1:
            st.caption("💡 Tip: Use mouse wheel to zoom, click and drag to pan around")

        # 2. Detailed Token Analysis
        st.header("Token Analysis")
        token_df = pd.DataFrame(tokens)

        # Create two columns for token distribution and token details
        col1, col2 = st.columns([1, 2])
        with col1:
            # Token distribution visualization (Counter accepts any iterable,
            # so a generator avoids building an intermediate list)
            pos_counts = Counter(token['POS'] for token in tokens)
            fig = px.pie(
                values=list(pos_counts.values()),
                names=list(pos_counts.keys()),
                title="Parts of Speech Distribution",
            )
            fig.update_layout(height=400)
            st.plotly_chart(fig, use_container_width=True)
        with col2:
            st.dataframe(token_df, use_container_width=True)

        # Additional Analysis in Expanders
        with st.expander("Named Entities"):
            if entities:
                ent_df = pd.DataFrame(entities)
                # Visualization of entity distribution
                entity_counts = Counter(ent['Label'] for ent in entities)
                fig = px.bar(
                    x=list(entity_counts.keys()),
                    y=list(entity_counts.values()),
                    title="Distribution of Named Entities",
                    labels={'x': 'Entity Type', 'y': 'Count'},
                )
                st.plotly_chart(fig)
                st.table(ent_df)
            else:
                st.info("No named entities found in the text.")

        with st.expander("Noun Chunks (Phrases)"):
            if noun_chunks:
                st.table(pd.DataFrame(noun_chunks))
            else:
                st.info("No noun chunks found in the text.")

        with st.expander("Text Statistics"):
            col1, col2, col3 = st.columns(3)
            with col1:
                st.metric("Word Count", stats['Word Count'])
            with col2:
                st.metric("Sentence Count", stats['Sentence Count'])
            with col3:
                st.metric("Unique Words", stats['Unique Words'])
            st.metric("Average Words per Sentence", stats['Average Words per Sentence'])
            st.metric("Stop Words Percentage", f"{stats['Stop Words %']}%")
    else:
        # Robustness: the original silently did nothing on empty input;
        # give the user explicit feedback instead.
        st.warning("Please enter some text to analyze.")