# Source: Hugging Face Space by HuuHuy227 — "init commit" (ad57a01), 9.2 kB
import streamlit as st
import spacy
from spacy import displacy
import pandas as pd
from collections import Counter
import plotly.express as px
from utils import analyze_text
from utils import svg_to_png
import base64
# Set page to wide mode for better visualization
st.set_page_config(layout="wide")


# Load the English language model once and cache it across Streamlit reruns;
# @st.cache_resource keeps the (slow-to-load) spaCy pipeline in memory instead
# of reloading it on every interaction.
@st.cache_resource
def load_model():
    """Load and return the medium English spaCy pipeline (en_core_web_md)."""
    return spacy.load('en_core_web_md')


nlp = load_model()
# Streamlit UI — centered page title.
st.markdown(
    "<h1 style='text-align: center; color: white;'>English Sentences Analyzer</h1>",
    unsafe_allow_html=True,
)

# Text input and quick-start help side by side.
col1, col2 = st.columns([3, 1])
with col1:
    text_input = st.text_area(
        "Enter English text:",
        "The ambitious startup in Silicon Valley developed an innovative AI system last year. " +
        "Google and Microsoft showed interest in acquiring the technology for $50 million.",
        height=200
    )
    analyze_button = st.button("Analyze Text")
with col2:
    with st.expander("Quick Guide", expanded=True):
        # Markdown content is kept flush-left: lines indented by 4+ spaces
        # would render as a code block instead of a list.
        st.markdown("""
1. Enter your text in the input box
2. Click "Analyze Text" to see:
   - Sentence structure visualization
   - Detailed token analysis
   - Additional analysis in expandable sections
3. Use mouse wheel or buttons to zoom the visualization
4. Click and drag to pan around
""")
# Run the full analysis pipeline when the user clicks "Analyze Text".
if analyze_button:
    if text_input:
        # analyze_text is a project helper (utils.py); it returns per-token
        # records, named entities, noun chunks, summary statistics and the
        # spaCy Doc itself.
        tokens, entities, noun_chunks, stats, doc = analyze_text(nlp, text_input)

        # --- 1. Dependency parse with zoomable visualization --------------
        st.header("Sentence Structure Analysis")

        # Render each sentence to an SVG fragment via displacy.
        sentence_htmls = []
        for sent in doc.sents:
            sent_html = displacy.render(sent, style="dep", options={
                "distance": 120,
                "arrow_stroke": 2,
                "arrow_width": 8,
                "font": "Arial",
                "bg": "#ffffff",
            })
            # Ensure the fragment carries an XML declaration.
            # NOTE(review): joining several '<?xml ...>'-prefixed SVGs with
            # <br> tags is not well-formed XML; this relies on svg_to_png
            # tolerating it — confirm against utils.svg_to_png.
            if not sent_html.startswith('<?xml'):
                sent_html = '<?xml version="1.0" encoding="UTF-8"?>' + sent_html
            sentence_htmls.append(sent_html)
        doc_html = "<br><br>".join(sentence_htmls)

        # Convert the combined SVG markup to a PNG; the project helper
        # signals failure by returning None rather than raising.
        png_bytes = svg_to_png(doc_html)
        if png_bytes is None:
            st.error("Failed to generate visualization")
        else:
            png_b64 = base64.b64encode(png_bytes).decode()

            # CSS for the zoom/pan image container. Top-level tags start at
            # column 0 so Markdown passes the HTML through untouched.
            st.markdown("""
<style>
.image-container {
    position: relative;
    overflow: hidden;
    background: #b4b4b4;
    border: 1px solid #ddd;
    border-radius: 5px;
    margin: 10px 0;
}
.zoomable-image {
    transform-origin: 0 0;
    transition: transform 0.1s;
}
.download-btn {
    position: absolute;
    right: 10px;
    top: 10px;
    background: rgba(255, 255, 255, 0.8);
    border: 1px solid #ddd;
    border-radius: 4px;
    padding: 5px 10px;
    cursor: pointer;
}
.download-btn:hover {
    background: white;
}
</style>
""", unsafe_allow_html=True)

            # HTML + JS for mouse-wheel zoom and click-drag panning, plus a
            # download link for the rendered PNG. Doubled braces escape
            # literal { } inside the f-string; only {png_b64} interpolates.
            js_code = f"""
<div class="image-container" id="imageContainer">
    <img src="data:image/png;base64,{png_b64}"
         class="zoomable-image"
         id="zoomableImage"
         style="max-width: 100%;">
    <a class="download-btn"
       href="data:image/png;base64,{png_b64}"
       download="sentence_structure.png">
        📥 Download
    </a>
</div>
<script>
const container = document.getElementById('imageContainer');
const img = document.getElementById('zoomableImage');
let scale = 1;
let isPanning = false;
let startX, startY, translateX = 0, translateY = 0;

// Zoom towards the mouse position, scale clamped to [1, 4].
container.addEventListener('wheel', (e) => {{
    e.preventDefault();
    const rect = container.getBoundingClientRect();
    const mouseX = e.clientX - rect.left;
    const mouseY = e.clientY - rect.top;
    const delta = e.deltaY * -0.01;
    const newScale = Math.max(1, Math.min(scale + delta, 4));
    const scaleChange = newScale / scale;
    translateX = mouseX - (mouseX - translateX) * scaleChange;
    translateY = mouseY - (mouseY - translateY) * scaleChange;
    scale = newScale;
    updateTransform();
}});

// Pan with click-and-drag.
container.addEventListener('mousedown', (e) => {{
    isPanning = true;
    startX = e.clientX - translateX;
    startY = e.clientY - translateY;
    container.style.cursor = 'grabbing';
}});
container.addEventListener('mousemove', (e) => {{
    if (!isPanning) return;
    translateX = e.clientX - startX;
    translateY = e.clientY - startY;
    updateTransform();
}});
container.addEventListener('mouseup', () => {{
    isPanning = false;
    container.style.cursor = 'grab';
}});
container.addEventListener('mouseleave', () => {{
    isPanning = false;
    container.style.cursor = 'grab';
}});

function updateTransform() {{
    img.style.transform = `translate(${{translateX}}px, ${{translateY}}px) scale(${{scale}})`;
}}

// Initialize
container.style.cursor = 'grab';
container.style.height = '500px';
</script>
"""
            st.markdown(js_code, unsafe_allow_html=True)

            # Usage hint under the visualization.
            col1, col2 = st.columns([3, 1])
            with col1:
                st.caption("💡 Tip: Use mouse wheel to zoom, click and drag to pan around")

        # --- 2. Detailed token analysis -----------------------------------
        st.header("Token Analysis")
        token_df = pd.DataFrame(tokens)

        # POS distribution pie chart next to the full token table.
        col1, col2 = st.columns([1, 2])
        with col1:
            pos_counts = Counter(token['POS'] for token in tokens)
            fig = px.pie(
                values=list(pos_counts.values()),
                names=list(pos_counts.keys()),
                title="Parts of Speech Distribution"
            )
            fig.update_layout(height=400)
            st.plotly_chart(fig, use_container_width=True)
        with col2:
            st.dataframe(token_df, use_container_width=True)

        # --- 3. Additional analysis in expanders --------------------------
        with st.expander("Named Entities"):
            if entities:
                ent_df = pd.DataFrame(entities)
                # Bar chart of entity-type frequencies above the raw table.
                entity_counts = Counter(ent['Label'] for ent in entities)
                fig = px.bar(
                    x=list(entity_counts.keys()),
                    y=list(entity_counts.values()),
                    title="Distribution of Named Entities",
                    labels={'x': 'Entity Type', 'y': 'Count'}
                )
                st.plotly_chart(fig)
                st.table(ent_df)
            else:
                st.info("No named entities found in the text.")

        with st.expander("Noun Chunks (Phrases)"):
            if noun_chunks:
                st.table(pd.DataFrame(noun_chunks))
            else:
                st.info("No noun chunks found in the text.")

        with st.expander("Text Statistics"):
            col1, col2, col3 = st.columns(3)
            with col1:
                st.metric("Word Count", stats['Word Count'])
            with col2:
                st.metric("Sentence Count", stats['Sentence Count'])
            with col3:
                st.metric("Unique Words", stats['Unique Words'])
            st.metric("Average Words per Sentence", stats['Average Words per Sentence'])
            st.metric("Stop Words Percentage", f"{stats['Stop Words %']}%")
    else:
        # Robustness: previously an empty input produced no feedback at all.
        st.warning("Please enter some text to analyze.")