# Spaces: Sleeping  (page-header residue from the hosting site; not part of the program)
import base64
from collections import Counter

import pandas as pd
import plotly.express as px
import spacy
import streamlit as st
import streamlit.components.v1 as components
from spacy import displacy

from utils import analyze_text
from utils import svg_to_png
# Set page to wide mode for better visualization.
# This is the first Streamlit call issued by this script.
st.set_page_config(layout="wide")
# Load English language model
@st.cache_resource
def load_model():
    """Load and return the spaCy ``en_core_web_md`` pipeline.

    Streamlit re-executes the whole script on every widget interaction, so
    the loader is wrapped in ``st.cache_resource``: the pipeline is loaded
    once and the same object is handed back on subsequent reruns instead of
    being read from disk again each time.

    Returns:
        The object returned by ``spacy.load('en_core_web_md')``.
    """
    return spacy.load('en_core_web_md')


nlp = load_model()
# Streamlit UI
# NOTE(review): the title colour is hard-coded to white, which presumably
# assumes a dark theme -- confirm it stays readable on a light background.
st.markdown(
    "<h1 style='text-align: center; color: white;'>English Sentences Analyzer</h1>",
    unsafe_allow_html=True,
)

# Text Input and Help side by side
input_col, guide_col = st.columns([3, 1])

with input_col:
    # The text area is pre-filled with a two-sentence sample.
    text_input = st.text_area(
        "Enter English text:",
        "The ambitious startup in Silicon Valley developed an innovative AI system last year. "
        "Google and Microsoft showed interest in acquiring the technology for $50 million.",
        height=200,
    )
    analyze_button = st.button("Analyze Text")

with guide_col:
    with st.expander("Quick Guide", expanded=True):
        # Built from explicit lines so the nested bullets under item 2 keep
        # their indentation regardless of how this source file is indented.
        st.markdown(
            "1. Enter your text in the input box\n"
            "2. Click \"Analyze Text\" to see:\n"
            "   - Sentence structure visualization\n"
            "   - Detailed token analysis\n"
            "   - Additional analysis in expandable sections\n"
            "3. Use mouse wheel or buttons to zoom the visualization\n"
            "4. Click and drag to pan around\n"
        )
# Options passed to displaCy's dependency renderer for every sentence.
_DEP_RENDER_OPTIONS = {
    "distance": 120,
    "arrow_stroke": 2,
    "arrow_width": 8,
    "font": "Arial",
    "bg": "#ffffff",
}

# Marker in the viewer template that is replaced by the base64 PNG payload.
_PNG_PLACEHOLDER = "__PNG_B64__"

# Height of the component iframe: the 500px container plus its margins/border.
_VIEWER_IFRAME_HEIGHT = 540

# Self-contained HTML document for the zoom/pan viewer.  It is rendered through
# ``components.html`` (an iframe) because ``st.markdown`` does not execute
# <script> tags; the CSS lives in the same document since page-level styles do
# not reach into the iframe.  A plain string with a placeholder is used instead
# of an f-string so the JavaScript braces need no escaping.
_ZOOM_VIEWER_TEMPLATE = """
<style>
body {
    margin: 0;
}
.image-container {
    position: relative;
    overflow: hidden;
    background: #b4b4b4;
    border: 1px solid #ddd;
    border-radius: 5px;
    margin: 10px 0;
}
.zoomable-image {
    transform-origin: 0 0;
    transition: transform 0.1s;
}
.download-btn {
    position: absolute;
    right: 10px;
    top: 10px;
    background: rgba(255, 255, 255, 0.8);
    border: 1px solid #ddd;
    border-radius: 4px;
    padding: 5px 10px;
    cursor: pointer;
    color: #333;
    font-family: sans-serif;
    font-size: 14px;
    text-decoration: none;
}
.download-btn:hover {
    background: white;
}
</style>
<div class="image-container" id="imageContainer">
    <img src="data:image/png;base64,__PNG_B64__"
         class="zoomable-image"
         id="zoomableImage"
         draggable="false"
         style="max-width: 100%;">
    <a class="download-btn"
       href="data:image/png;base64,__PNG_B64__"
       download="sentence_structure.png">
        📥 Download
    </a>
</div>
<script>
    const container = document.getElementById('imageContainer');
    const img = document.getElementById('zoomableImage');
    let scale = 1;
    let isPanning = false;
    let startX, startY, translateX = 0, translateY = 0;

    // Zoom functionality: keep the point under the cursor fixed while scaling.
    container.addEventListener('wheel', (e) => {
        e.preventDefault();
        const rect = container.getBoundingClientRect();
        const mouseX = e.clientX - rect.left;
        const mouseY = e.clientY - rect.top;
        const delta = e.deltaY * -0.01;
        const newScale = Math.max(1, Math.min(scale + delta, 4));
        const scaleChange = newScale / scale;
        translateX = mouseX - (mouseX - translateX) * scaleChange;
        translateY = mouseY - (mouseY - translateY) * scaleChange;
        scale = newScale;
        updateTransform();
    }, { passive: false });

    // Pan functionality
    container.addEventListener('mousedown', (e) => {
        isPanning = true;
        startX = e.clientX - translateX;
        startY = e.clientY - translateY;
        container.style.cursor = 'grabbing';
    });
    container.addEventListener('mousemove', (e) => {
        if (!isPanning) return;
        translateX = e.clientX - startX;
        translateY = e.clientY - startY;
        updateTransform();
    });
    container.addEventListener('mouseup', () => {
        isPanning = false;
        container.style.cursor = 'grab';
    });
    container.addEventListener('mouseleave', () => {
        isPanning = false;
        container.style.cursor = 'grab';
    });

    function updateTransform() {
        img.style.transform = `translate(${translateX}px, ${translateY}px) scale(${scale})`;
    }

    // Initialize
    container.style.cursor = 'grab';
    container.style.height = '500px';
</script>
"""


def _render_sentence_svgs(doc):
    """Return one displaCy dependency-parse markup string per sentence of *doc*.

    Each string is prefixed with an XML declaration when displaCy did not
    already emit one.
    """
    svgs = []
    for sent in doc.sents:
        svg = displacy.render(sent, style="dep", options=_DEP_RENDER_OPTIONS)
        # Ensure proper SVG structure
        if not svg.startswith('<?xml'):
            svg = '<?xml version="1.0" encoding="UTF-8"?>' + svg
        svgs.append(svg)
    return svgs


def _show_sentence_structure(doc):
    """Render the dependency parse of *doc* as a zoomable, downloadable PNG."""
    st.header("Sentence Structure Analysis")

    # NOTE(review): the joined string is several XML-prefixed SVG fragments
    # separated by <br> tags rather than a single SVG document -- confirm that
    # svg_to_png is written to accept this form.
    doc_html = "<br><br>".join(_render_sentence_svgs(doc))

    # Convert SVG to PNG; a None result is treated as a conversion failure.
    png_bytes = svg_to_png(doc_html)
    if png_bytes is None:
        st.error("Failed to generate visualization")
        return

    png_b64 = base64.b64encode(png_bytes).decode()
    components.html(
        _ZOOM_VIEWER_TEMPLATE.replace(_PNG_PLACEHOLDER, png_b64),
        height=_VIEWER_IFRAME_HEIGHT,
    )

    # Caption occupies the wider left column, matching the input layout.
    tip_col, _unused_col = st.columns([3, 1])
    with tip_col:
        st.caption("💡 Tip: Use mouse wheel to zoom, click and drag to pan around")


def _show_token_analysis(tokens):
    """Show a part-of-speech pie chart next to the full token table.

    *tokens* is iterated as dicts that each provide a ``'POS'`` key.
    """
    st.header("Token Analysis")
    token_df = pd.DataFrame(tokens)

    chart_col, table_col = st.columns([1, 2])
    with chart_col:
        # Token distribution visualization
        pos_counts = Counter(token['POS'] for token in tokens)
        fig = px.pie(
            values=list(pos_counts.values()),
            names=list(pos_counts.keys()),
            title="Parts of Speech Distribution",
        )
        fig.update_layout(height=400)
        st.plotly_chart(fig, use_container_width=True)
    with table_col:
        st.dataframe(token_df, use_container_width=True)


def _show_named_entities(entities):
    """Show an entity-label bar chart and table, or a notice when there are none.

    *entities* is iterated as dicts that each provide a ``'Label'`` key.
    """
    with st.expander("Named Entities"):
        if not entities:
            st.info("No named entities found in the text.")
            return
        ent_df = pd.DataFrame(entities)
        # Visualization of entity distribution
        entity_counts = Counter(ent['Label'] for ent in entities)
        fig = px.bar(
            x=list(entity_counts.keys()),
            y=list(entity_counts.values()),
            title="Distribution of Named Entities",
            labels={'x': 'Entity Type', 'y': 'Count'},
        )
        st.plotly_chart(fig)
        st.table(ent_df)


def _show_noun_chunks(noun_chunks):
    """Show the noun chunks as a table, or a notice when there are none."""
    with st.expander("Noun Chunks (Phrases)"):
        if noun_chunks:
            st.table(pd.DataFrame(noun_chunks))
        else:
            st.info("No noun chunks found in the text.")


def _show_text_statistics(stats):
    """Show the summary metrics read from the *stats* mapping."""
    with st.expander("Text Statistics"):
        words_col, sentences_col, unique_col = st.columns(3)
        with words_col:
            st.metric("Word Count", stats['Word Count'])
        with sentences_col:
            st.metric("Sentence Count", stats['Sentence Count'])
        with unique_col:
            st.metric("Unique Words", stats['Unique Words'])
        # NOTE(review): these two metrics are placed below the three columns;
        # confirm this matches the intended layout.
        st.metric("Average Words per Sentence", stats['Average Words per Sentence'])
        st.metric("Stop Words Percentage", f"{stats['Stop Words %']}%")


if analyze_button:
    # Whitespace-only input is treated the same as empty input.
    if text_input.strip():
        tokens, entities, noun_chunks, stats, doc = analyze_text(nlp, text_input)

        # 1. Dependency Parse with improved visualization
        _show_sentence_structure(doc)

        # 2. Detailed Token Analysis
        _show_token_analysis(tokens)

        # Additional Analysis in Expanders
        _show_named_entities(entities)
        _show_noun_chunks(noun_chunks)
        _show_text_statistics(stats)
    else:
        # Give feedback instead of silently doing nothing on an empty box.
        st.warning("Please enter some text to analyze.")