# Source: Hugging Face Space by HuuHuy227 — "init commit" (ad57a01), 9.2 kB
import streamlit as st
import spacy
from spacy import displacy
import pandas as pd
from collections import Counter
import plotly.express as px
from utils import analyze_text
from utils import svg_to_png
import base64
# Set page to wide mode for better visualization
st.set_page_config(layout="wide")


# Load the English language model once and cache it across Streamlit reruns;
# @st.cache_resource keeps the (slow-to-load) spaCy pipeline in memory instead
# of reloading it on every interaction.
@st.cache_resource
def load_model():
    """Load and return the medium English spaCy pipeline (en_core_web_md)."""
    return spacy.load('en_core_web_md')


nlp = load_model()
# Streamlit UI — centered page title.
st.markdown(
    "<h1 style='text-align: center; color: white;'>English Sentences Analyzer</h1>",
    unsafe_allow_html=True,
)

# Text input and quick-start help side by side.
col1, col2 = st.columns([3, 1])
with col1:
    text_input = st.text_area(
        "Enter English text:",
        "The ambitious startup in Silicon Valley developed an innovative AI system last year. " +
        "Google and Microsoft showed interest in acquiring the technology for $50 million.",
        height=200
    )
    analyze_button = st.button("Analyze Text")
with col2:
    with st.expander("Quick Guide", expanded=True):
        # Markdown content is kept flush-left: lines indented by 4+ spaces
        # would render as a code block instead of a list.
        st.markdown("""
1. Enter your text in the input box
2. Click "Analyze Text" to see:
   - Sentence structure visualization
   - Detailed token analysis
   - Additional analysis in expandable sections
3. Use mouse wheel or buttons to zoom the visualization
4. Click and drag to pan around
""")
# Run the full analysis pipeline when the user clicks "Analyze Text".
if analyze_button:
    if text_input:
        # analyze_text is a project helper (utils.py); it returns per-token
        # records, named entities, noun chunks, summary statistics and the
        # spaCy Doc itself.
        tokens, entities, noun_chunks, stats, doc = analyze_text(nlp, text_input)

        # --- 1. Dependency parse with zoomable visualization --------------
        st.header("Sentence Structure Analysis")

        # Render each sentence to an SVG fragment via displacy.
        sentence_htmls = []
        for sent in doc.sents:
            sent_html = displacy.render(sent, style="dep", options={
                "distance": 120,
                "arrow_stroke": 2,
                "arrow_width": 8,
                "font": "Arial",
                "bg": "#ffffff",
            })
            # Ensure the fragment carries an XML declaration.
            # NOTE(review): joining several '<?xml ...>'-prefixed SVGs with
            # <br> tags is not well-formed XML; this relies on svg_to_png
            # tolerating it — confirm against utils.svg_to_png.
            if not sent_html.startswith('<?xml'):
                sent_html = '<?xml version="1.0" encoding="UTF-8"?>' + sent_html
            sentence_htmls.append(sent_html)
        doc_html = "<br><br>".join(sentence_htmls)

        # Convert the combined SVG markup to a PNG; the project helper
        # signals failure by returning None rather than raising.
        png_bytes = svg_to_png(doc_html)
        if png_bytes is None:
            st.error("Failed to generate visualization")
        else:
            png_b64 = base64.b64encode(png_bytes).decode()

            # CSS for the zoom/pan image container. Top-level tags start at
            # column 0 so Markdown passes the HTML through untouched.
            st.markdown("""
<style>
.image-container {
    position: relative;
    overflow: hidden;
    background: #b4b4b4;
    border: 1px solid #ddd;
    border-radius: 5px;
    margin: 10px 0;
}
.zoomable-image {
    transform-origin: 0 0;
    transition: transform 0.1s;
}
.download-btn {
    position: absolute;
    right: 10px;
    top: 10px;
    background: rgba(255, 255, 255, 0.8);
    border: 1px solid #ddd;
    border-radius: 4px;
    padding: 5px 10px;
    cursor: pointer;
}
.download-btn:hover {
    background: white;
}
</style>
""", unsafe_allow_html=True)

            # HTML + JS for mouse-wheel zoom and click-drag panning, plus a
            # download link for the rendered PNG. Doubled braces escape
            # literal { } inside the f-string; only {png_b64} interpolates.
            js_code = f"""
<div class="image-container" id="imageContainer">
    <img src="data:image/png;base64,{png_b64}"
         class="zoomable-image"
         id="zoomableImage"
         style="max-width: 100%;">
    <a class="download-btn"
       href="data:image/png;base64,{png_b64}"
       download="sentence_structure.png">
        📥 Download
    </a>
</div>
<script>
const container = document.getElementById('imageContainer');
const img = document.getElementById('zoomableImage');
let scale = 1;
let isPanning = false;
let startX, startY, translateX = 0, translateY = 0;

// Zoom towards the mouse position, scale clamped to [1, 4].
container.addEventListener('wheel', (e) => {{
    e.preventDefault();
    const rect = container.getBoundingClientRect();
    const mouseX = e.clientX - rect.left;
    const mouseY = e.clientY - rect.top;
    const delta = e.deltaY * -0.01;
    const newScale = Math.max(1, Math.min(scale + delta, 4));
    const scaleChange = newScale / scale;
    translateX = mouseX - (mouseX - translateX) * scaleChange;
    translateY = mouseY - (mouseY - translateY) * scaleChange;
    scale = newScale;
    updateTransform();
}});

// Pan with click-and-drag.
container.addEventListener('mousedown', (e) => {{
    isPanning = true;
    startX = e.clientX - translateX;
    startY = e.clientY - translateY;
    container.style.cursor = 'grabbing';
}});
container.addEventListener('mousemove', (e) => {{
    if (!isPanning) return;
    translateX = e.clientX - startX;
    translateY = e.clientY - startY;
    updateTransform();
}});
container.addEventListener('mouseup', () => {{
    isPanning = false;
    container.style.cursor = 'grab';
}});
container.addEventListener('mouseleave', () => {{
    isPanning = false;
    container.style.cursor = 'grab';
}});

function updateTransform() {{
    img.style.transform = `translate(${{translateX}}px, ${{translateY}}px) scale(${{scale}})`;
}}

// Initialize
container.style.cursor = 'grab';
container.style.height = '500px';
</script>
"""
            st.markdown(js_code, unsafe_allow_html=True)

            # Usage hint under the visualization.
            col1, col2 = st.columns([3, 1])
            with col1:
                st.caption("💡 Tip: Use mouse wheel to zoom, click and drag to pan around")

        # --- 2. Detailed token analysis -----------------------------------
        st.header("Token Analysis")
        token_df = pd.DataFrame(tokens)

        # POS distribution pie chart next to the full token table.
        col1, col2 = st.columns([1, 2])
        with col1:
            pos_counts = Counter(token['POS'] for token in tokens)
            fig = px.pie(
                values=list(pos_counts.values()),
                names=list(pos_counts.keys()),
                title="Parts of Speech Distribution"
            )
            fig.update_layout(height=400)
            st.plotly_chart(fig, use_container_width=True)
        with col2:
            st.dataframe(token_df, use_container_width=True)

        # --- 3. Additional analysis in expanders --------------------------
        with st.expander("Named Entities"):
            if entities:
                ent_df = pd.DataFrame(entities)
                # Bar chart of entity-type frequencies above the raw table.
                entity_counts = Counter(ent['Label'] for ent in entities)
                fig = px.bar(
                    x=list(entity_counts.keys()),
                    y=list(entity_counts.values()),
                    title="Distribution of Named Entities",
                    labels={'x': 'Entity Type', 'y': 'Count'}
                )
                st.plotly_chart(fig)
                st.table(ent_df)
            else:
                st.info("No named entities found in the text.")

        with st.expander("Noun Chunks (Phrases)"):
            if noun_chunks:
                st.table(pd.DataFrame(noun_chunks))
            else:
                st.info("No noun chunks found in the text.")

        with st.expander("Text Statistics"):
            col1, col2, col3 = st.columns(3)
            with col1:
                st.metric("Word Count", stats['Word Count'])
            with col2:
                st.metric("Sentence Count", stats['Sentence Count'])
            with col3:
                st.metric("Unique Words", stats['Unique Words'])
            st.metric("Average Words per Sentence", stats['Average Words per Sentence'])
            st.metric("Stop Words Percentage", f"{stats['Stop Words %']}%")
    else:
        # Robustness: previously an empty input produced no feedback at all.
        st.warning("Please enter some text to analyze.")