"""Streamlit app: analyze English text with spaCy.

Renders a dependency-parse visualization (via displacy, converted to a
zoomable/pannable PNG), a token-level breakdown with a POS pie chart, and
expandable sections for named entities, noun chunks, and text statistics.

NOTE(review): this file was recovered from a whitespace-mangled source in
which every HTML/CSS/JS string literal had its markup stripped.  The Python
logic below is faithful to the original; the HTML/CSS/JS payloads marked
with "reconstructed" comments are best-effort rebuilds and should be
confirmed against the original app.
"""

import base64
from collections import Counter

import pandas as pd
import plotly.express as px
import spacy
import streamlit as st
from spacy import displacy

from utils import analyze_text, svg_to_png

# Set page to wide mode for better visualization
st.set_page_config(layout="wide")


@st.cache_resource
def load_model():
    """Load and cache the spaCy English medium model for the session."""
    return spacy.load('en_core_web_md')


nlp = load_model()

# Page title, rendered as HTML so it can be centered.
# NOTE(review): markup reconstructed — original heading HTML was stripped.
st.markdown(
    "<h1 style='text-align: center;'>English Sentences Analyzer</h1>",
    unsafe_allow_html=True,
)

# Text input and help side by side
col1, col2 = st.columns([3, 1])
with col1:
    text_input = st.text_area(
        "Enter English text:",
        "The ambitious startup in Silicon Valley developed an innovative AI system last year. "
        + "Google and Microsoft showed interest in acquiring the technology for $50 million.",
        height=200,
    )
    analyze_button = st.button("Analyze Text")
with col2:
    with st.expander("Quick Guide", expanded=True):
        st.markdown("""
        1. Enter your text in the input box
        2. Click "Analyze Text" to see:
           - Sentence structure visualization
           - Detailed token analysis
           - Additional analysis in expandable sections
        3. Use mouse wheel or buttons to zoom the visualization
        4. Click and drag to pan around
        """)

if analyze_button:
    if text_input:
        tokens, entities, noun_chunks, stats, doc = analyze_text(nlp, text_input)

        # 1. Dependency parse with improved visualization
        st.header("Sentence Structure Analysis")

        # Render each sentence separately so long texts stay readable.
        sentences = list(doc.sents)
        sentence_htmls = []
        for sent in sentences:
            sent_html = displacy.render(sent, style="dep", options={
                "distance": 120,
                "arrow_stroke": 2,
                "arrow_width": 8,
                "font": "Arial",
                "bg": "#ffffff",
            })
            # Ensure proper SVG structure.
            # NOTE(review): the original guard was truncated in the mangled
            # source ("startswith('' + sent_html"); reconstructed as a check
            # that the fragment begins with an <svg> root element — confirm
            # against the original svg_to_png expectations.
            if not sent_html.startswith('<svg'):
                sent_html = '<svg xmlns="http://www.w3.org/2000/svg">' + sent_html + '</svg>'
            sentence_htmls.append(sent_html)

        # NOTE(review): separator reconstructed — the original joiner string
        # was stripped; svg_to_png receives the concatenated sentence SVGs.
        doc_html = "\n".join(sentence_htmls)

        # Convert SVG to PNG with error handling
        png_bytes = svg_to_png(doc_html)
        if png_bytes is None:
            st.error("Failed to generate visualization")
        else:
            png_b64 = base64.b64encode(png_bytes).decode()

            # CSS for the image container.
            # NOTE(review): stylesheet reconstructed — original CSS was stripped.
            st.markdown("""
            <style>
            .viz-container {
                overflow: hidden;
                border: 1px solid #ddd;
                border-radius: 4px;
                background: #ffffff;
            }
            .viz-container img {
                cursor: grab;
                transform-origin: 0 0;
            }
            </style>
            """, unsafe_allow_html=True)

            # HTML + JavaScript for zoom/pan and a download link.
            # NOTE(review): markup and handlers reconstructed — original JS was
            # stripped; only the "📥 Download" label survived.  Also note that
            # st.markdown does not execute <script> tags; if interactivity is
            # required this should move to st.components.v1.html — confirm.
            js_code = f"""
            <div class="viz-container" id="viz-container">
                <img id="viz-img" src="data:image/png;base64,{png_b64}" alt="Dependency parse"/>
            </div>
            <a download="sentence_structure.png"
               href="data:image/png;base64,{png_b64}">📥 Download</a>
            <script>
            (function() {{
                var img = document.getElementById("viz-img");
                var scale = 1, panX = 0, panY = 0, dragging = false, sx = 0, sy = 0;
                function apply() {{
                    img.style.transform =
                        "translate(" + panX + "px," + panY + "px) scale(" + scale + ")";
                }}
                img.parentElement.addEventListener("wheel", function(e) {{
                    e.preventDefault();
                    scale *= e.deltaY < 0 ? 1.1 : 0.9;
                    apply();
                }});
                img.addEventListener("mousedown", function(e) {{
                    dragging = true; sx = e.clientX - panX; sy = e.clientY - panY;
                }});
                window.addEventListener("mousemove", function(e) {{
                    if (!dragging) return;
                    panX = e.clientX - sx; panY = e.clientY - sy; apply();
                }});
                window.addEventListener("mouseup", function() {{ dragging = false; }});
            }})();
            </script>
            """
            st.markdown(js_code, unsafe_allow_html=True)

        # Add caption
        col1, col2 = st.columns([3, 1])
        with col1:
            st.caption("💡 Tip: Use mouse wheel to zoom, click and drag to pan around")

        # 2. Detailed token analysis
        st.header("Token Analysis")
        token_df = pd.DataFrame(tokens)

        # Two columns: POS distribution chart and the full token table.
        col1, col2 = st.columns([1, 2])
        with col1:
            pos_counts = Counter(token['POS'] for token in tokens)
            fig = px.pie(
                values=list(pos_counts.values()),
                names=list(pos_counts.keys()),
                title="Parts of Speech Distribution",
            )
            fig.update_layout(height=400)
            st.plotly_chart(fig, use_container_width=True)
        with col2:
            st.dataframe(token_df, use_container_width=True)

        # Additional analysis in expanders
        with st.expander("Named Entities"):
            if entities:
                ent_df = pd.DataFrame(entities)
                # Bar chart of how many entities of each type were found.
                entity_counts = Counter(ent['Label'] for ent in entities)
                fig = px.bar(
                    x=list(entity_counts.keys()),
                    y=list(entity_counts.values()),
                    title="Distribution of Named Entities",
                    labels={'x': 'Entity Type', 'y': 'Count'},
                )
                st.plotly_chart(fig)
                st.table(ent_df)
            else:
                st.info("No named entities found in the text.")

        with st.expander("Noun Chunks (Phrases)"):
            if noun_chunks:
                st.table(pd.DataFrame(noun_chunks))
            else:
                st.info("No noun chunks found in the text.")

        with st.expander("Text Statistics"):
            col1, col2, col3 = st.columns(3)
            with col1:
                st.metric("Word Count", stats['Word Count'])
            with col2:
                st.metric("Sentence Count", stats['Sentence Count'])
            with col3:
                st.metric("Unique Words", stats['Unique Words'])
            st.metric("Average Words per Sentence", stats['Average Words per Sentence'])
            st.metric("Stop Words Percentage", f"{stats['Stop Words %']}%")
    else:
        # Robustness: give feedback instead of silently doing nothing when
        # the user clicks Analyze with an empty input box.
        st.warning("Please enter some text to analyze.")