"""Text-analysis helpers for a spaCy-style pipeline, plus SVG-to-PNG rendering."""

import io

# The rendering stack is only needed by svg_to_png(). Guard the imports so the
# text-analysis helpers stay importable when it is unavailable.
try:
    from cairosvg import svg2png
except (ImportError, OSError):
    # OSError covers the case where cairosvg is installed but the native
    # cairo library cannot be loaded.
    svg2png = None

try:
    from PIL import Image
except ImportError:
    Image = None


# Human-readable descriptions for named-entity labels.
_ENTITY_EXPLANATIONS = {
    'PERSON': 'People, including fictional',
    'NORP': 'Nationalities, religious or political groups',
    'FAC': 'Buildings, airports, highways, bridges, etc.',
    'ORG': 'Companies, agencies, institutions, etc.',
    'GPE': 'Countries, cities, states',
    'LOC': 'Non-GPE locations, mountain ranges, water bodies',
    'PRODUCT': 'Objects, vehicles, foods, etc.',
    'EVENT': 'Named hurricanes, battles, wars, sports events, etc.',
    'WORK_OF_ART': 'Titles of books, songs, etc.',
    'DATE': 'Absolute or relative dates or periods',
    'TIME': 'Times smaller than a day',
    'MONEY': 'Monetary values, including unit',
    'QUANTITY': 'Measurements, as of weight or distance',
}

_SVG_NAMESPACE_ATTR = 'xmlns="http://www.w3.org/2000/svg"'


def get_entity_explanation(label):
    """Return a short explanation for a named-entity label.

    Args:
        label: Entity label string such as ``'PERSON'`` or ``'GPE'``.

    Returns:
        The matching description, or ``'Other type of entity'`` when the
        label is not in the lookup table.
    """
    return _ENTITY_EXPLANATIONS.get(label, 'Other type of entity')


def analyze_text(nlp, text):
    """Run ``nlp`` over ``text`` and collect token, entity and chunk tables.

    Args:
        nlp: Callable that turns ``text`` into a document object. The document
            must be iterable over tokens, support ``len()``, and expose
            ``ents``, ``noun_chunks`` and ``sents``.
        text: The raw text to analyse.

    Returns:
        A 5-tuple ``(tokens, entities, noun_chunks, stats, doc)``. The first
        three are lists of dicts (one per token / entity / noun chunk),
        ``stats`` is a dict of summary figures, and ``doc`` is the object
        returned by ``nlp``. For a document with no sentences or no tokens
        the ratio statistics are ``0.0`` instead of raising
        ``ZeroDivisionError``.
    """
    doc = nlp(text)

    # Basic tokenization and POS analysis
    tokens = [
        {
            'Text': token.text,
            'Lemma': token.lemma_,
            'POS': token.pos_,
            'Tag': token.tag_,
            'Dependency': token.dep_,
            'Shape': token.shape_,
            'Is Alpha': token.is_alpha,
            'Is Stop': token.is_stop,
        }
        for token in doc
    ]

    # Named Entity Recognition
    entities = [
        {
            'Text': ent.text,
            'Label': ent.label_,
            'Explanation': get_entity_explanation(ent.label_),
            'Start': ent.start_char,
            'End': ent.end_char,
        }
        for ent in doc.ents
    ]

    # Noun Chunks (phrases)
    noun_chunks = [
        {
            'Text': chunk.text,
            'Root Text': chunk.root.text,
            'Root Dep': chunk.root.dep_,
            'Root Head Text': chunk.root.head.text,
        }
        for chunk in doc.noun_chunks
    ]

    # Text Statistics: count each quantity once and reuse it below.
    token_count = len(doc)
    word_count = sum(1 for token in doc if not token.is_punct)
    sentence_count = sum(1 for _ in doc.sents)
    stop_count = sum(1 for token in doc if token.is_stop)

    # Empty input yields zero sentences/tokens; report 0.0 rather than divide.
    if sentence_count:
        avg_words = round(word_count / sentence_count, 2)
    else:
        avg_words = 0.0
    if token_count:
        stop_pct = round(stop_count / token_count * 100, 2)
    else:
        stop_pct = 0.0

    stats = {
        'Word Count': word_count,
        'Sentence Count': sentence_count,
        'Average Words per Sentence': avg_words,
        'Unique Words': len(
            {token.text.lower() for token in doc if token.is_alpha}
        ),
        'Stop Words %': stop_pct,
    }

    return tokens, entities, noun_chunks, stats, doc


def svg_to_png(svg_content, background_color='white'):
    """Convert SVG markup to PNG bytes with the given background color.

    ``svg_content`` may hold several SVG documents separated by a blank line;
    each is rendered separately and the results are stacked vertically, each
    one centered horizontally, into a single image.

    Args:
        svg_content: SVG markup as a string.
        background_color: Color used for the render and for the canvas that
            holds multiple images.

    Returns:
        The PNG-encoded image as ``bytes``.

    Raises:
        ImportError: If cairosvg or Pillow could not be imported.
        ValueError: If ``svg_content`` contains no SVG markup to render.
    """
    if svg2png is None or Image is None:
        raise ImportError('svg_to_png requires the cairosvg and Pillow packages')

    # Split multiple SVGs if present
    svg_parts = svg_content.split('\n\n')

    images = []
    for svg in svg_parts:
        if not svg.strip():
            # Nothing to render in an empty fragment.
            continue

        # Add SVG namespace if missing
        if _SVG_NAMESPACE_ATTR not in svg:
            svg = svg.replace('<svg', '<svg ' + _SVG_NAMESPACE_ATTR)

        # NOTE(review): the conversion step was missing from the reviewed
        # source (text between the namespace fix and the multi-image check
        # was lost); this is a reconstruction - confirm against the original.
        png_bytes = svg2png(
            bytestring=svg.encode('utf-8'),
            background_color=background_color,
        )
        images.append(Image.open(io.BytesIO(png_bytes)))

    if not images:
        raise ValueError('svg_content contains no SVG markup to render')

    if len(images) > 1:
        # Calculate total height and max width
        total_height = sum(img.height for img in images)
        max_width = max(img.width for img in images)

        # Create new image to hold all sentences
        combined = Image.new('RGB', (max_width, total_height), background_color)

        # Paste each image, centered horizontally, below the previous one
        y_offset = 0
        for img in images:
            x_offset = (max_width - img.width) // 2
            combined.paste(img, (x_offset, y_offset))
            y_offset += img.height
    else:
        combined = images[0]

    # Convert to bytes for Streamlit
    buffer = io.BytesIO()
    combined.save(buffer, format='PNG')
    return buffer.getvalue()