import io
from cairosvg import svg2png
from PIL import Image
# import base64
# Human-readable descriptions for OntoNotes-style named-entity labels
# (presumably the labels produced by the pipeline handed to analyze_text).
# Defined once at module level so the table is not rebuilt on every lookup.
_ENTITY_EXPLANATIONS = {
    'PERSON': 'People, including fictional',
    'NORP': 'Nationalities, religious or political groups',
    'FAC': 'Buildings, airports, highways, bridges, etc.',
    'ORG': 'Companies, agencies, institutions, etc.',
    'GPE': 'Countries, cities, states',
    'LOC': 'Non-GPE locations, mountain ranges, water bodies',
    'PRODUCT': 'Objects, vehicles, foods, etc.',
    'EVENT': 'Named hurricanes, battles, wars, sports events, etc.',
    'WORK_OF_ART': 'Titles of books, songs, etc.',
    'LAW': 'Named documents made into laws',
    'LANGUAGE': 'Any named language',
    'DATE': 'Absolute or relative dates or periods',
    'TIME': 'Times smaller than a day',
    'PERCENT': 'Percentage, including "%"',
    'MONEY': 'Monetary values, including unit',
    'QUANTITY': 'Measurements, as of weight or distance',
    'ORDINAL': '"first", "second", etc.',
    'CARDINAL': 'Numerals that do not fall under another type',
}


def get_entity_explanation(label):
    """Return a human-readable explanation for a named-entity label.

    Args:
        label: Entity label string such as ``'PERSON'`` or ``'GPE'``.

    Returns:
        The description registered for ``label``, or the generic text
        ``'Other type of entity'`` when the label is not in the table.
    """
    return _ENTITY_EXPLANATIONS.get(label, 'Other type of entity')
def analyze_text(nlp, text):
    """Run ``nlp`` over ``text`` and collect token, entity, chunk and summary data.

    Args:
        nlp: Callable that turns a string into a document object. The
            document must be iterable over tokens, support ``len()``, and
            expose ``ents``, ``noun_chunks`` and ``sents`` (presumably a
            loaded spaCy pipeline -- confirm with the caller).
        text: The text to analyse.

    Returns:
        A 5-tuple ``(tokens, entities, noun_chunks, stats, doc)``:

        * ``tokens`` -- one dict per token (text, lemma, POS, tag,
          dependency, shape, alpha flag, stop-word flag).
        * ``entities`` -- one dict per named entity, including its
          explanation and character offsets.
        * ``noun_chunks`` -- one dict per noun chunk with its root details.
        * ``stats`` -- dict of summary counts and ratios. The two ratios are
          ``0.0`` when the document has no sentences / no tokens, so empty
          input no longer raises ``ZeroDivisionError``.
        * ``doc`` -- the document object returned by ``nlp``.
    """
    doc = nlp(text)

    # Basic tokenization and POS analysis
    tokens = [
        {
            'Text': token.text,
            'Lemma': token.lemma_,
            'POS': token.pos_,
            'Tag': token.tag_,
            'Dependency': token.dep_,
            'Shape': token.shape_,
            'Is Alpha': token.is_alpha,
            'Is Stop': token.is_stop,
        }
        for token in doc
    ]

    # Named Entity Recognition
    entities = [
        {
            'Text': ent.text,
            'Label': ent.label_,
            'Explanation': get_entity_explanation(ent.label_),
            'Start': ent.start_char,
            'End': ent.end_char,
        }
        for ent in doc.ents
    ]

    # Noun Chunks (phrases)
    noun_chunks = [
        {
            'Text': chunk.text,
            'Root Text': chunk.root.text,
            'Root Dep': chunk.root.dep_,
            'Root Head Text': chunk.root.head.text,
        }
        for chunk in doc.noun_chunks
    ]

    # Text Statistics: compute each count once and reuse it below.
    token_total = len(doc)
    word_count = sum(1 for token in doc if not token.is_punct)
    stop_count = sum(1 for token in doc if token.is_stop)
    sentence_count = sum(1 for _ in doc.sents)
    unique_words = {token.text.lower() for token in doc if token.is_alpha}

    # Guard both divisions: an empty document has zero tokens and zero
    # sentences, which previously raised ZeroDivisionError.
    if sentence_count:
        avg_words_per_sentence = round(word_count / sentence_count, 2)
    else:
        avg_words_per_sentence = 0.0
    if token_total:
        stop_word_pct = round(stop_count / token_total * 100, 2)
    else:
        stop_word_pct = 0.0

    stats = {
        'Word Count': word_count,
        'Sentence Count': sentence_count,
        'Average Words per Sentence': avg_words_per_sentence,
        'Unique Words': len(unique_words),
        'Stop Words %': stop_word_pct,
    }
    return tokens, entities, noun_chunks, stats, doc
def svg_to_png(svg_content, background_color='white'):
"""Convert SVG to PNG with specified background color"""
# Split multiple SVGs if present
svg_parts = svg_content.split('
')
images = []
for svg in svg_parts:
# Add SVG namespace if missing
if not 'xmlns="http://www.w3.org/2000/svg"' in svg:
svg = svg.replace('