"""Helpers for spaCy-based text analysis and displaCy SVG-to-PNG conversion."""
import io
import sys

from cairosvg import svg2png
from PIL import Image

# import base64
# Human-readable descriptions for spaCy named-entity labels.
# Hoisted to module level so the dict is built once, not on every call.
_ENTITY_EXPLANATIONS = {
    'PERSON': 'People, including fictional',
    'NORP': 'Nationalities, religious or political groups',
    'FAC': 'Buildings, airports, highways, bridges, etc.',
    'ORG': 'Companies, agencies, institutions, etc.',
    'GPE': 'Countries, cities, states',
    'LOC': 'Non-GPE locations, mountain ranges, water bodies',
    'PRODUCT': 'Objects, vehicles, foods, etc.',
    'EVENT': 'Named hurricanes, battles, wars, sports events, etc.',
    'WORK_OF_ART': 'Titles of books, songs, etc.',
    'DATE': 'Absolute or relative dates or periods',
    'TIME': 'Times smaller than a day',
    'MONEY': 'Monetary values, including unit',
    'QUANTITY': 'Measurements, as of weight or distance',
}


def get_entity_explanation(label):
    """Return a human-readable explanation for a named-entity *label*.

    Unknown labels fall back to the generic 'Other type of entity'.
    """
    return _ENTITY_EXPLANATIONS.get(label, 'Other type of entity')
def analyze_text(nlp, text):
    """Run a spaCy pipeline over *text* and collect analysis tables.

    Parameters
    ----------
    nlp : a loaded spaCy ``Language`` pipeline (callable on text).
    text : str
        The raw text to analyze.

    Returns
    -------
    tuple of (tokens, entities, noun_chunks, stats, doc) where the first
    three are lists of dicts, ``stats`` is a summary dict, and ``doc`` is
    the spaCy ``Doc`` object.
    """
    doc = nlp(text)

    # Per-token attributes (tokenization / POS / dependency analysis).
    tokens = [{
        'Text': token.text,
        'Lemma': token.lemma_,
        'POS': token.pos_,
        'Tag': token.tag_,
        'Dependency': token.dep_,
        'Shape': token.shape_,
        'Is Alpha': token.is_alpha,
        'Is Stop': token.is_stop
    } for token in doc]

    # Named Entity Recognition with human-readable label explanations.
    entities = [{
        'Text': ent.text,
        'Label': ent.label_,
        'Explanation': get_entity_explanation(ent.label_),
        'Start': ent.start_char,
        'End': ent.end_char
    } for ent in doc.ents]

    # Noun chunks (base noun phrases) and their syntactic anchors.
    noun_chunks = [{
        'Text': chunk.text,
        'Root Text': chunk.root.text,
        'Root Dep': chunk.root.dep_,
        'Root Head Text': chunk.root.head.text
    } for chunk in doc.noun_chunks]

    # Text statistics. Compute each intermediate once (the original
    # rebuilt the non-punct list and list(doc.sents) twice each) and
    # guard the divisions: empty input would otherwise raise
    # ZeroDivisionError on 'Average Words per Sentence' / 'Stop Words %'.
    sentences = list(doc.sents)
    word_count = sum(1 for token in doc if not token.is_punct)
    stop_count = sum(1 for token in doc if token.is_stop)
    stats = {
        'Word Count': word_count,
        'Sentence Count': len(sentences),
        'Average Words per Sentence': (
            round(word_count / len(sentences), 2) if sentences else 0.0
        ),
        'Unique Words': len({token.text.lower() for token in doc if token.is_alpha}),
        'Stop Words %': (
            round(stop_count / len(doc) * 100, 2) if len(doc) else 0.0
        ),
    }

    return tokens, entities, noun_chunks, stats, doc
def svg_to_png(svg_content, background_color='white'):
    """Convert displaCy SVG markup to a single PNG image.

    Parameters
    ----------
    svg_content : str
        One or more SVG documents separated by '<br><br>'.
    background_color : str
        Fill color for the background and padding (default 'white').

    Returns
    -------
    bytes or None
        PNG-encoded image data, or None if no SVG part could be converted.
    """
    images = []
    for svg in svg_content.split('<br><br>'):
        # cairosvg requires the SVG namespace; displaCy output may omit it.
        if 'xmlns="http://www.w3.org/2000/svg"' not in svg:
            svg = svg.replace('<svg', '<svg xmlns="http://www.w3.org/2000/svg"')
        try:
            png_bytes = svg2png(bytestring=svg.encode('utf-8'),
                                background_color=background_color,
                                scale=1)
            img = Image.open(io.BytesIO(png_bytes))
            # Flatten transparency onto the requested background color.
            if img.mode == 'RGBA':
                background = Image.new('RGB', img.size, background_color)
                background.paste(img, mask=img.split()[3])  # alpha channel as mask
                img = background
            # Add vertical padding around each rendered sentence.
            padding = 20  # pixels
            padded = Image.new('RGB',
                               (img.width, img.height + padding * 2),
                               background_color)
            padded.paste(img, (0, padding))
            images.append(padded)
        except Exception as exc:
            # BUG FIX: the original handler called st.error(), but ``st``
            # (streamlit) is never imported in this module, so any
            # conversion failure raised NameError instead of being
            # reported. Log to stderr and skip this SVG part.
            print(f"Error converting SVG to PNG: {exc}", file=sys.stderr)
            continue

    if not images:
        return None

    if len(images) > 1:
        # Stack all rendered sentences vertically, centered horizontally.
        total_height = sum(img.height for img in images)
        max_width = max(img.width for img in images)
        combined = Image.new('RGB', (max_width, total_height), background_color)
        y_offset = 0
        for img in images:
            x_offset = (max_width - img.width) // 2
            combined.paste(img, (x_offset, y_offset))
            y_offset += img.height
    else:
        combined = images[0]

    # Serialize the combined image to PNG bytes for the caller.
    buffer = io.BytesIO()
    combined.save(buffer, format='PNG')
    return buffer.getvalue()