# Persian Text Processor — Gradio app exposing Hazm NLP operations.
# (Hugging Face Spaces page header removed from the scraped source.)
import gradio as gr
from hazm import Chunker, DependencyParser, Lemmatizer, Normalizer, POSTagger, word_tokenize
# Initialize Hazm components once at import time so every request reuses them.
normalizer = Normalizer()
lemmatizer = Lemmatizer()
# Pre-trained model files must exist under resources/ — TODO confirm paths at deploy time.
tagger = POSTagger(model='resources/postagger.model')
chunker = Chunker(model='resources/chunker.model')
# The dependency parser reuses the tagger and lemmatizer built above.
parser = DependencyParser(tagger=tagger, lemmatizer=lemmatizer)
def process_text(text, operations):
    """Apply the selected Hazm operations to *text* and collect the results.

    Parameters
    ----------
    text : str
        Raw Persian input text.
    operations : list[str]
        Any subset of {'normalize', 'tokenize', 'lemmatize', 'pos_tag',
        'chunk', 'dependency_parse'} as chosen in the UI.

    Returns
    -------
    dict
        Human-readable label -> result for each selected operation.
        Empty when no operation is selected.
    """
    result = {}
    if 'normalize' in operations:
        # Normalization rewrites *text* so later operations see the
        # normalized form, matching the original behavior.
        text = normalizer.normalize(text)
        result['Normalized Text'] = text
    # Tokenize once and reuse; the original re-ran word_tokenize for
    # every token-consuming operation.
    token_ops = {'tokenize', 'lemmatize', 'pos_tag', 'chunk', 'dependency_parse'}
    tokens = word_tokenize(text) if token_ops.intersection(operations) else None
    if 'tokenize' in operations:
        result['Tokens'] = tokens
    if 'lemmatize' in operations:
        result['Lemmas'] = [lemmatizer.lemmatize(token) for token in tokens]
    if 'pos_tag' in operations:
        result['POS Tags'] = tagger.tag(tokens)
    if 'chunk' in operations:
        # Chunking consumes POS-tagged tokens; stringify the parse tree for JSON output.
        result['Chunks'] = str(chunker.parse(tagger.tag(tokens)))
    if 'dependency_parse' in operations:
        result['Dependency Parse'] = str(parser.parse(tokens))
    return result
# Define the Gradio interface.
operations = ['normalize', 'tokenize', 'lemmatize', 'pos_tag', 'chunk', 'dependency_parse']

iface = gr.Interface(
    fn=process_text,
    inputs=[
        # The gr.inputs.* namespace was deprecated in Gradio 3.x and removed
        # in 4.x; the top-level components are the supported equivalents.
        gr.Textbox(lines=10, label="Input Text"),
        gr.CheckboxGroup(choices=operations, label="Operations"),
    ],
    outputs="json",
    title="Persian Text Processor with Hazm",
    description="Select operations to perform on the input text using Hazm.",
)

if __name__ == "__main__":
    iface.launch()