# Hazm / app.py
# Author: AlirezaF138 (Hugging Face Space)
# NOTE: the original upload included raw web-page residue here
# ("Create app.py", commit d14409f, "raw / history / blame / 1.73 kB"),
# which made the file invalid Python; it is preserved as comments.
import gradio as gr
from hazm import Normalizer, word_tokenize, Lemmatizer, POSTagger, Chunker, DependencyParser
# Initialize Hazm components once at module load (shared by all requests).
normalizer = Normalizer()
lemmatizer = Lemmatizer()
# Pre-trained model files are expected on disk relative to the working
# directory — presumably downloaded from the hazm releases; TODO confirm
# the Space bundles 'resources/' alongside this script.
tagger = POSTagger(model='resources/postagger.model')
chunker = Chunker(model='resources/chunker.model')
# The dependency parser reuses the tagger and lemmatizer built above.
parser = DependencyParser(tagger=tagger, lemmatizer=lemmatizer)
def process_text(text, operations):
    """Run the selected Hazm operations on *text*.

    Parameters
    ----------
    text : str
        Raw Persian input text.
    operations : list[str]
        Any subset of: 'normalize', 'tokenize', 'lemmatize', 'pos_tag',
        'chunk', 'dependency_parse'.

    Returns
    -------
    dict
        Mapping from a human-readable label to each requested result.
        Empty when *operations* is empty.
    """
    result = {}
    if 'normalize' in operations:
        # Normalization rewrites `text`, so all later operations run on
        # the normalized form (same as the original behavior).
        text = normalizer.normalize(text)
        result['Normalized Text'] = text

    # Tokenize once and reuse: the original re-ran word_tokenize(text)
    # for every downstream operation (up to five times per call).
    token_ops = {'tokenize', 'lemmatize', 'pos_tag', 'chunk', 'dependency_parse'}
    tokens = word_tokenize(text) if token_ops.intersection(operations) else None

    if 'tokenize' in operations:
        result['Tokens'] = tokens
    if 'lemmatize' in operations:
        result['Lemmas'] = [lemmatizer.lemmatize(token) for token in tokens]

    # POS tagging is needed by both 'pos_tag' and 'chunk'; compute it once.
    pos_tags = None
    if 'pos_tag' in operations or 'chunk' in operations:
        pos_tags = tagger.tag(tokens)
    if 'pos_tag' in operations:
        result['POS Tags'] = pos_tags
    if 'chunk' in operations:
        result['Chunks'] = str(chunker.parse(pos_tags))
    if 'dependency_parse' in operations:
        result['Dependency Parse'] = str(parser.parse(tokens))
    return result
# Define the Gradio interface.
operations = ['normalize', 'tokenize', 'lemmatize', 'pos_tag', 'chunk', 'dependency_parse']

iface = gr.Interface(
    fn=process_text,
    inputs=[
        # gr.inputs.* was deprecated in Gradio 2.x and removed in 3.0;
        # use the top-level component classes instead.
        gr.Textbox(lines=10, label="Input Text"),
        gr.CheckboxGroup(choices=operations, label="Operations"),
    ],
    outputs="json",
    title="Persian Text Processor with Hazm",
    description="Select operations to perform on the input text using Hazm.",
)

if __name__ == "__main__":
    iface.launch()