"""Gradio app exposing common Hazm Persian NLP operations (normalize,
tokenize, lemmatize, POS-tag, chunk, dependency-parse) over input text."""

import gradio as gr
from hazm import (
    Chunker,
    DependencyParser,
    Lemmatizer,
    Normalizer,
    POSTagger,
    word_tokenize,
)

# Initialize Hazm components once at module load — the taggers/parsers load
# model files from disk, which is too expensive to repeat per request.
normalizer = Normalizer()
lemmatizer = Lemmatizer()
tagger = POSTagger(model='resources/postagger.model')
chunker = Chunker(model='resources/chunker.model')
parser = DependencyParser(tagger=tagger, lemmatizer=lemmatizer)


def process_text(text, operations):
    """Run the selected Hazm operations on *text*.

    Parameters
    ----------
    text : str
        Raw Persian input text.
    operations : list[str]
        Any subset of {'normalize', 'tokenize', 'lemmatize', 'pos_tag',
        'chunk', 'dependency_parse'} (as produced by the CheckboxGroup).

    Returns
    -------
    dict
        Mapping of human-readable operation names to their results.
        Chunk and dependency-parse trees are stringified for JSON output.
    """
    result = {}

    # Normalization runs first so every downstream step sees normalized text.
    if 'normalize' in operations:
        text = normalizer.normalize(text)
        result['Normalized Text'] = text

    # Tokenize at most once and reuse; the original re-tokenized the same
    # text for every selected operation.
    token_ops = {'tokenize', 'lemmatize', 'pos_tag', 'chunk',
                 'dependency_parse'}
    tokens = word_tokenize(text) if token_ops & set(operations) else []

    if 'tokenize' in operations:
        result['Tokens'] = tokens
    if 'lemmatize' in operations:
        result['Lemmas'] = [lemmatizer.lemmatize(token) for token in tokens]

    # POS tags feed both 'pos_tag' and 'chunk'; compute them at most once.
    pos_tags = None
    if 'pos_tag' in operations or 'chunk' in operations:
        pos_tags = tagger.tag(tokens)
    if 'pos_tag' in operations:
        result['POS Tags'] = pos_tags
    if 'chunk' in operations:
        result['Chunks'] = str(chunker.parse(pos_tags))
    if 'dependency_parse' in operations:
        result['Dependency Parse'] = str(parser.parse(tokens))

    return result


# Define Gradio interface.
operations = ['normalize', 'tokenize', 'lemmatize', 'pos_tag', 'chunk',
              'dependency_parse']

iface = gr.Interface(
    fn=process_text,
    # The gr.inputs.* namespace was deprecated in Gradio 3.x and removed in
    # 4.x; the top-level component classes are the supported equivalents.
    inputs=[
        gr.Textbox(lines=10, label="Input Text"),
        gr.CheckboxGroup(operations, label="Operations"),
    ],
    outputs="json",
    title="Persian Text Processor with Hazm",
    description="Select operations to perform on the input text using Hazm.",
)

if __name__ == "__main__":
    iface.launch()