Spaces:

AlirezaF138
/

Hazm

Running

AlirezaF138 committed on Jan 17

Commit

33901fb

verified ·

1 Parent(s): 0b65d75

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -1,12 +1,25 @@
 import gradio as gr
 from hazm import Normalizer, word_tokenize, Lemmatizer, Chunker
 # Initialize Hazm components
-normalizer = Normalizer()
 lemmatizer = Lemmatizer()
 chunker = Chunker(model='resources/chunker.model')
-def process_text(text, operations):
     result = {}
     if 'normalize' in operations:
         text = normalizer.normalize(text)
@@ -23,17 +36,21 @@ def process_text(text, operations):
         result['Chunks'] = str(chunks)
     return result
-# Define Gradio interface with updated syntax
 operations = ['normalize', 'tokenize', 'lemmatize', 'chunk']
 iface = gr.Interface(
     fn=process_text,
     inputs=[
         gr.Textbox(lines=10, label="Input Text"),
-        gr.CheckboxGroup(operations, label="Operations")
     ],
     outputs="json",
     title="Persian Text Processor with Hazm",
-    description="Select operations to perform on the input text using Hazm."
 )
 if __name__ == "__main__":

 import gradio as gr
 from hazm import Normalizer, word_tokenize, Lemmatizer, Chunker
+# Normalization options offered in the UI, each mapped to the initial state of its checkbox
+normalization_params = {  # iterated in insertion order to build one gr.Checkbox per entry
+    'correct_spacing': True,
+    'remove_diacritics': True,
+    'remove_specials_chars': True,  # NOTE(review): spelling presumably mirrors hazm's Normalizer keyword - verify before renaming
+    'decrease_repeated_chars': True,
+    'persian_style': True,
+    'persian_numbers': True,
+    'unicodes_replacement': True,
+    'seperate_mi': True  # NOTE(review): "seperate" presumably mirrors hazm's Normalizer keyword - verify before "fixing" it
+}
 # Initialize Hazm components
 lemmatizer = Lemmatizer()
 chunker = Chunker(model='resources/chunker.model')
+def process_text(text, operations, **kwargs):
+    # Initialize the Normalizer with user-selected parameters
+    normalizer = Normalizer(**kwargs)
     result = {}
     if 'normalize' in operations:
         text = normalizer.normalize(text)
         result['Chunks'] = str(chunks)
     return result
+# Build the Gradio UI: a text box, an operation selector, and one checkbox per normalization option
 operations = ['normalize', 'tokenize', 'lemmatize', 'chunk']  # choices shown in the "Operations" checkbox group
 iface = gr.Interface(
     fn=process_text,  # NOTE(review): Gradio is understood to pass input values positionally, but process_text takes the flags via **kwargs - confirm this does not raise TypeError
     inputs=[
         gr.Textbox(lines=10, label="Input Text"),
+        gr.CheckboxGroup(operations, label="Operations"),
+        *[
+            gr.Checkbox(value=default, label=param.replace('_', ' ').capitalize())  # e.g. 'correct_spacing' -> 'Correct spacing'
+            for param, default in normalization_params.items()
+        ]
     ],
     outputs="json",  # process_text returns a dict
     title="Persian Text Processor with Hazm",
+    description="Select operations and normalization parameters to perform on the input text using Hazm."
 )
 if __name__ == "__main__":