AlirezaF138 committed on
Commit
33901fb
·
verified ·
1 Parent(s): 0b65d75

Update app.py: expose Hazm Normalizer options as per-parameter checkboxes

Browse files
Files changed (1) hide show
  1. app.py +22 -5
app.py CHANGED
@@ -1,12 +1,25 @@
1
  import gradio as gr
2
  from hazm import Normalizer, word_tokenize, Lemmatizer, Chunker
3
 
 
 
 
 
 
 
 
 
 
 
 
 
4
  # Initialize Hazm components
5
- normalizer = Normalizer()
6
  lemmatizer = Lemmatizer()
7
  chunker = Chunker(model='resources/chunker.model')
8
 
9
- def process_text(text, operations):
 
 
10
  result = {}
11
  if 'normalize' in operations:
12
  text = normalizer.normalize(text)
@@ -23,17 +36,21 @@ def process_text(text, operations):
23
  result['Chunks'] = str(chunks)
24
  return result
25
 
26
- # Define Gradio interface with updated syntax
27
  operations = ['normalize', 'tokenize', 'lemmatize', 'chunk']
28
  iface = gr.Interface(
29
  fn=process_text,
30
  inputs=[
31
  gr.Textbox(lines=10, label="Input Text"),
32
- gr.CheckboxGroup(operations, label="Operations")
 
 
 
 
33
  ],
34
  outputs="json",
35
  title="Persian Text Processor with Hazm",
36
- description="Select operations to perform on the input text using Hazm."
37
  )
38
 
39
  if __name__ == "__main__":
 
1
  import gradio as gr
2
  from hazm import Normalizer, word_tokenize, Lemmatizer, Chunker
3
 
4
+ # Define the normalization parameters and their default values
5
+ normalization_params = {
6
+ 'correct_spacing': True,
7
+ 'remove_diacritics': True,
8
+ 'remove_specials_chars': True,
9
+ 'decrease_repeated_chars': True,
10
+ 'persian_style': True,
11
+ 'persian_numbers': True,
12
+ 'unicodes_replacement': True,
13
+ 'seperate_mi': True
14
+ }
15
+
16
  # Initialize Hazm components
 
17
  lemmatizer = Lemmatizer()
18
  chunker = Chunker(model='resources/chunker.model')
19
 
20
+ def process_text(text, operations, **kwargs):
21
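+ # Initialize the Normalizer with user-selected parameters. NOTE(review): gr.Interface passes input values positionally, so the checkbox values will not arrive in **kwargs (expect a TypeError); accept *flags and zip them with normalization_params instead.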
22
+ normalizer = Normalizer(**kwargs)
23
  result = {}
24
  if 'normalize' in operations:
25
  text = normalizer.normalize(text)
 
36
  result['Chunks'] = str(chunks)
37
  return result
38
 
39
+ # Define Gradio interface
40
  operations = ['normalize', 'tokenize', 'lemmatize', 'chunk']
41
  iface = gr.Interface(
42
  fn=process_text,
43
  inputs=[
44
  gr.Textbox(lines=10, label="Input Text"),
45
+ gr.CheckboxGroup(operations, label="Operations"),
46
+ *[
47
+ gr.Checkbox(value=default, label=param.replace('_', ' ').capitalize())
48
+ for param, default in normalization_params.items()
49
+ ]
50
  ],
51
  outputs="json",
52
  title="Persian Text Processor with Hazm",
53
+ description="Select operations and normalization parameters to perform on the input text using Hazm."
54
  )
55
 
56
  if __name__ == "__main__":