AlirezaF138 committed on
Commit
3037d70
·
verified ·
1 Parent(s): ea34410

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +16 -14
app.py CHANGED
@@ -5,7 +5,7 @@ from hazm import Normalizer, word_tokenize, Lemmatizer, Chunker
5
  lemmatizer = Lemmatizer()
6
  chunker = Chunker(model='resources/chunker.model')
7
 
8
- def process_text(text, operations, correct_spacing, remove_diacritics, remove_specials_chars, decrease_repeated_chars, persian_style, persian_numbers, unicodes_replacement, seperate_mi):
9
  # Initialize the Normalizer with user-selected parameters
10
  normalizer = Normalizer(
11
  correct_spacing=correct_spacing,
@@ -17,20 +17,22 @@ def process_text(text, operations, correct_spacing, remove_diacritics, remove_sp
17
  unicodes_replacement=unicodes_replacement,
18
  seperate_mi=seperate_mi
19
  )
20
- result = {}
21
- if 'normalize' in operations:
22
- text = normalizer.normalize(text)
23
- result['Normalized Text'] = text
24
- if 'tokenize' in operations:
 
25
  tokens = word_tokenize(text)
26
- result['Tokens'] = tokens
27
- if 'lemmatize' in operations:
28
  lemmas = [lemmatizer.lemmatize(token) for token in word_tokenize(text)]
29
- result['Lemmas'] = lemmas
30
- if 'chunk' in operations:
31
  pos_tags = word_tokenize(text)
32
  chunks = chunker.parse(pos_tags)
33
- result['Chunks'] = str(chunks)
 
34
  return result
35
 
36
  # Define Gradio interface
@@ -39,7 +41,7 @@ iface = gr.Interface(
39
  fn=process_text,
40
  inputs=[
41
  gr.Textbox(lines=10, label="Input Text"),
42
- gr.CheckboxGroup(operations, label="Operations"),
43
  gr.Checkbox(value=True, label="Correct Spacing", interactive=True),
44
  gr.Checkbox(value=True, label="Remove Diacritics", interactive=True),
45
  gr.Checkbox(value=True, label="Remove Special Characters", interactive=True),
@@ -49,9 +51,9 @@ iface = gr.Interface(
49
  gr.Checkbox(value=True, label="Unicodes Replacement", interactive=True),
50
  gr.Checkbox(value=True, label="Separate 'می'", interactive=True)
51
  ],
52
- outputs="json",
53
  title="Persian Text Processor with Hazm",
54
- description="Select operations and normalization parameters to perform on the input text using Hazm."
55
  )
56
 
57
  if __name__ == "__main__":
 
5
  lemmatizer = Lemmatizer()
6
  chunker = Chunker(model='resources/chunker.model')
7
 
8
+ def process_text(text, operation, correct_spacing, remove_diacritics, remove_specials_chars, decrease_repeated_chars, persian_style, persian_numbers, unicodes_replacement, seperate_mi):
9
  # Initialize the Normalizer with user-selected parameters
10
  normalizer = Normalizer(
11
  correct_spacing=correct_spacing,
 
17
  unicodes_replacement=unicodes_replacement,
18
  seperate_mi=seperate_mi
19
  )
20
+
21
+ result = ""
22
+
23
+ if operation == "normalize":
24
+ result = normalizer.normalize(text)
25
+ elif operation == "tokenize":
26
  tokens = word_tokenize(text)
27
+ result = " ".join(tokens) # Show tokens as a space-separated string
28
+ elif operation == "lemmatize":
29
  lemmas = [lemmatizer.lemmatize(token) for token in word_tokenize(text)]
30
+ result = " ".join(lemmas) # Show lemmas as a space-separated string
31
+ elif operation == "chunk":
32
  pos_tags = word_tokenize(text)
33
  chunks = chunker.parse(pos_tags)
34
+ result = str(chunks) # Show chunks as text
35
+
36
  return result
37
 
38
  # Define Gradio interface
 
41
  fn=process_text,
42
  inputs=[
43
  gr.Textbox(lines=10, label="Input Text"),
44
+ gr.Radio(operations, label="Select Operation", type="value"), # Radio button to select one operation at a time
45
  gr.Checkbox(value=True, label="Correct Spacing", interactive=True),
46
  gr.Checkbox(value=True, label="Remove Diacritics", interactive=True),
47
  gr.Checkbox(value=True, label="Remove Special Characters", interactive=True),
 
51
  gr.Checkbox(value=True, label="Unicodes Replacement", interactive=True),
52
  gr.Checkbox(value=True, label="Separate 'می'", interactive=True)
53
  ],
54
+ outputs=gr.Textbox(label="Processed Text", interactive=False, lines=10), # Output as copyable text
55
  title="Persian Text Processor with Hazm",
56
+ description="Select an operation and normalization parameters to process the input text using Hazm."
57
  )
58
 
59
  if __name__ == "__main__":