AlirezaF138 committed on
Commit
cc082f0
·
verified ·
1 Parent(s): 789bb05

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +7 -4
app.py CHANGED
@@ -1,9 +1,10 @@
1
  import gradio as gr
2
- from hazm import Normalizer, word_tokenize, Lemmatizer, Chunker
3
 
4
  # Initialize Hazm components
5
  lemmatizer = Lemmatizer()
6
- chunker = Chunker(model='resources/chunker.model')
 
7
 
8
  def process_text(text, operation, correct_spacing, remove_diacritics, remove_specials_chars, decrease_repeated_chars, persian_style, persian_numbers, unicodes_replacement, seperate_mi):
9
  # Initialize the Normalizer with user-selected parameters
@@ -29,8 +30,10 @@ def process_text(text, operation, correct_spacing, remove_diacritics, remove_spe
29
  lemmas = [lemmatizer.lemmatize(token) for token in word_tokenize(text)]
30
  result = " ".join(lemmas) # Show lemmas as a space-separated string
31
  elif operation == "chunk":
32
- pos_tags = word_tokenize(text)
33
- chunks = chunker.parse(pos_tags)
 
 
34
  result = str(chunks) # Show chunks as text
35
 
36
  return result
 
1
  import gradio as gr
2
+ from hazm import Normalizer, word_tokenize, Lemmatizer, POSTagger, Chunker
3
 
4
  # Initialize Hazm components
5
  lemmatizer = Lemmatizer()
6
+ pos_tagger = POSTagger(model='resources/pos_tagger.model') # Load POS Tagger model
7
+ chunker = Chunker(model='resources/chunker.model') # Load Chunker model
8
 
9
  def process_text(text, operation, correct_spacing, remove_diacritics, remove_specials_chars, decrease_repeated_chars, persian_style, persian_numbers, unicodes_replacement, seperate_mi):
10
  # Initialize the Normalizer with user-selected parameters
 
30
  lemmas = [lemmatizer.lemmatize(token) for token in word_tokenize(text)]
31
  result = " ".join(lemmas) # Show lemmas as a space-separated string
32
  elif operation == "chunk":
33
+ # Tokenize and tag the input text
34
+ tokens = word_tokenize(text)
35
+ pos_tags = pos_tagger.tag(tokens) # Generate POS tags
36
+ chunks = chunker.parse(pos_tags) # Pass tagged tokens to Chunker
37
  result = str(chunks) # Show chunks as text
38
 
39
  return result