anujkarn committed on
Commit
570e378
·
1 Parent(s): 6e66889

Changing order of fuzzy_remove_duplicate_ent

Browse files
Files changed (1) hide show
  1. app.py +14 -11
app.py CHANGED
@@ -257,6 +257,20 @@ Non-organised entities with entiities.json
257
 
258
  return pre_text
259
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
260
  def get_who_what_where_when(self):
261
  """Get entity information in a document.
262
 
@@ -323,17 +337,6 @@ TIME"""
323
  return self.entity_df
324
 
325
 
326
- def fuzzy_remove_duplicate_ent(deduped_ents, threshold=85, limit=1):
327
- search_space = list(deduped_ents)
328
-
329
- for ent in deduped_ents:
330
- duplicates_found = process.extract(ent, search_space.remove(ent), limit =1) # process.extract return the ent match amongst search_space with it's score
331
- duplicates_found = [entity[0] for entity in duplicates_found if entity[1]> threshold]
332
-
333
- if (len(duplicates_found) >0 ):
334
- deduped_ents =[entity for entity in deduped_ents if entity not in duplicates_found]
335
-
336
- return deduped_ents
337
 
338
 
339
  def entity_json(self):
 
257
 
258
  return pre_text
259
 
260
+
261
+ def fuzzy_remove_duplicate_ent(deduped_ents, threshold=85, limit=1):
262
+ search_space = list(deduped_ents)
263
+
264
+ for ent in deduped_ents:
265
+ duplicates_found = process.extract(ent, search_space.remove(ent), limit =1) # process.extract return the ent match amongst search_space with it's score
266
+ duplicates_found = [entity[0] for entity in duplicates_found if entity[1]> threshold]
267
+
268
+ if (len(duplicates_found) >0 ):
269
+ deduped_ents =[entity for entity in deduped_ents if entity not in duplicates_found]
270
+
271
+ return deduped_ents
272
+
273
+
274
  def get_who_what_where_when(self):
275
  """Get entity information in a document.
276
 
 
337
  return self.entity_df
338
 
339
 
 
 
 
 
 
 
 
 
 
 
 
340
 
341
 
342
  def entity_json(self):