anujkarn committed on
Commit
1a10025
·
1 Parent(s): 570e378

Fixed fuzzy_remove_duplicate_ent logic

Browse files
Files changed (1) hide show
  1. app.py +2 -4
app.py CHANGED
@@ -258,7 +258,7 @@ Non-organised entities with entiities.json
258
  return pre_text
259
 
260
 
261
- def fuzzy_remove_duplicate_ent(deduped_ents, threshold=85, limit=1):
262
  search_space = list(deduped_ents)
263
 
264
  for ent in deduped_ents:
@@ -325,15 +325,13 @@ TIME"""
325
  item for item in article_entity_list}.values()
326
 
327
  #to remove duplicate names
328
- fuzzy_remove_duplicate_ent(deduplicated_entities, threshold = 85, limit = 1)
329
 
330
  # create a dataframe from the entities
331
  for record in deduplicated_entities:
332
  record_df = pd.DataFrame(record.items(), columns=["entity", "description"])
333
  self.entity_df = pd.concat([self.entity_df, record_df], ignore_index=True)
334
 
335
- print(self.entity_df)
336
- print('______________________')
337
  return self.entity_df
338
 
339
 
 
258
  return pre_text
259
 
260
 
261
+ def fuzzy_remove_duplicate_ent(self, deduped_ents, threshold=85, limit=1):
262
  search_space = list(deduped_ents)
263
 
264
  for ent in deduped_ents:
 
325
  item for item in article_entity_list}.values()
326
 
327
  #to remove duplicate names
328
+ deduplicated_entities = self.fuzzy_remove_duplicate_ent(deduplicated_entities, threshold = 85, limit = 1)
329
 
330
  # create a dataframe from the entities
331
  for record in deduplicated_entities:
332
  record_df = pd.DataFrame(record.items(), columns=["entity", "description"])
333
  self.entity_df = pd.concat([self.entity_df, record_df], ignore_index=True)
334
 
 
 
335
  return self.entity_df
336
 
337