Spaces:
Runtime error
Runtime error
Fixed fuzzy_remove_duplicate_ent logic
Browse files
app.py
CHANGED
@@ -258,7 +258,7 @@ Non-organised entities with entiities.json
|
|
258 |
return pre_text
|
259 |
|
260 |
|
261 |
-
def fuzzy_remove_duplicate_ent(deduped_ents, threshold=85, limit=1):
|
262 |
search_space = list(deduped_ents)
|
263 |
|
264 |
for ent in deduped_ents:
|
@@ -325,15 +325,13 @@ TIME"""
|
|
325 |
item for item in article_entity_list}.values()
|
326 |
|
327 |
#to remove duplicate names
|
328 |
-
fuzzy_remove_duplicate_ent(deduplicated_entities, threshold = 85, limit = 1)
|
329 |
|
330 |
# create a dataframe from the entities
|
331 |
for record in deduplicated_entities:
|
332 |
record_df = pd.DataFrame(record.items(), columns=["entity", "description"])
|
333 |
self.entity_df = pd.concat([self.entity_df, record_df], ignore_index=True)
|
334 |
|
335 |
-
print(self.entity_df)
|
336 |
-
print('______________________')
|
337 |
return self.entity_df
|
338 |
|
339 |
|
|
|
258 |
return pre_text
|
259 |
|
260 |
|
261 |
+
def fuzzy_remove_duplicate_ent(self, deduped_ents, threshold=85, limit=1):
|
262 |
search_space = list(deduped_ents)
|
263 |
|
264 |
for ent in deduped_ents:
|
|
|
325 |
item for item in article_entity_list}.values()
|
326 |
|
327 |
#to remove duplicate names
|
328 |
+
deduplicated_entities = self.fuzzy_remove_duplicate_ent(deduplicated_entities, threshold = 85, limit = 1)
|
329 |
|
330 |
# create a dataframe from the entities
|
331 |
for record in deduplicated_entities:
|
332 |
record_df = pd.DataFrame(record.items(), columns=["entity", "description"])
|
333 |
self.entity_df = pd.concat([self.entity_df, record_df], ignore_index=True)
|
334 |
|
|
|
|
|
335 |
return self.entity_df
|
336 |
|
337 |
|