Spaces:
Runtime error
Runtime error
Changing order of fuzzy_remove_duplicate_ent
Browse files
app.py
CHANGED
@@ -257,6 +257,20 @@ Non-organised entities with entiities.json
|
|
257 |
|
258 |
return pre_text
|
259 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
260 |
def get_who_what_where_when(self):
|
261 |
"""Get entity information in a document.
|
262 |
|
@@ -323,17 +337,6 @@ TIME"""
|
|
323 |
return self.entity_df
|
324 |
|
325 |
|
326 |
-
def fuzzy_remove_duplicate_ent(deduped_ents, threshold=85, limit=1):
    """Drop entities that are fuzzy near-duplicates of an earlier entity.

    Entities are visited in input order. Each visited entity is kept and
    then compared against the entities that come after it; any of those
    whose match score is strictly greater than ``threshold`` is discarded.

    Args:
        deduped_ents: Iterable of entity strings (ideally already free of
            exact duplicates).
        threshold: Score a match must exceed to count as a duplicate.
        limit: Maximum number of best matches inspected per entity. With
            the default of 1 only the single closest match can be dropped
            per kept entity.

    Returns:
        A new list with the surviving entities, in their original order.
    """
    # NOTE(review): `process` is expected to be imported at file level
    # (presumably fuzzywuzzy/thefuzz `process` - confirm). Its `extract`
    # is used as returning sequences whose [0] is the matched choice and
    # [1] is the score.
    kept = []
    remaining = list(deduped_ents)

    while remaining:
        ent, remaining = remaining[0], remaining[1:]
        kept.append(ent)
        if not remaining:
            break

        # Pass the candidate list itself. The previous code passed the
        # result of list.remove(), which is always None, so nothing was
        # ever searched and no duplicate was ever found.
        matches = process.extract(ent, remaining, limit=limit)
        duplicates_found = [match[0] for match in matches if match[1] > threshold]

        if duplicates_found:
            # Discarded entities must neither survive nor act as queries.
            remaining = [
                entity for entity in remaining if entity not in duplicates_found
            ]

    return kept
|
337 |
|
338 |
|
339 |
def entity_json(self):
|
|
|
257 |
|
258 |
return pre_text
|
259 |
|
260 |
+
|
261 |
+
def fuzzy_remove_duplicate_ent(deduped_ents, threshold=85, limit=1):
    """Drop entities that are fuzzy near-duplicates of an earlier entity.

    Entities are visited in input order. Each visited entity is kept and
    then compared against the entities that come after it; any of those
    whose match score is strictly greater than ``threshold`` is discarded.

    Args:
        deduped_ents: Iterable of entity strings (ideally already free of
            exact duplicates).
        threshold: Score a match must exceed to count as a duplicate.
        limit: Maximum number of best matches inspected per entity. With
            the default of 1 only the single closest match can be dropped
            per kept entity.

    Returns:
        A new list with the surviving entities, in their original order.
    """
    # NOTE(review): `process` is expected to be imported at file level
    # (presumably fuzzywuzzy/thefuzz `process` - confirm). Its `extract`
    # is used as returning sequences whose [0] is the matched choice and
    # [1] is the score.
    kept = []
    remaining = list(deduped_ents)

    while remaining:
        ent, remaining = remaining[0], remaining[1:]
        kept.append(ent)
        if not remaining:
            break

        # Pass the candidate list itself. The previous code passed the
        # result of list.remove(), which is always None, so nothing was
        # ever searched and no duplicate was ever found.
        matches = process.extract(ent, remaining, limit=limit)
        duplicates_found = [match[0] for match in matches if match[1] > threshold]

        if duplicates_found:
            # Discarded entities must neither survive nor act as queries.
            remaining = [
                entity for entity in remaining if entity not in duplicates_found
            ]

    return kept
|
272 |
+
|
273 |
+
|
274 |
def get_who_what_where_when(self):
|
275 |
"""Get entity information in a document.
|
276 |
|
|
|
337 |
return self.entity_df
|
338 |
|
339 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
340 |
|
341 |
|
342 |
def entity_json(self):
|