RE_UPLOAD-REBUILD-RESTART
Browse files- utils/remove_duplicates.py +19 -0
utils/remove_duplicates.py
ADDED
|
@@ -0,0 +1,19 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
def unzip_fn(items: list) -> list:
    """Transpose a list of sequences into a list of lists.

    This is the inverse of ``zip``: ``[(a1, b1), (a2, b2)]`` becomes
    ``[[a1, a2], [b1, b2]]``.

    Args:
        items: List of iterables (typically tuples) to transpose.

    Returns:
        One list per position within the rows. An empty ``items`` yields
        ``[]``. Rows of unequal length are truncated to the shortest row,
        because ``zip`` stops at the shortest input.
    """
    return [list(column) for column in zip(*items)]
|
| 3 |
+
|
| 4 |
+
def remove_duplicates(items: list, key=lambda x: x, show_process=False, unzip=False):
    """Remove duplicates from a list of items.

    Two items count as duplicates when ``key`` returns equal values for
    them. The key values are used as dictionary keys, so they must be
    hashable. Each distinct key keeps the position of its first occurrence
    in ``items``, but the item retained for it is the *last* one seen with
    that key.

    Args:
        items: List of items.
        key: Function to get the key of the item. Defaults to the item
            itself.
        show_process: Whether to show the process or not. When true,
            ``tqdm`` is imported and iteration is wrapped in a progress
            bar.
        unzip: When true, the deduplicated list is passed through
            ``unzip_fn`` and that result is returned instead.

    Returns:
        List of items without duplicates, or the result of ``unzip_fn`` on
        that list when ``unzip`` is true.
    """
    if show_process:
        # Imported lazily so tqdm is only required when a progress bar is
        # actually requested.
        import tqdm
        iterable = tqdm.tqdm(items, desc='Deduping...')
    else:
        iterable = items

    # Later items overwrite earlier ones under the same key, while dict
    # insertion order keeps each key at its first-seen position.
    latest_by_key = {key(item): item for item in iterable}
    deduped_items = list(latest_by_key.values())

    return unzip_fn(deduped_items) if unzip else deduped_items
|