# Hugging Face Spaces — status: Running
# 0 ▸ silence optional tokenizers fork warning
import getpass
import os

import gradio as gr
import numpy as np
import torch
from datasets import load_dataset
from pinecone import Pinecone
from sentence_transformers import CrossEncoder, SentenceTransformer
# Silence the optional tokenizers fork warning.
os.environ["TOKENIZERS_PARALLELISM"] = "false"

# Hugging Face access token; None when the variable is unset.
HF_TOKEN = os.environ.get("HF_TOKEN")

# Name dataset: read by the search function for exact-name lookups and by the
# UI for the language dropdown.
ds = load_dataset(
    "nomi-stories/nomi-names",
    split="train",
    token=HF_TOKEN,
)

# Prompt interactively only when the key is not already in the environment,
# then expose the resolved value under both historical names so neither one
# is left stale.
if not os.environ.get("PINECONE_API_KEY"):
    os.environ["PINECONE_API_KEY"] = getpass.getpass("Enter API key for PINECONE:")
PINECONE_API_KEY = os.environ["PINECONE_API_KEY"]
api_key = PINECONE_API_KEY

# 1 ▸ load data & models
embedder = SentenceTransformer("fajayi/nomi-name-encoder")
reranker = CrossEncoder("cross-encoder/ms-marco-MiniLM-L-6-v2")  # CPU-friendly

# Pass the key by keyword so the call does not depend on parameter order.
pc = Pinecone(api_key=api_key)
index = pc.Index("nomi-name-encoder")
# 3 ▸ hybrid search + re-rank
_NO_MATCH_MESSAGE = "No matches found. Try another theme or meaning."

# Lazily built map of lower-cased NameStrip -> first dataset row with that
# name. Stays None until the first search needs it.
_NAME_ROWS = None


def _find_known_name(query):
    """Return the first dataset row whose NameStrip equals *query* (case-insensitive), or None."""
    global _NAME_ROWS
    if _NAME_ROWS is None:
        rows = {}
        for row in ds:
            name = row["NameStrip"]
            if name:  # skip null/empty names instead of crashing on .lower()
                rows.setdefault(name.lower(), row)  # keep the first occurrence
        _NAME_ROWS = rows
    return _NAME_ROWS.get(query.lower())


def query_name_db(query: str, lang_filter: str, *, top_k: int = 30, max_hits: int = 7) -> str:
    """Search the name index and return the best matches as display text.

    If *query* is exactly a known name (case-insensitive match on the
    dataset's ``NameStrip`` column), the search text is replaced by that
    name's language and meaning. The text is embedded, the ``top_k`` nearest
    vectors are fetched from the Pinecone index, the candidates are re-scored
    with the cross-encoder, and the highest-scoring ones that pass the
    language filter are formatted.

    Args:
        query: Free-text theme, meaning, or name typed by the user.
        lang_filter: ``"All"`` (or empty) for no filtering, otherwise the
            exact ``language`` metadata value a hit must carry.
        top_k: Number of nearest neighbours requested from the index.
        max_hits: Maximum number of lines in the returned text.

    Returns:
        Up to ``max_hits`` lines separated by blank lines, or a fixed
        "no matches" message when the query is empty or nothing qualifies.
    """
    # Nothing to search for: answer immediately without touching the models
    # or the remote index.
    if not query or not query.strip():
        return _NO_MATCH_MESSAGE
    query = query.strip()
    lang_filter = lang_filter or "All"

    known = _find_known_name(query)
    if known:
        # NOTE(review): the leading glyph was mis-encoded in the reviewed text
        # and is restored here as "▫️"; it should match the text format used
        # when the index was built — confirm.
        query = f"▫️ {known['Language']}'{known['Meaning']}'"

    vector = embedder.encode([query])[0].tolist()
    response = index.query(vector=vector, top_k=top_k, include_metadata=True)
    matches = response["matches"]
    if not matches:
        # Do not hand the cross-encoder an empty batch.
        return _NO_MATCH_MESSAGE

    pairs = [
        (query, f"{m['metadata'].get('name')}: {m['metadata'].get('text')}")
        for m in matches
    ]
    scores = reranker.predict(pairs)
    ranked = sorted(zip(matches, scores), key=lambda pair: pair[1], reverse=True)

    hits = []
    for m, _score in ranked:
        meta = m["metadata"]
        lang = meta.get("language", "Unknown")
        if lang_filter != "All" and lang_filter != lang:
            continue
        name = meta.get("name", "")
        # Keep only what follows " name meaning " and drop surrounding quotes.
        meaning = meta.get("text", "").split(" name meaning ")[-1].strip("'")
        # NOTE(review): bullet and dash glyphs reconstructed from mis-encoded text.
        hits.append(f"▫️ {name}({lang}) — {meaning}")
        if len(hits) >= max_hits:
            break

    return "\n\n".join(hits) if hits else _NO_MATCH_MESSAGE
# ─────────── Gradio UI ───────────
# Choices for the language dropdown: "All" plus every distinct language in the
# dataset. Falsy values are skipped so sorting never compares None with str.
lang_choices = ["All"] + sorted({language for language in ds["Language"] if language})

# Static page copy, kept flush-left so Markdown never sees indented lines.
# NOTE(review): emoji and dashes were mis-encoded in the reviewed text and are
# reconstructed here; the "Model" link icon and the search-button icon lost
# too many bytes to recover with certainty — confirm against the original.
_HEADER_HTML = """
<div style="text-align: center; margin-bottom: 20px;">
<h1>Nomi: Name Match</h1>
<p>Discover the beauty and meaning of over 9,000 African names across 6 languages (so far)</p>
<p>Search by meaning, feeling, or theme to find names that speak to you.</p>
<p><a href="https://nomistories.com/" target="_blank">🖥️ Nomi Website</a>
<a href="https://huggingface.co/fajayi/nomi-name-encoder" target="_blank">🔗 Model</a>
<a href="https://huggingface.co/spaces/nomi-stories/nomi-pronunciation-uploader" target="_blank">🎙️ Contribute name pronunciations</a></p>
</div>
"""

_INTRO_MARKDOWN = """
###\n
### Type a feeling, idea, or name — like `joy`, `twins`, `peace`, or `Taiwo` — to explore similar names across languages.
"""

_FOOTER_MARKDOWN = """
---
<div align="center">
🔮 **Bonus Tip:** Try typing random emotions, nature themes, or famous names — see what cultural meaning you uncover!
✨ Built with [sentence-transformers](https://www.sbert.net/) + [Pinecone](https://www.pinecone.io/) + 🤗 Hugging Face by the [Nomi Team](https://www.nomistories.com) .
</div>
"""

# Updated Gradio UI — fun, clean, and modern
with gr.Blocks(
    theme=gr.themes.Soft(
        primary_hue=gr.themes.colors.blue,
        font=[gr.themes.GoogleFont("Nunito Sans"), "ui-sans-serif", "system-ui", "sans-serif"],
    )
) as demo:
    gr.Image(
        "Assets/logo1.png",
        width=200,
        show_label=False,
        interactive=False,
        show_fullscreen_button=False,
    )
    gr.HTML(_HEADER_HTML)
    gr.Markdown(_INTRO_MARKDOWN)

    # NOTE(review): indentation was lost in the reviewed text; the layout below
    # (inputs in one column, results beside it in the same row) is the most
    # plausible reading — confirm against the original.
    with gr.Row():
        with gr.Column():
            query = gr.Textbox(
                label="Search by name, theme, or meaning",
                placeholder="e.g. joy, rebirth, water",
                lines=1,
            )
            lang = gr.Dropdown(choices=lang_choices, value="All", label="Language filter")
            submit = gr.Button("🔍 Search", variant="primary")
        results = gr.Textbox(label="Top 7 closest matches", lines=10, interactive=False)

    # Run the search from the button and also when Enter is pressed in the box.
    submit.click(fn=query_name_db, inputs=[query, lang], outputs=results)
    query.submit(fn=query_name_db, inputs=[query, lang], outputs=results)

    gr.Markdown(_FOOTER_MARKDOWN)
# Start the app only when this file is executed as a script, not on import.
if __name__ == "__main__":
    # share=True additionally requests a public share link from Gradio.
    demo.launch(share=True)