import os

import gradio as gr
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
import pinecone
from sentence_transformers import SentenceTransformer

# Initialize Pinecone (read the API key from the environment / Space secrets rather than hard-coding it)
pc = pinecone.Pinecone(api_key=os.environ["PINECONE_API_KEY"])
# Pinecone index per data source
INDEXES = {
    "YouTube": "youtube-data-index",
    "Chrome": "chrome-history-index"
}
# Fine-tuned model repositories on the Hugging Face Hub
MODEL_PATHS = {
    "YouTube": "Vishal3041/falcon_finetuned_llm",
    "Chrome": "Vishal3041/TransNormerLLM_finetuned"
}
# Sentence transformer for query embeddings (384 dimensions, matching the indexes)
embedding_model = SentenceTransformer("all-MiniLM-L6-v2")
# Load the model and tokenizer for the selected application
def load_model(model_name):
    model = AutoModelForCausalLM.from_pretrained(
        model_name, trust_remote_code=True, device_map="auto"
    )
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    return model, tokenizer
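# Note: load_model() downloads/loads weights on every request, which is slow on a Space.
# A minimal in-process cache is sketched below (an optional addition, not part of the
# original pipeline); generate_response() could call load_model_cached() instead.
_MODEL_CACHE = {}

def load_model_cached(model_name):
    """Load a model/tokenizer pair once and reuse it for later requests."""
    if model_name not in _MODEL_CACHE:
        _MODEL_CACHE[model_name] = load_model(model_name)
    return _MODEL_CACHE[model_name]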
# Query Pinecone for context relevant to the user's question
def query_pinecone(query, app_selected):
    """Retrieve the most relevant results from Pinecone."""
    index_name = INDEXES[app_selected]
    index = pc.Index(index_name)

    # Generate an embedding for the query
    query_vector = embedding_model.encode(query).tolist()

    # Query Pinecone for the closest matches
    results = index.query(
        vector=query_vector,
        top_k=5,
        include_metadata=True
    )

    # Format results as context for the LLM
    context_list = []
    for res in results.get("matches", []):
        metadata = res.get("metadata", {})
        title = metadata.get("Title", "No Title")
        timestamp = metadata.get("Timestamp", "No Date")

        if app_selected == "Chrome":
            formatted_entry = f"**{title}**\n*Visited on: {timestamp}*"
        else:
            watched_at = metadata.get("Watched At", "Unknown Date")
            video_link = metadata.get("Video Link", "#")
            formatted_entry = f"**[{title}]({video_link})**\n*Watched on: {watched_at}*"

        context_list.append(formatted_entry)

    return "\n\n".join(context_list) if context_list else "No relevant results found."
# RAG pipeline: fetch context from Pinecone, then answer with the fine-tuned LLM
def generate_response(query, app_selected):
    """Handles the RAG pipeline: fetches context and generates the LLM response."""
    # Load the model that matches the selected application
    model_name = MODEL_PATHS[app_selected]
    model, tokenizer = load_model(model_name)

    # Get the relevant context
    context = query_pinecone(query, app_selected)

    # Build the prompt
    input_text = f"Context: {context}\nUser Question: {query}\nAnswer:"
    input_ids = tokenizer.encode(input_text, return_tensors="pt").to(model.device)

    # Generate the response; max_new_tokens avoids the answer being cut off when the prompt is long
    output = model.generate(input_ids, max_new_tokens=256, do_sample=True, top_p=0.9, temperature=0.7)
    response = tokenizer.decode(output[0], skip_special_tokens=True)
    return response
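# If the retrieved context is long, the prompt may still exceed the model's context window.
# A simple guard (an assumption, not part of the original code) is to truncate at encode time:
#   input_ids = tokenizer(input_text, return_tensors="pt", truncation=True,
#                         max_length=tokenizer.model_max_length).input_ids.to(model.device)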
# Gradio callback
def gradio_ui(query, app_selected):
    return generate_response(query, app_selected)
# Build the Gradio interface
iface = gr.Interface(
    fn=gradio_ui,
    inputs=[
        gr.Textbox(lines=2, placeholder="Type your question..."),
        gr.Radio(["YouTube", "Chrome"], label="Select Application", value="YouTube")
    ],
    outputs="text",
    title="Personal AI Assistant",
    description="Chat with your YouTube or Chrome history using AI!"
)
# Launch the Gradio UI
if __name__ == "__main__":
    iface.launch()
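# Quick local check (optional, assuming this file is saved as app.py and
# PINECONE_API_KEY is set in the environment or as a Space secret):
#   python -c "from app import generate_response; print(generate_response('What did I watch about transformers?', 'YouTube'))"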