import os

import gradio as gr
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
import pinecone
from sentence_transformers import SentenceTransformer

# ✅ Initialize Pinecone (read the API key from the environment
# rather than committing a secret to source control)
pc = pinecone.Pinecone(api_key=os.environ["PINECONE_API_KEY"])

# ✅ Define Indexes
INDEXES = {
    "YouTube": "youtube-data-index",
    "Chrome": "chrome-history-index"
}

# ✅ Model paths (Hugging Face)
MODEL_PATHS = {
    "YouTube": "Vishal3041/falcon_finetuned_llm",
    "Chrome": "Vishal3041/TransNormerLLM_finetuned"
}

# ✅ Load Sentence Transformer matching the indexes' embedding size (384)
embedding_model = SentenceTransformer("all-MiniLM-L6-v2")

# ✅ Function to load a model dynamically
def load_model(model_name):
    model = AutoModelForCausalLM.from_pretrained(
        model_name,
        trust_remote_code=True,
        device_map="auto"
    )
    # trust_remote_code here too, since the checkpoints may ship custom tokenizer code
    tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
    return model, tokenizer

# ✅ Function to query Pinecone
def query_pinecone(query, app_selected):
    """
    Retrieves the most relevant results from Pinecone.
    """
    index_name = INDEXES[app_selected]
    index = pc.Index(index_name)

    # Generate an embedding for the query
    query_vector = embedding_model.encode(query).tolist()

    # Query Pinecone for the top matches
    results = index.query(
        vector=query_vector,
        top_k=5,
        include_metadata=True
    )

    # Format results for context
    context_list = []
    for res in results.get("matches", []):
        metadata = res.get("metadata", {})
        title = metadata.get("Title", "No Title")
        timestamp = metadata.get("Timestamp", "No Date")

        if app_selected == "Chrome":
            formatted_entry = f"📌 **{title}**\n 🕒 *Visited on: {timestamp}*"
        else:
            watched_at = metadata.get("Watched At", "Unknown Date")
            video_link = metadata.get("Video Link", "#")
            formatted_entry = f"🎬 **[{title}]({video_link})**\n 📅 *Watched on: {watched_at}*"

        context_list.append(formatted_entry)

    return "\n\n".join(context_list) if context_list else "No relevant results found."

# ✅ Function to generate a response
def generate_response(query, app_selected):
    """
    Handles the RAG pipeline: fetches context, then generates the LLM response.
    """
    # Load the correct model from Hugging Face
    model_name = MODEL_PATHS[app_selected]
    model, tokenizer = load_model(model_name)

    # Get relevant context
    context = query_pinecone(query, app_selected)

    # Format the input prompt
    input_text = f"Context: {context}\nUser Question: {query}\nAnswer:"
    # Move inputs to the model's device (device_map="auto" may place it on GPU)
    input_ids = tokenizer.encode(input_text, return_tensors="pt").to(model.device)

    # Generate a response; max_new_tokens bounds the answer itself, so a long
    # retrieved context cannot exhaust the budget the way max_length would
    output = model.generate(
        input_ids,
        max_new_tokens=512,
        do_sample=True,
        top_p=0.9,
        temperature=0.7
    )
    response = tokenizer.decode(output[0], skip_special_tokens=True)

    return response

# ✅ Gradio UI
def gradio_ui(query, app_selected):
    return generate_response(query, app_selected)

# ✅ Create Gradio Interface
iface = gr.Interface(
    fn=gradio_ui,
    inputs=[
        gr.Textbox(lines=2, placeholder="Type your question..."),
        gr.Radio(["YouTube", "Chrome"], label="Select Application", value="YouTube")
    ],
    outputs="text",
    title="📌 Personal AI Assistant",
    description="Chat with your YouTube or Chrome history using AI!"
)

# ✅ Launch Gradio UI
if __name__ == "__main__":
    iface.launch()
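
# ----------------------------------------------------------------------------
# Optional: cache loaded models between requests (a sketch, not part of the
# original app). generate_response() calls load_model() on every query, which
# re-reads a multi-GB checkpoint each time. The _MODEL_CACHE and
# load_model_cached names below are illustrative assumptions; to adopt the
# cache, move this block above generate_response() and call load_model_cached()
# there instead of load_model().
# ----------------------------------------------------------------------------
_MODEL_CACHE = {}

def load_model_cached(model_name):
    """Return a cached (model, tokenizer) pair, loading it on first use."""
    if model_name not in _MODEL_CACHE:
        _MODEL_CACHE[model_name] = load_model(model_name)
    return _MODEL_CACHE[model_name]

# To run the app (the file name app.py is an assumption; PINECONE_API_KEY is
# the env var read above — accelerate is required for device_map="auto"):
#   pip install gradio torch transformers accelerate pinecone sentence-transformers
#   export PINECONE_API_KEY="..."
#   python app.py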