import os

import gradio as gr
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
import pinecone
from sentence_transformers import SentenceTransformer
# ✅ Initialize Pinecone (API key read from the environment instead of being hardcoded)
pc = pinecone.Pinecone(api_key=os.environ["PINECONE_API_KEY"])
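# Note: PINECONE_API_KEY is expected to be set in the environment (e.g. as a Hugging Face
# Space secret). The indexes below must already exist with dimension 384, matching the
# all-MiniLM-L6-v2 embeddings used for the queries.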
# ✅ Pinecone index per application
INDEXES = {
    "YouTube": "youtube-data-index",
    "Chrome": "chrome-history-index"
}
# ✅ Model paths (Hugging Face)
MODEL_PATHS = {
    "YouTube": "Vishal3041/falcon_finetuned_llm",
    "Chrome": "Vishal3041/TransNormerLLM_finetuned"
}
# ✅ Load the Sentence Transformer used for query embeddings (384-dimensional output)
embedding_model = SentenceTransformer("all-MiniLM-L6-v2")
# ✅ Function to load a model and its tokenizer dynamically
def load_model(model_name):
    model = AutoModelForCausalLM.from_pretrained(
        model_name, trust_remote_code=True, device_map="auto"
    )
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    return model, tokenizer
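# Optional helper (a simple sketch): cache loaded models so repeated queries don't re-run
# the expensive from_pretrained() calls. Assumes both fine-tuned models fit in memory at
# once; generate_response() could call this instead of load_model().
_model_cache = {}

def load_model_cached(model_name):
    """Return a cached (model, tokenizer) pair, loading it on first use."""
    if model_name not in _model_cache:
        _model_cache[model_name] = load_model(model_name)
    return _model_cache[model_name]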
# ✅ Function to query Pinecone
def query_pinecone(query, app_selected):
    """Retrieves the most relevant results from Pinecone."""
    index_name = INDEXES[app_selected]
    index = pc.Index(index_name)
    # Generate the embedding for the query
    query_vector = embedding_model.encode(query).tolist()
    # Query Pinecone for the most similar entries
    results = index.query(
        vector=query_vector,
        top_k=5,
        include_metadata=True
    )
    # Format the results as Markdown context
    context_list = []
    for res in results.get("matches", []):
        metadata = res.get("metadata", {})
        title = metadata.get("Title", "No Title")
        timestamp = metadata.get("Timestamp", "No Date")
        if app_selected == "Chrome":
            formatted_entry = f"📌 **{title}**\n 🕒 *Visited on: {timestamp}*"
        else:
            watched_at = metadata.get("Watched At", "Unknown Date")
            video_link = metadata.get("Video Link", "#")
            formatted_entry = f"🎬 **[{title}]({video_link})**\n 📅 *Watched on: {watched_at}*"
        context_list.append(formatted_entry)
    return "\n\n".join(context_list) if context_list else "No relevant results found."
# ✅ Function to generate a response
def generate_response(query, app_selected):
    """Handles the RAG pipeline: fetches context + generates the LLM response."""
    # Load the correct fine-tuned model from Hugging Face
    model_name = MODEL_PATHS[app_selected]
    model, tokenizer = load_model(model_name)
    # Get relevant context
    context = query_pinecone(query, app_selected)
    # Format the input prompt
    input_text = f"Context: {context}\nUser Question: {query}\nAnswer:"
    # Move the input to the model's device (device_map="auto" may place the model on GPU)
    input_ids = tokenizer.encode(input_text, return_tensors="pt").to(model.device)
    # Generate the answer; max_new_tokens caps the generated text without counting the
    # (potentially long) retrieved context in the budget
    output = model.generate(input_ids, max_new_tokens=256, do_sample=True, top_p=0.9, temperature=0.7)
    # Decode only the newly generated tokens, slicing off the echoed prompt
    response = tokenizer.decode(output[0][input_ids.shape[-1]:], skip_special_tokens=True)
    return response
# ✅ Gradio callback
def gradio_ui(query, app_selected):
    return generate_response(query, app_selected)
# ✅ Create the Gradio interface
iface = gr.Interface(
    fn=gradio_ui,
    inputs=[
        gr.Textbox(lines=2, placeholder="Type your question..."),
        gr.Radio(["YouTube", "Chrome"], label="Select Application", value="YouTube")
    ],
    outputs="text",
    title="📌 Personal AI Assistant",
    description="Chat with your YouTube or Chrome history using AI!"
)
# ✅ Launch the Gradio UI
if __name__ == "__main__":
    iface.launch()