import os

import gradio as gr
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
import pinecone
from sentence_transformers import SentenceTransformer

# Initialize Pinecone (the API key is supplied via the PINECONE_API_KEY environment variable / Space secret)
pc = pinecone.Pinecone(api_key=os.environ["PINECONE_API_KEY"])

# Pinecone index for each application
INDEXES = {
    "YouTube": "youtube-data-index",
    "Chrome": "chrome-history-index"
}

# Fine-tuned model repositories on the Hugging Face Hub
MODEL_PATHS = {
    "YouTube": "Vishal3041/falcon_finetuned_llm",
    "Chrome": "Vishal3041/TransNormerLLM_finetuned"
}

# Sentence transformer used for query embeddings (384 dimensions, matching the indexes)
embedding_model = SentenceTransformer("all-MiniLM-L6-v2")

# Cache loaded models so each one is downloaded and initialized only once
_model_cache = {}

def load_model(model_name):
    """Load (and cache) the model and tokenizer for the selected application."""
    if model_name not in _model_cache:
        model = AutoModelForCausalLM.from_pretrained(
            model_name, trust_remote_code=True, device_map="auto"
        )
        tokenizer = AutoTokenizer.from_pretrained(model_name)
        _model_cache[model_name] = (model, tokenizer)
    return _model_cache[model_name]

def query_pinecone(query, app_selected):
    """Retrieve the most relevant results from Pinecone for the selected application."""
    index_name = INDEXES[app_selected]
    index = pc.Index(index_name)

    # Generate the embedding for the query (must match the index dimension)
    query_vector = embedding_model.encode(query).tolist()

    # Query Pinecone for the most similar records
    results = index.query(
        vector=query_vector,
        top_k=5,
        include_metadata=True
    )

    # Format the matches into a readable context block
    context_list = []
    for res in results.matches or []:
        metadata = res.metadata or {}
        title = metadata.get("Title", "No Title")
        timestamp = metadata.get("Timestamp", "No Date")

        if app_selected == "Chrome":
            formatted_entry = f"**{title}**\n*Visited on: {timestamp}*"
        else:
            watched_at = metadata.get("Watched At", "Unknown Date")
            video_link = metadata.get("Video Link", "#")
            formatted_entry = f"**[{title}]({video_link})**\n*Watched on: {watched_at}*"

        context_list.append(formatted_entry)

    return "\n\n".join(context_list) if context_list else "No relevant results found."

def generate_response(query, app_selected):
    """RAG pipeline: fetch context from Pinecone, then generate the LLM response."""
    # Load the model that matches the selected application
    model_name = MODEL_PATHS[app_selected]
    model, tokenizer = load_model(model_name)

    # Get the relevant context
    context = query_pinecone(query, app_selected)

    # Build the prompt
    input_text = f"Context: {context}\nUser Question: {query}\nAnswer:"
    inputs = tokenizer(input_text, return_tensors="pt").to(model.device)

    # Generate the response
    output = model.generate(**inputs, max_new_tokens=512, do_sample=True, top_p=0.9, temperature=0.7)
    response = tokenizer.decode(output[0], skip_special_tokens=True)
    return response

# Gradio callback
def gradio_ui(query, app_selected):
    return generate_response(query, app_selected)

# Build the Gradio interface
iface = gr.Interface(
    fn=gradio_ui,
    inputs=[
        gr.Textbox(lines=2, placeholder="Type your question..."),
        gr.Radio(["YouTube", "Chrome"], label="Select Application", value="YouTube")
    ],
    outputs="text",
    title="Personal AI Assistant",
    description="Chat with your YouTube or Chrome history using AI!"
)

# Launch the Gradio UI
if __name__ == "__main__":
    iface.launch()
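
# A minimal usage sketch (an assumption, not part of the app itself): once this Space is
# deployed, it could be queried remotely with gradio_client. The Space ID below is a
# placeholder and must be replaced with the actual "username/space-name" of the deployment.
#
#   from gradio_client import Client
#
#   client = Client("username/space-name")  # hypothetical Space ID
#   answer = client.predict(
#       "Which videos did I watch about transformers?",  # query
#       "YouTube",                                        # application
#   )
#   print(answer)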