"""Financial Q&A demo: exact-match lookup with RAG-style fallback.

Known questions are answered verbatim from `custom_qas`; anything else is
answered by retrieving the closest stored answers via FAISS and prompting a
small open-source LLM (distilgpt2) with that context.
"""

import pandas as pd
from sentence_transformers import SentenceTransformer
from transformers import pipeline
import faiss
import numpy as np
import gradio as gr

# 📊 Financial Q&A pairs — the authoritative, curated answers.
custom_qas = {
    "What was Q1 revenue?": "Q1 revenue was $1.2B, up 10% YoY.",
    "Why did Q2 profit increase?": "Due to cost cuts, Q2 profit reached $300M.",
    "What is the Q3 forecast?": "5% growth in emerging markets.",
    "Where is Company X expanding?": "Into APAC in Q4.",
    "How much did dividends increase?": "By 15% this year.",
}

# 📄 Convert answers to searchable docs (answers only; questions are keys).
docs = list(custom_qas.values())
df = pd.DataFrame(docs, columns=["text"])

# 🧠 Embed with SentenceTransformer (384-dim MiniLM embeddings).
embedder = SentenceTransformer("all-MiniLM-L6-v2")
doc_embeddings = embedder.encode(df["text"].tolist(), convert_to_numpy=True)

# 🗂️ Index with FAISS — exact L2 search; fine at this tiny corpus size.
index = faiss.IndexFlatL2(doc_embeddings.shape[1])
index.add(doc_embeddings)

# 🤖 Load open-source LLM for the generative fallback path.
llm = pipeline("text-generation", model="distilgpt2")


def answer_query(query, top_k=2):
    """Answer a financial question.

    Returns the curated answer on an exact question match; otherwise
    retrieves the `top_k` nearest stored answers and asks the LLM to
    answer from that context.

    Args:
        query: The user's question (free text).
        top_k: Number of retrieved context passages (default 2).

    Returns:
        The answer string (generated text with the prompt stripped).
    """
    # Fast path: exact match against the curated Q&A pairs.
    if query in custom_qas:
        return custom_qas[query]

    # Retrieve nearest stored answers as context.
    query_vec = embedder.encode([query], convert_to_numpy=True)
    _distances, indices = index.search(query_vec, top_k)
    # FAISS pads with -1 when fewer than top_k vectors are indexed; skip those.
    context = " ".join(df["text"][i] for i in indices[0] if i >= 0)

    prompt = f"Context: {context}\nQuestion: {query}\nAnswer:"
    # max_new_tokens (not max_length) so the prompt length never eats the
    # generation budget — max_length counts prompt tokens too.
    output = llm(prompt, max_new_tokens=100, do_sample=False)[0]["generated_text"]
    # distilgpt2 echoes the prompt; slice it off rather than str.replace,
    # which could also delete a later occurrence of the prompt text.
    answer = output[len(prompt):] if output.startswith(prompt) else output.replace(prompt, "")
    return answer.strip()


# 🎛️ Gradio UI
demo = gr.Interface(
    fn=answer_query,
    inputs=gr.Textbox(label="Ask a financial question"),
    outputs=gr.Textbox(label="Answer"),
    title="🧠 Financial Q&A (Open Source)",
    description="Only answers. No extra data.",
)

# Guard the launch so importing this module doesn't start a web server.
if __name__ == "__main__":
    demo.launch()