"""Financial Q&A demo: exact-match lookup with RAG-style fallback.

Known questions are answered verbatim from `custom_qas`; anything else is
answered by retrieving the closest stored answers via FAISS and prompting a
small open-source LLM (distilgpt2) with that context.
"""

import pandas as pd
from sentence_transformers import SentenceTransformer
from transformers import pipeline
import faiss
import numpy as np
import gradio as gr

# 📊 Financial Q&A pairs — the authoritative, curated answers.
custom_qas = {
    "What was Q1 revenue?": "Q1 revenue was $1.2B, up 10% YoY.",
    "Why did Q2 profit increase?": "Due to cost cuts, Q2 profit reached $300M.",
    "What is the Q3 forecast?": "5% growth in emerging markets.",
    "Where is Company X expanding?": "Into APAC in Q4.",
    "How much did dividends increase?": "By 15% this year.",
}

# 📄 Convert answers to searchable docs (answers only; questions are keys).
docs = list(custom_qas.values())
df = pd.DataFrame(docs, columns=["text"])

# 🧠 Embed with SentenceTransformer (384-dim MiniLM embeddings).
embedder = SentenceTransformer("all-MiniLM-L6-v2")
doc_embeddings = embedder.encode(df["text"].tolist(), convert_to_numpy=True)

# 🗂️ Index with FAISS — exact L2 search; fine at this tiny corpus size.
index = faiss.IndexFlatL2(doc_embeddings.shape[1])
index.add(doc_embeddings)

# 🤖 Load open-source LLM for the generative fallback path.
llm = pipeline("text-generation", model="distilgpt2")


def answer_query(query, top_k=2):
    """Answer a financial question.

    Returns the curated answer on an exact question match; otherwise
    retrieves the `top_k` nearest stored answers and asks the LLM to
    answer from that context.

    Args:
        query: The user's question (free text).
        top_k: Number of retrieved context passages (default 2).

    Returns:
        The answer string (generated text with the prompt stripped).
    """
    # Fast path: exact match against the curated Q&A pairs.
    if query in custom_qas:
        return custom_qas[query]

    # Retrieve nearest stored answers as context.
    query_vec = embedder.encode([query], convert_to_numpy=True)
    _distances, indices = index.search(query_vec, top_k)
    # FAISS pads with -1 when fewer than top_k vectors are indexed; skip those.
    context = " ".join(df["text"][i] for i in indices[0] if i >= 0)

    prompt = f"Context: {context}\nQuestion: {query}\nAnswer:"
    # max_new_tokens (not max_length) so the prompt length never eats the
    # generation budget — max_length counts prompt tokens too.
    output = llm(prompt, max_new_tokens=100, do_sample=False)[0]["generated_text"]
    # distilgpt2 echoes the prompt; slice it off rather than str.replace,
    # which could also delete a later occurrence of the prompt text.
    answer = output[len(prompt):] if output.startswith(prompt) else output.replace(prompt, "")
    return answer.strip()


# 🎛️ Gradio UI
demo = gr.Interface(
    fn=answer_query,
    inputs=gr.Textbox(label="Ask a financial question"),
    outputs=gr.Textbox(label="Answer"),
    title="🧠 Financial Q&A (Open Source)",
    description="Only answers. No extra data.",
)

# Guard the launch so importing this module doesn't start a web server.
if __name__ == "__main__":
    demo.launch()