# ChatbotFB / app.py
import gradio as gr
import os
import re
import requests
import numpy as np
from sklearn.neighbors import NearestNeighbors
from sentence_transformers import SentenceTransformer
# --- CONFIGURATION ---
HF_TOKEN = os.getenv("HF_TOKEN", "").strip()
HF_MODEL = "HuggingFaceH4/zephyr-7b-beta" # Change this if needed
HF_API_URL = f"https://api-inference.huggingface.co/models/{HF_MODEL}"
headers = {"Authorization": f"Bearer {HF_TOKEN}"}
FILES = ["main1.txt", "main2.txt", "main3.txt", "main4.txt", "main5.txt", "main6.txt"]
EMBEDDING_MODEL = "sentence-transformers/all-MiniLM-L6-v2"
EMBEDDING_CACHE_FILE = "embeddings.npy"
CHUNKS_CACHE_FILE = "chunks.npy"
# --- FUNCTIONS ---
def test_model_connection():
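    """Ping the Hugging Face Inference API once and log status code and body."""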
    try:
        print("🔍 Testing Hugging Face model availability...")
        test_response = requests.get(HF_API_URL, headers=headers, timeout=10)
        print("Status Code:", test_response.status_code)
        print("Response JSON:", test_response.json())
    except Exception as e:
        print("❌ Connection Test Failed:", e)
def load_text_files(file_list):
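    """Concatenate the contents of every readable file in file_list into one string."""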
    knowledge = ""
    for file_name in file_list:
        try:
            with open(file_name, "r", encoding="utf-8") as f:
                knowledge += "\n" + f.read()
        except Exception as e:
            print(f"Error reading {file_name}: {e}")
    return knowledge.strip()
def chunk_text(text, max_chunk_length=500):
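    """Split text into sentence-aligned chunks of at most max_chunk_length characters."""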
    sentences = re.split(r'(?<=[.!?])\s+', text)
    chunks = []
    current_chunk = ""
    for sentence in sentences:
        if len(current_chunk) + len(sentence) <= max_chunk_length:
            current_chunk += " " + sentence
        else:
            chunks.append(current_chunk.strip())
            current_chunk = sentence
    if current_chunk:
        chunks.append(current_chunk.strip())
    return chunks
def embed_texts(texts):
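    """Return normalised sentence embeddings for the given texts as a NumPy array."""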
    return model.encode(texts, convert_to_numpy=True, normalize_embeddings=True)
def save_cache(embeddings, chunks):
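    """Persist embeddings and chunks to disk so they are not recomputed on every restart."""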
    np.save(EMBEDDING_CACHE_FILE, embeddings)
    np.save(CHUNKS_CACHE_FILE, np.array(chunks))
def load_cache():
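    """Load cached embeddings and chunks if both cache files exist, else return (None, None)."""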
    if os.path.exists(EMBEDDING_CACHE_FILE) and os.path.exists(CHUNKS_CACHE_FILE):
        embeddings = np.load(EMBEDDING_CACHE_FILE, allow_pickle=True)
        chunks = np.load(CHUNKS_CACHE_FILE, allow_pickle=True).tolist()
        print("✅ Loaded cached embeddings and chunks.")
        return embeddings, chunks
    return None, None
def retrieve_chunks(query, top_k=5):
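    """Return the top_k chunks closest to the query by cosine distance."""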
    query_embedding = embed_texts([query])
    distances, indices = nn_model.kneighbors(query_embedding, n_neighbors=top_k)
    return [chunks[i] for i in indices[0]]
def build_prompt(question):
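    """Assemble the system instruction, retrieved lecture context, and user question into one prompt."""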
    relevant_chunks = retrieve_chunks(question)
    context = "\n".join(relevant_chunks)
    system_instruction = """You are an AI-supported financial expert. Your role is to answer questions strictly within the context of the university lecture "Financial Markets" (Universität Duisburg-Essen).
Important instructions:
1. Base your answers primarily on the provided lecture excerpts ("lecture_slides").
2. If an answer is not directly covered by the lecture content, you may elaborate — but **only if you are absolutely certain**. Avoid making up information.
3. If you are unsure, reply politely:
"Entschuldigung. Leider kenne ich die Antwort auf diese Frage nicht."
4. If a formula is relevant, show the **exact formula** and explain it in **simple terms**.
5. Do not give vague or speculative answers — it's better to skip a question than guess.
6. **Never generate your own questions. Only respond to the given question.**
7. **Always respond in German.**
8. Make your answers clear, fact-based, and well-structured.
"""
    prompt = f"""{system_instruction}
Vorlesungsinhalte:
{context}
--- Ende der Vorlesungsinhalte ---
Frage des Nutzers (bitte nur diese beantworten): {question}
Antwort:"""
    return prompt
def respond(message, history):
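    """Build a RAG prompt from the user message, query the HF Inference API, and return the answer."""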
    try:
        prompt = build_prompt(message)
        payload = {
            "inputs": prompt,
            "parameters": {
                "temperature": 0.2,
                "max_new_tokens": 400,
                "stop": ["Frage:", "Question:", "User:", "Frage des Nutzers"]
            },
        }
        response = requests.post(HF_API_URL, headers=headers, json=payload, timeout=60)
        response.raise_for_status()
        output = response.json()
        if isinstance(output, list) and output and isinstance(output[0], dict) and "generated_text" in output[0]:
            # The API echoes the prompt, so strip it to keep only the completion.
            generated_text = output[0]["generated_text"]
            answer = generated_text[len(prompt):].strip()
        else:
            print("❗️HF API returned unexpected format:", output)
            answer = "❌ Modell hat keine gültige Antwort geliefert. Bitte später erneut versuchen."
    except Exception as e:
        print("API Error:", e)
        try:
            print("Raw HF response:", response.text)
        except Exception:
            # response may not exist if the request itself failed.
            pass
        answer = "❌ Error contacting the model. Please check your token, timeout, or model availability."
    if history is None:
        history = []
    history.append({"role": "assistant", "content": answer})
    return answer
# --- INIT SECTION ---
print("🔄 Initializing embedding model...")
model = SentenceTransformer(EMBEDDING_MODEL)
chunk_embeddings, chunks = load_cache()
if chunk_embeddings is None or chunks is None:
    print("🛠 No cache found. Processing text...")
    knowledge_base = load_text_files(FILES)
    chunks = chunk_text(knowledge_base)
    chunk_embeddings = embed_texts(chunks)
    save_cache(chunk_embeddings, chunks)
    print("✅ Embeddings and chunks cached.")
nn_model = NearestNeighbors(metric="cosine")
nn_model.fit(chunk_embeddings)
# --- GRADIO INTERFACE ---
demo = gr.ChatInterface(
    fn=respond,
    title="📚 RAG Chatbot Finanzmärkte",
    description="Stelle Fragen basierend auf den hochgeladenen Vorlesungstexten.",
    chatbot=gr.Chatbot(type="messages"),
)
if __name__ == "__main__":
    test_model_connection()
    demo.launch(debug=True)