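# RAG chatbot for the "Financial Markets" lecture (Universität Duisburg-Essen).
# Lecture texts are split into chunks, embedded with a MiniLM sentence transformer,
# and the most similar chunks are passed as context to a hosted LLM via the
# Hugging Face Inference API.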
import gradio as gr
import os
import re
import requests
import numpy as np
import torch
from sklearn.neighbors import NearestNeighbors
from sentence_transformers import SentenceTransformer
from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM

# --- CONFIGURATION ---
HF_TOKEN = os.getenv("HF_TOKEN", "").strip()
HF_MODEL = "HuggingFaceH4/zephyr-7b-beta"  # Change this if needed
HF_API_URL = f"https://api-inference.huggingface.co/models/{HF_MODEL}"
headers = {"Authorization": f"Bearer {HF_TOKEN}"}

FILES = ["main1.txt", "main2.txt", "main3.txt", "main4.txt", "main5.txt", "main6.txt"]
EMBEDDING_MODEL = "sentence-transformers/all-MiniLM-L6-v2"
EMBEDDING_CACHE_FILE = "embeddings.npy"
CHUNKS_CACHE_FILE = "chunks.npy"
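# embeddings.npy and chunks.npy are written on the first start-up; delete both files
# to force a rebuild after the main*.txt lecture files change.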
# --- FUNCTIONS ---
def test_model_connection():
    try:
        print("🔍 Testing Hugging Face model availability...")
        test_response = requests.get(HF_API_URL, headers=headers, timeout=10)
        print("Status Code:", test_response.status_code)
        print("Response JSON:", test_response.json())
    except Exception as e:
        print("❌ Connection Test Failed:", e)

def load_text_files(file_list):
    knowledge = ""
    for file_name in file_list:
        try:
            with open(file_name, "r", encoding="utf-8") as f:
                knowledge += "\n" + f.read()
        except Exception as e:
            print(f"Error reading {file_name}: {e}")
    return knowledge.strip()

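# chunk_text greedily packs whole sentences into chunks of roughly max_chunk_length
# characters; a single sentence longer than the limit still becomes its own chunk.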
def chunk_text(text, max_chunk_length=500):
    sentences = re.split(r'(?<=[.!?])\s+', text)
    chunks = []
    current_chunk = ""
    for sentence in sentences:
        if len(current_chunk) + len(sentence) <= max_chunk_length:
            current_chunk += " " + sentence
        else:
            chunks.append(current_chunk.strip())
            current_chunk = sentence
    if current_chunk:
        chunks.append(current_chunk.strip())
    return chunks

def embed_texts(texts):
    return model.encode(texts, convert_to_numpy=True, normalize_embeddings=True)

def save_cache(embeddings, chunks):
    np.save(EMBEDDING_CACHE_FILE, embeddings)
    np.save(CHUNKS_CACHE_FILE, np.array(chunks))

def load_cache():
    if os.path.exists(EMBEDDING_CACHE_FILE) and os.path.exists(CHUNKS_CACHE_FILE):
        embeddings = np.load(EMBEDDING_CACHE_FILE, allow_pickle=True)
        chunks = np.load(CHUNKS_CACHE_FILE, allow_pickle=True).tolist()
        print("✅ Loaded cached embeddings and chunks.")
        return embeddings, chunks
    return None, None

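# Embeddings are L2-normalised (normalize_embeddings=True), and NearestNeighbors uses
# cosine distance, so the smallest distances correspond to the most similar chunks.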
def retrieve_chunks(query, top_k=5):
    query_embedding = embed_texts([query])
    # Never request more neighbours than there are chunks in the index.
    n_neighbors = min(top_k, len(chunks))
    distances, indices = nn_model.kneighbors(query_embedding, n_neighbors=n_neighbors)
    return [chunks[i] for i in indices[0]]

def build_prompt(question):
    relevant_chunks = retrieve_chunks(question)
    context = "\n".join(relevant_chunks)

    system_instruction = """You are an AI-supported financial expert. Your role is to answer questions strictly within the context of the university lecture "Financial Markets" (Universität Duisburg-Essen).
Important instructions:
1. Base your answers primarily on the provided lecture excerpts ("lecture_slides").
2. If an answer is not directly covered by the lecture content, you may elaborate — but **only if you are absolutely certain**. Avoid making up information.
3. If you are unsure, reply politely:
"Entschuldigung. Leider kenne ich die Antwort auf diese Frage nicht."
4. If a formula is relevant, show the **exact formula** and explain it in **simple terms**.
5. Do not give vague or speculative answers — it's better to skip a question than guess.
6. **Never generate your own questions. Only respond to the given question.**
7. **Always respond in German.**
8. Make your answers clear, fact-based, and well-structured.
"""

    prompt = f"""{system_instruction}
Vorlesungsinhalte:
{context}
--- Ende der Vorlesungsinhalte ---
Frage des Nutzers (bitte nur diese beantworten): {question}
Antwort:"""
    return prompt

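# respond() is the gr.ChatInterface callback: it receives the latest user message and
# the chat history, builds the RAG prompt, and queries the Hugging Face Inference API.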
def respond(message, history):
    try:
        prompt = build_prompt(message)
        payload = {
            "inputs": prompt,
            "parameters": {
                "temperature": 0.2,
                "max_new_tokens": 400,
                "stop": ["Frage:", "Question:", "User:", "Frage des Nutzers"]
            },
        }
        response = requests.post(HF_API_URL, headers=headers, json=payload, timeout=60)
        response.raise_for_status()
        output = response.json()

        if isinstance(output, list) and output and "generated_text" in output[0]:
            # Strip the echoed prompt so that only the newly generated text remains.
            generated_text = output[0]["generated_text"]
            answer = generated_text[len(prompt):].strip()
        else:
            print("❗️HF API returned unexpected format:", output)
            answer = "❌ Modell hat keine gültige Antwort geliefert. Bitte später erneut versuchen."
    except Exception as e:
        print("API Error:", e)
        try:
            print("Raw HF response:", response.text)
        except Exception:
            # response may not exist if the request itself failed.
            pass
        answer = "❌ Error contacting the model. Please check your token, timeout, or model availability."

    if history is None:
        history = []
    history.append({"role": "assistant", "content": answer})
    return answer

# --- INIT SECTION ---
print("🔄 Initializing embedding model...")
model = SentenceTransformer(EMBEDDING_MODEL)

chunk_embeddings, chunks = load_cache()
if chunk_embeddings is None or chunks is None:
    print("🛠 No cache found. Processing text...")
    knowledge_base = load_text_files(FILES)
    chunks = chunk_text(knowledge_base)
    chunk_embeddings = embed_texts(chunks)
    save_cache(chunk_embeddings, chunks)
    print("✅ Embeddings and chunks cached.")

nn_model = NearestNeighbors(metric="cosine")
nn_model.fit(chunk_embeddings)
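# type="messages" makes the chatbot exchange {"role": ..., "content": ...} dicts,
# matching the message format appended to history in respond().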
# --- GRADIO INTERFACE ---
demo = gr.ChatInterface(
    fn=respond,
    title="📚 RAG Chatbot Finanzmärkte",
    description="Stelle Fragen basierend auf den hochgeladenen Vorlesungstexten.",
    chatbot=gr.Chatbot(type="messages"),
)

if __name__ == "__main__":
    test_model_connection()
    demo.launch(debug=True)