import gradio as gr
import os
import re
import requests
import numpy as np
import torch
from sklearn.neighbors import NearestNeighbors
from sentence_transformers import SentenceTransformer
from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM

# --- CONFIGURATION ---
HF_TOKEN = os.getenv("HF_TOKEN", "").strip()
HF_MODEL = "cutycat2000x/MeowGPT-3"  # Change this if needed
HF_API_URL = f"https://api-inference.huggingface.co/models/{HF_MODEL}"
headers = {"Authorization": f"Bearer {HF_TOKEN}"}

FILES = ["main1.txt", "main2.txt", "main3.txt", "main4.txt", "main5.txt", "main6.txt"]
EMBEDDING_MODEL = "sentence-transformers/all-MiniLM-L6-v2"
EMBEDDING_CACHE_FILE = "embeddings.npy"
CHUNKS_CACHE_FILE = "chunks.npy"


# --- FUNCTIONS ---
def test_model_connection():
    """Ping the Hugging Face Inference API to check that the configured model is reachable."""
    try:
        print("🔍 Testing Hugging Face model availability...")
        test_response = requests.get(HF_API_URL, headers=headers, timeout=10)
        print("Status Code:", test_response.status_code)
        print("Response JSON:", test_response.json())
    except Exception as e:
        print("❌ Connection Test Failed:", e)


def load_text_files(file_list):
    """Concatenate the contents of all knowledge-base text files into one string."""
    knowledge = ""
    for file_name in file_list:
        try:
            with open(file_name, "r", encoding="utf-8") as f:
                knowledge += "\n" + f.read()
        except Exception as e:
            print(f"Error reading {file_name}: {e}")
    return knowledge.strip()


def chunk_text(text, max_chunk_length=500):
    """Split the text into sentence-aligned chunks of roughly max_chunk_length characters."""
    sentences = re.split(r'(?<=[.!?])\s+', text)
    chunks = []
    current_chunk = ""
    for sentence in sentences:
        if len(current_chunk) + len(sentence) <= max_chunk_length:
            current_chunk += " " + sentence
        else:
            chunks.append(current_chunk.strip())
            current_chunk = sentence
    if current_chunk:
        chunks.append(current_chunk.strip())
    return chunks


def embed_texts(texts):
    return model.encode(texts, convert_to_numpy=True, normalize_embeddings=True)


def save_cache(embeddings, chunks):
    np.save(EMBEDDING_CACHE_FILE, embeddings)
    np.save(CHUNKS_CACHE_FILE, np.array(chunks))


def load_cache():
    if os.path.exists(EMBEDDING_CACHE_FILE) and os.path.exists(CHUNKS_CACHE_FILE):
        embeddings = np.load(EMBEDDING_CACHE_FILE, allow_pickle=True)
        chunks = np.load(CHUNKS_CACHE_FILE, allow_pickle=True).tolist()
        print("✅ Loaded cached embeddings and chunks.")
        return embeddings, chunks
    return None, None


def retrieve_chunks(query, top_k=5):
    """Return the top_k chunks closest to the query in embedding space (cosine distance)."""
    query_embedding = embed_texts([query])
    distances, indices = nn_model.kneighbors(query_embedding, n_neighbors=top_k)
    return [chunks[i] for i in indices[0]]


def build_prompt(question):
    """Assemble the system instruction, retrieved lecture excerpts, and user question into one prompt."""
    relevant_chunks = retrieve_chunks(question)
    context = "\n".join(relevant_chunks)

    system_instruction = """You are an AI-supported financial expert. Your role is to answer questions strictly within the context of the university lecture "Financial Markets" (Universität Duisburg-Essen).

Important instructions:
1. Base your answers primarily on the provided lecture excerpts ("lecture_slides").
2. If an answer is not directly covered by the lecture content, you may elaborate — but **only if you are absolutely certain**. Avoid making up information.
3. If you are unsure, reply politely: "Entschuldigung. Leider kenne ich die Antwort auf diese Frage nicht."
4. If a formula is relevant, show the **exact formula** and explain it in **simple terms**.
5. Do not give vague or speculative answers — it's better to skip a question than guess.
6. **Never generate your own questions. Only respond to the given question.**
7. **Always respond in German.**
8. Make your answers clear, fact-based, and well-structured.
"""

    prompt = f"""{system_instruction}

Vorlesungsinhalte:
{context}

--- Ende der Vorlesungsinhalte ---

Frage des Nutzers (bitte nur diese beantworten):
{question}

Antwort:"""
    return prompt


def respond(message, history):
    """Chat handler: build a RAG prompt for the incoming message and query the HF Inference API."""
    try:
        prompt = build_prompt(message)
        payload = {
            "inputs": prompt,
            "parameters": {
                "temperature": 0.2,
                "max_new_tokens": 400,
                "stop": ["Frage:", "Question:", "User:", "Frage des Nutzers"],
            },
        }
        response = requests.post(HF_API_URL, headers=headers, json=payload, timeout=60)
        response.raise_for_status()
        output = response.json()

        if isinstance(output, list) and "generated_text" in output[0]:
            generated_text = output[0]["generated_text"]
            # generated_text echoes the prompt; strip it so only the new completion remains.
            answer = generated_text[len(prompt):].strip()
        else:
            print("❗️HF API returned unexpected format:", output)
            answer = "❌ Modell hat keine gültige Antwort geliefert. Bitte später erneut versuchen."
    except Exception as e:
        print("API Error:", e)
        try:
            print("Raw HF response:", response.text)
        except Exception:
            pass
        answer = "❌ Error contacting the model. Please check your token, timeout, or model availability."

    if history is None:
        history = []
    history.append({"role": "assistant", "content": answer})
    return answer


# --- INIT SECTION ---
print("🔄 Initializing embedding model...")
model = SentenceTransformer(EMBEDDING_MODEL)

chunk_embeddings, chunks = load_cache()
if chunk_embeddings is None or chunks is None:
    print("🛠 No cache found. Processing text...")
    knowledge_base = load_text_files(FILES)
    chunks = chunk_text(knowledge_base)
    chunk_embeddings = embed_texts(chunks)
    save_cache(chunk_embeddings, chunks)
    print("✅ Embeddings and chunks cached.")

nn_model = NearestNeighbors(metric="cosine")
nn_model.fit(chunk_embeddings)

# --- GRADIO INTERFACE ---
demo = gr.ChatInterface(
    fn=respond,
    title="📚 RAG Chatbot Finanzmärkte",
    description="Stelle Fragen basierend auf den hochgeladenen Vorlesungstexten.",
    chatbot=gr.Chatbot(type="messages"),
)

if __name__ == "__main__":
    test_model_connection()
    demo.launch(debug=True)