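# Streamlit chat app: Arabic RAG over a local policies PDF (Policies001.pdf),
# using FAISS for retrieval and a GGUF model served by llama-cpp for generation.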
import os

import streamlit as st
from huggingface_hub import hf_hub_download
from langchain_community.llms import LlamaCpp
from langchain.chains import ConversationalRetrievalChain
from langchain.memory import ConversationBufferMemory
from langchain_community.document_loaders import PyPDFLoader
from langchain_experimental.text_splitter import SemanticChunker
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS
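# The imports above assume roughly these packages (pip): streamlit, huggingface_hub,
# langchain, langchain-community, langchain-experimental, langchain-huggingface,
# faiss-cpu, llama-cpp-python, pypdf, and sentence-transformers for the embeddings.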

# --- 1) Page setup ---
st.title("💬 المحادثة التفاعلية - إدارة البيانات وحماية البيانات الشخصية")

local_file = "Policies001.pdf"
index_folder = "faiss_index"

# Custom CSS so Arabic text renders right-to-left (RTL)
st.markdown(
    """
    <style>
    .rtl {
        direction: rtl;
        text-align: right;
    }
    </style>
    """,
    unsafe_allow_html=True,
)

# --- 2) Load or build the FAISS vector store ---
@st.cache_resource  # cache so the index is built once, not on every Streamlit rerun
def build_vectorstore():
    embeddings = HuggingFaceEmbeddings(
        model_name="CAMeL-Lab/bert-base-arabic-camelbert-mix",
        model_kwargs={"trust_remote_code": True},
    )
    if os.path.exists(index_folder):
        # Load the saved index if it already exists
        return FAISS.load_local(
            index_folder, embeddings, allow_dangerous_deserialization=True
        )

    # Load the PDF and split it into semantically coherent chunks
    loader = PyPDFLoader(local_file)
    documents = loader.load()
    text_splitter = SemanticChunker(
        embeddings=embeddings,
        breakpoint_threshold_type="percentile",
        breakpoint_threshold_amount=90,
    )
    chunked_docs = text_splitter.split_documents(documents)

    # Build the FAISS index and persist it for later runs
    vectorstore = FAISS.from_documents(chunked_docs, embeddings)
    vectorstore.save_local(index_folder)
    return vectorstore
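# Note: because the index is persisted with save_local(), delete the faiss_index
# folder to force a rebuild after changing Policies001.pdf.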

# --- 3) Load the model ---
@st.cache_resource  # cache so the GGUF model is downloaded and loaded only once
def load_llm():
    """
    Downloads the Q4_K_M GGUF model from mobeidat's Hugging Face repository
    and loads it via llama-cpp.
    """
    # 1) Download the GGUF weights from Hugging Face
    model_file = hf_hub_download(
        repo_id="mobeidat/c4ai-command-r7b-arabic-02-2025-Q4_K_M-GGUF",
        filename="c4ai-command-r7b-arabic-02-2025-q4_k_m.gguf",
        local_dir="./models",
        local_dir_use_symlinks=False,
    )

    # 2) Load the model with llama-cpp via LangChain's LlamaCpp wrapper
    llm = LlamaCpp(
        model_path=model_file,
        flash_attn=False,
        n_ctx=2048,   # or 4096, depending on your needs
        n_batch=512,  # or 256, depending on your hardware
        chat_format="chatml",
    )
    return llm
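# Note: llama-cpp runs on the CPU by default. If the host has a GPU and
# llama-cpp-python was built with CUDA support, passing n_gpu_layers=-1 to
# LlamaCpp above would offload all layers to it (an assumption about the host).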

# --- 4) Build the conversational chain ---
def build_conversational_chain(vectorstore):
    """
    Creates a ConversationalRetrievalChain using the local llama-cpp-based LLM
    and a ConversationBufferMemory for multi-turn Q&A.
    """
    llm = load_llm()

    # Store the chat history in memory so the chain can handle multi-turn conversations
    memory = ConversationBufferMemory(
        memory_key="chat_history",
        return_messages=True,
    )

    qa_chain = ConversationalRetrievalChain.from_llm(
        llm=llm,
        retriever=vectorstore.as_retriever(search_type="similarity", search_kwargs={"k": 5}),
        memory=memory,
        verbose=True,
    )
    return qa_chain
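# ConversationalRetrievalChain rewrites each follow-up question into a standalone
# question using the chat history, retrieves the k=5 most similar chunks from
# FAISS, and then asks the LLM to answer from those chunks.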

# --- 5) Run the app ---
# Keep the chain (and its memory) in session_state so the conversation survives
# Streamlit reruns instead of being rebuilt, and reset, on every message.
if "qa_chain" not in st.session_state:
    vectorstore = build_vectorstore()
    st.session_state["qa_chain"] = build_conversational_chain(vectorstore)
qa_chain = st.session_state["qa_chain"]

if "messages" not in st.session_state:
    st.session_state["messages"] = [
        {"role": "assistant", "content": "👋 مرحبًا! اسألني أي شيء عن إدارة البيانات وحماية البيانات الشخصية!"}
    ]

# Render the conversation so far
for msg in st.session_state["messages"]:
    with st.chat_message(msg["role"]):
        st.markdown(f'<div class="rtl">{msg["content"]}</div>', unsafe_allow_html=True)

# Read the user's input
user_input = st.chat_input("اكتب سؤالك هنا")

if user_input:
    # Show the user's message
    st.session_state["messages"].append({"role": "user", "content": user_input})
    with st.chat_message("user"):
        st.markdown(f'<div class="rtl">{user_input}</div>', unsafe_allow_html=True)

    # Run the conversational chain to get an answer
    response = qa_chain.invoke({"question": user_input})
    answer = response["answer"]

    # Show the answer
    st.session_state["messages"].append({"role": "assistant", "content": answer})
    with st.chat_message("assistant"):
        st.markdown(f'<div class="rtl">{answer}</div>', unsafe_allow_html=True)
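# To run locally, assuming this file is saved as app.py and the packages listed
# near the imports are installed:
#   streamlit run app.py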