import streamlit as st
import chromadb
from langchain.schema import Document
from langchain_core.messages import AIMessage, HumanMessage
from sentence_transformers import SentenceTransformer
from langchain.prompts.chat import ChatPromptTemplate
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import Chroma
from langchain.document_loaders import PyPDFLoader
from aift.multimodal import textqa
from aift import setting
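
# Clearing Chroma's shared client cache on each run is a common workaround for
# stale-client errors when Streamlit re-executes this script.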
chromadb.api.client.SharedSystemClient.clear_system_cache()

# Set API key for Pathumma
setting.set_api_key('T69FqnYgOdreO5G0nZaM8gHcjo1sifyU')
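
# NOTE: for a shared deployment the key is better read from st.secrets or an
# environment variable instead of being hardcoded, e.g.
# setting.set_api_key(st.secrets["AIFT_API_KEY"])  # secret name is illustrative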

# App Configuration
st.set_page_config(page_title="Nong Nok", page_icon="🤖")

st.markdown(
    """
<style>
    @import url('https://fonts.googleapis.com/css2?family=Kanit:wght@700&display=swap');
    body {
        margin: 0;
        padding: 0;
    }
    .header-container {
        position: absolute;
        top: 100%;
        left: 50%;
        transform: translate(-50%, -50%);
        text-align: center;
        margin-bottom: 25px;
    }
    .header-title {
        font-size: 4em;
        margin: 0;
        white-space: nowrap;
        font-family: 'Kanit', sans-serif;
        color: white; /* Fallback color */
        -webkit-text-stroke: 2px black; /* Stroke width and color */
        text-shadow: 2px 2px 4px rgba(0, 0, 0, 0.5); /* Optional shadow for better visibility */
        animation: fadeIn 1s forwards;
    }
    .sub-title {
        position: absolute;
        bottom: -10px;
        right: -20px;
        font-size: 1.5em;
        transform: rotate(-10deg);
        color: #21A2DB;
        white-space: nowrap;
    }
    @keyframes fadeIn {
        0% {
            color: transparent;
        }
        100% {
            color: white;
        }
    }
</style>
<div class="header-container">
    <h1 class="header-title">
        PDPA Chatbot
    </h1>
    <div class="sub-title">( Noknoy-0.5 )</div>
</div>
    """,
    unsafe_allow_html=True
)

st.markdown(" ")
st.markdown(" ")
st.markdown(" ")

# Custom embeddings: wrap a Thai SimCSE SentenceTransformer behind the
# embed_query / embed_documents interface that LangChain vector stores expect.
class CustomEmbeddings:
    def __init__(self, model_name="mrp/simcse-model-m-bert-thai-cased"):
        self.model = SentenceTransformer(model_name)

    def embed_query(self, text):
        # Encode a single query string and return a plain list of floats.
        return self.model.encode([text])[0].tolist()

    def embed_documents(self, texts):
        # Encode all document chunks in one batch for efficiency.
        return self.model.encode(list(texts)).tolist()
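
# Chroma calls embed_documents() when the PDF chunks are indexed and
# embed_query() when the retriever runs a similarity search.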

# Pathumma Model Wrapper
class PathummaModel:
    def __init__(self):
        pass

    def generate(self, instruction: str, return_json: bool = False):
        # Call the Pathumma text-QA endpoint; with return_json=True the service
        # returns a dict, so extract its "content" field.
        response = textqa.generate(instruction=instruction, return_json=return_json)
        if return_json:
            return response.get("content", "")
        return response

    def __call__(self, input: str):
        # Allow the wrapper to be used as a plain callable that returns text.
        return self.generate(input, return_json=False)
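
# Usage sketch (illustrative query): answer = model_local("What rights does PDPA give data subjects?")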

# Initialize Pathumma model
model_local = PathummaModel()

# Load PDF file
file_path = "langchain.pdf"
loader = PyPDFLoader(file_path)
docs = loader.load()

# Split text into manageable chunks (chunk_size and chunk_overlap are counted
# in tiktoken tokens, not characters)
text_splitter = CharacterTextSplitter.from_tiktoken_encoder(chunk_size=7500, chunk_overlap=100)
doc_splits = text_splitter.split_documents(docs)

# Convert documents to embeddings and store them in Chroma
vectorstore = Chroma.from_documents(
    documents=doc_splits,
    collection_name="rag-chroma",
    embedding=CustomEmbeddings(model_name="mrp/simcse-model-m-bert-thai-cased"),
)
retriever = vectorstore.as_retriever()
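
# Possible optimization (sketch, not part of the original app): Streamlit reruns
# this whole script on every user interaction, so the PDF is re-loaded and
# re-embedded each time. Wrapping the index build in a cached factory avoids that:
#
#   @st.cache_resource
#   def build_retriever(path: str):
#       chunks = CharacterTextSplitter.from_tiktoken_encoder(
#           chunk_size=7500, chunk_overlap=100
#       ).split_documents(PyPDFLoader(path).load())
#       return Chroma.from_documents(
#           documents=chunks,
#           collection_name="rag-chroma",
#           embedding=CustomEmbeddings(),
#       ).as_retriever()
#
#   retriever = build_retriever(file_path)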

# Generate a response using the retriever (simple RAG: retrieve, stuff into prompt, generate)
def get_response(user_query):
    # Newer LangChain versions prefer retriever.invoke(user_query)
    retrieved_docs = retriever.get_relevant_documents(user_query)
    retrieved_context = " ".join([doc.page_content for doc in retrieved_docs])

    # Thai prompt: "Answer the question based only on the following context: {context} / Question: {question}"
    after_rag_template = """ตอบคำถามโดยพิจารณาจากบริบทต่อไปนี้เท่านั้น:
{context}
คำถาม: {question}
"""
    prompt = after_rag_template.format(context=retrieved_context, question=user_query)
    response = model_local(prompt)
    return response

# Initialize session state
if "chat_history" not in st.session_state:
    st.session_state.chat_history = [
        # Thai greeting: "Welcome to Nong Nok, a chatbot ready to give you information
        # about the Personal Data Protection Act (PDPA). How can I help you?"
        AIMessage(content='🐦 ยินดีต้อนรับสู่น้องนก แชทบอทที่พร้อมจะให้ข้อมูลคุณเกี่ยวกับพระราชบัญญัติคุ้มครองข้อมูลส่วนบุคคล (PDPA) มีอะไรให้ช่วยไหมครับ?'),
    ]

# Render chat history
for message in st.session_state.chat_history:
    if isinstance(message, AIMessage):
        with st.chat_message("AI"):
            st.write(message.content)
    elif isinstance(message, HumanMessage):
        with st.chat_message("Human"):
            st.write(message.content)

# User input ("พิมพ์ข้อความที่นี่..." = "Type a message here...")
user_query = st.chat_input("พิมพ์ข้อความที่นี่...")
if user_query is not None and user_query.strip() != "":
    st.session_state.chat_history.append(HumanMessage(content=user_query))
    with st.chat_message("Human"):
        st.markdown(user_query)
    with st.chat_message("AI"):
        placeholder = st.empty()
        placeholder.markdown("กำลังสร้างคำตอบ...")  # "Generating an answer..."
        response = get_response(user_query)
        placeholder.markdown(response)
    st.session_state.chat_history.append(AIMessage(content=response))