Spaces:
Running
Running
Upload app.py
Browse files
app.py
ADDED
@@ -0,0 +1,120 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
import gradio as gr
|
3 |
+
import bs4
|
4 |
+
|
5 |
+
from langchain_community.document_loaders import WebBaseLoader
|
6 |
+
from langchain.text_splitter import CharacterTextSplitter
|
7 |
+
from langchain_community.embeddings import HuggingFaceEmbeddings
|
8 |
+
from langchain.vectorstores import FAISS
|
9 |
+
from langchain.chains import RetrievalQA
|
10 |
+
from langchain_groq import ChatGroq
|
11 |
+
|
12 |
+
# Load the Groq API key from an environment variable.
|
13 |
+
groq_api_key = os.getenv("GROQ_API_KEY", "")  # falls back to "" when the variable is unset
|
14 |
+
|
15 |
+
# Pages on archives.go.kr (National Archives of Korea) that are loaded and
# indexed as the chatbot's knowledge base.
urls = [
    "https://archives.go.kr/next/newsearch/listSubjectContent.do?subjectFieldId=000011",
    "https://archives.go.kr/next/newsearch/listSubjectDescription.do?id=003140&pageFlag=A&sitePage=1-2-1",
    "https://archives.go.kr/next/newsearch/listSubjectDescription.do?id=003288&pageFlag=A&sitePage=1-2-1",
    "https://archives.go.kr/next/newsearch/listSubjectDescription.do?id=003290&pageFlag=A&sitePage=1-2-1",
    "https://archives.go.kr/next/newsearch/listSubjectDescription.do?id=003292&pageFlag=A&sitePage=1-2-1",
    "https://archives.go.kr/next/newsearch/listSubjectDescription.do?id=008757&pageFlag=A&sitePage=1-2-1",
    "https://archives.go.kr/next/newsearch/listSubjectDescription.do?id=003293&pageFlag=A&sitePage=1-2-1",
    "https://archives.go.kr/next/newsearch/listSubjectDescription.do?id=003294&pageFlag=A&sitePage=1-2-1",
    "https://archives.go.kr/next/newsearch/listSubjectDescription.do?id=003295&pageFlag=A&sitePage=1-2-1",
    "https://archives.go.kr/next/newsearch/listSubjectDescription.do?id=003289&pageFlag=A&sitePage=1-2-1",
    "https://archives.go.kr/next/newsearch/listSubjectDescription.do?id=010816&pageFlag=A&sitePage=1-2-1",
    "https://archives.go.kr/next/newsearch/listSubjectDescription.do?id=010817&pageFlag=A&sitePage=1-2-1",
    "https://archives.go.kr/next/newsearch/listSubjectDescription.do?id=009154&pageFlag=A&sitePage=1-2-1",
    "https://archives.go.kr/next/newsearch/listSubjectDescription.do?id=003260&pageFlag=A&sitePage=1-2-1",
    "https://archives.go.kr/next/newsearch/listSubjectDescription.do?id=003278&pageFlag=A&sitePage=1-2-1",
    "https://archives.go.kr/next/newsearch/listSubjectDescription.do?id=003281&pageFlag=A&sitePage=1-2-1",
    "https://archives.go.kr/next/newsearch/listSubjectDescription.do?id=003283&pageFlag=A&sitePage=1-2-1",
    "https://archives.go.kr/next/newsearch/listSubjectDescription.do?id=003284&pageFlag=A&sitePage=1-2-1",
    "https://archives.go.kr/next/newsearch/listSubjectDescription.do?id=003280&pageFlag=A&sitePage=1-2-1",
    "https://archives.go.kr/next/newsearch/listSubjectDescription.do?id=003282&pageFlag=A&sitePage=1-2-1",
    "https://archives.go.kr/next/newsearch/listSubjectDescription.do?id=003287&pageFlag=A&sitePage=1-2-1",
    "https://archives.go.kr/next/newsearch/listSubjectDescription.do?id=003286&pageFlag=A&sitePage=1-2-1",
    "https://archives.go.kr/next/newsearch/listSubjectDescription.do?id=003285&pageFlag=A&sitePage=1-2-1",
    "https://archives.go.kr/next/newsearch/listSubjectDescription.do?id=003279&pageFlag=A&sitePage=1-2-1",
    "https://archives.go.kr/next/newsearch/listSubjectDescription.do?id=003141&pageFlag=A&sitePage=1-2-1",
    "https://archives.go.kr/next/newsearch/listSubjectDescription.do?id=003143&pageFlag=A&sitePage=1-2-1",
    "https://archives.go.kr/next/newsearch/listSubjectDescription.do?id=003144&pageFlag=A&sitePage=1-2-1",
    "https://archives.go.kr/next/newsearch/listSubjectDescription.do?id=003142&pageFlag=A&sitePage=1-2-1",
    "https://archives.go.kr/next/newsearch/listSubjectDescription.do?id=008653&pageFlag=A&sitePage=1-2-1",
    "https://archives.go.kr/next/newsearch/listSubjectDescription.do?id=010827&pageFlag=A&sitePage=1-2-1",
    "https://archives.go.kr/next/newsearch/listSubjectDescription.do?id=008582&pageFlag=A&sitePage=1-2-1",
    "https://archives.go.kr/next/newsearch/listSubjectDescription.do?id=008663&pageFlag=A&sitePage=1-2-1",
    "https://archives.go.kr/next/newsearch/listSubjectDescription.do?id=008581&pageFlag=A&sitePage=1-2-1",
    "https://archives.go.kr/next/newsearch/listSubjectDescription.do?id=010828&pageFlag=A&sitePage=1-2-1",
    "https://archives.go.kr/next/newsearch/listSubjectDescription.do?id=010830&pageFlag=A&sitePage=1-2-1",
    "https://archives.go.kr/next/newsearch/listSubjectDescription.do?id=010831&pageFlag=A&sitePage=1-2-1",
    "https://archives.go.kr/next/newsearch/listSubjectDescription.do?id=003145&pageFlag=A&sitePage=1-2-1",
    "https://archives.go.kr/next/newsearch/listSubjectDescription.do?id=009425&pageFlag=A&sitePage=1-2-1",
    "https://archives.go.kr/next/newsearch/listSubjectDescription.do?id=003146&pageFlag=A&sitePage=1-2-1",
    "https://archives.go.kr/next/newsearch/listSubjectDescription.do?id=010821&pageFlag=A&sitePage=1-2-1",
    "https://archives.go.kr/next/newsearch/listSubjectDescription.do?id=003151&pageFlag=A&sitePage=1-2-1",
    "https://archives.go.kr/next/newsearch/listSubjectDescription.do?id=003149&pageFlag=A&sitePage=1-2-1",
    "https://archives.go.kr/next/newsearch/listSubjectDescription.do?id=003148&pageFlag=A&sitePage=1-2-1",
    "https://archives.go.kr/next/newsearch/listSubjectDescription.do?id=008655&pageFlag=A&sitePage=1-2-1",
    "https://archives.go.kr/next/newsearch/listSubjectDescription.do?id=008654&pageFlag=A&sitePage=1-2-1",
    "https://archives.go.kr/next/newsearch/listSubjectDescription.do?id=003150&pageFlag=A&sitePage=1-2-1",
    "https://archives.go.kr/next/newmanager/recodeRegister.do",
    "https://archives.go.kr/next/newtour/tourCourse.do",
    "https://archives.go.kr/next/newrecordsMngPro/recordsDonateInfo.do",
    "https://archives.go.kr/next/newdata/pepoleRecodPresentIntro.do",
    "https://archives.go.kr/next/newsearch/searchGuideList.do",
    "https://archives.go.kr/next/newsearch/searchGuideList.do?page=2",
    "https://archives.go.kr/next/newsearch/searchGuideDetail.do?guideSeq=441",
    "https://archives.go.kr/next/newsearch/searchGuideDetail.do?guideSeq=381",
    "https://archives.go.kr/next/newsearch/searchGuideDetail.do?guideSeq=341",
    "https://archives.go.kr/next/newsearch/searchGuideDetail.do?guideSeq=261",
    "https://archives.go.kr/next/newsearch/searchGuideDetail.do?guideSeq=227",
    "https://archives.go.kr/next/newsearch/searchGuideDetail.do?guideSeq=59",
    "https://archives.go.kr/next/newsearch/searchGuideDetail.do?guideSeq=30",
    "https://archives.go.kr/next/newsearch/searchGuideDetail.do?guideSeq=64",
    "https://archives.go.kr/next/newsearch/searchGuideDetail.do?guideSeq=321",
    "https://archives.go.kr/next/newsearch/searchGuideDetail.do?guideSeq=124",
    "https://archives.go.kr/next/newsearch/searchGuideDetail.do?guideSeq=267",
    "https://archives.go.kr/next/newsearch/searchGuideDetail.do?guideSeq=141",
    "https://archives.go.kr/next/newsearch/searchGuideDetail.do?guideSeq=149",
    "https://archives.go.kr/next/newsearch/searchGuideDetail.do?guideSeq=22",
]
|
84 |
+
|
85 |
+
# Load the web pages listed in `urls`. SoupStrainer() is constructed without
# any tag/attribute filter, so parsing is not narrowed to specific elements.
loader = WebBaseLoader(web_paths=urls, bs_kwargs=dict(parse_only=bs4.SoupStrainer()))
docs = loader.load()
|
88 |
+
|
89 |
+
# Split the loaded documents on newlines into chunks (chunk_size=500,
# chunk_overlap=50) so each piece is small enough to embed and retrieve.
splitter = CharacterTextSplitter(separator="\n", chunk_size=500, chunk_overlap=50)
split_docs = splitter.split_documents(docs)
|
92 |
+
|
93 |
+
# Set up the embedding model, build the FAISS vector store from the chunks,
# and expose the store as a retriever for the QA chain.
embedding_model = HuggingFaceEmbeddings(model_name="snunlp/KR-SBERT-V40K-klueNLI-augSTS")
vectorstore = FAISS.from_documents(split_docs, embedding_model)
retriever = vectorstore.as_retriever()
|
97 |
+
|
98 |
+
# LLM + QA chain: a Groq chat model answers questions using the documents
# returned by `retriever`, combined with the "stuff" chain type.
llm = ChatGroq(groq_api_key=groq_api_key, model_name="llama3-70b-8192")
qa_chain = RetrievalQA.from_chain_type(llm=llm, retriever=retriever, chain_type="stuff")
|
101 |
+
|
102 |
+
# Gradio chat function
|
103 |
+
def chat_with_history(user_input, history):
    """Answer one chat message with the retrieval QA chain and record the turn.

    Args:
        user_input: Text submitted from the Gradio textbox.
        history: List of ``(question, answer)`` tuples from earlier turns, or
            ``None`` when no history exists yet.

    Returns:
        A 3-tuple ``("", history, history)``: the empty string clears the
        textbox, and the history list is sent to both the chatbot display
        and the session state.
    """
    if history is None:
        history = []

    question = (user_input or "").strip()
    if not question:
        # Nothing was asked: skip the LLM round-trip and keep history as is.
        return "", history, history

    # The suffix means "Please answer in Korean." (restored from a
    # mis-encoded literal).
    query = question + " 한국어로 답해주세요."
    # `.invoke` replaces the deprecated direct call `qa_chain({...})`.
    result = qa_chain.invoke({"query": query})
    # Fallback text means "Could not find an answer."
    answer = result.get("result", "답변을 찾을 수 없습니다.")
    history.append((user_input, answer))
    return "", history, history
|
111 |
+
|
112 |
+
# Build the Gradio interface: a heading, the chat display, a question box,
# and per-session state holding the conversation history.
with gr.Blocks() as demo:
    # Heading text: "National Archives information chatbot".
    # NOTE(review): the original heading emoji was lost to mis-encoding;
    # the book emoji below is a stand-in — confirm against the real file.
    gr.Markdown("## 📚 국가기록원 정보 챗봇")
    # Label: "Archives chatbot".
    chatbot = gr.Chatbot(label="기록원 챗봇")
    # Placeholder: "Enter your question"; label: "Question input".
    msg = gr.Textbox(placeholder="질문을 입력하세요", label="💬 질문 입력")
    state = gr.State([])
    # On submit: clear the textbox, refresh the chat display, store history.
    msg.submit(chat_with_history, inputs=[msg, state], outputs=[msg, chatbot, state])

demo.launch()
|