Upload folder using huggingface_hub
- .gitattributes +3 -0
- api/__pycache__/ask.cpython-312.pyc +0 -0
- api/__pycache__/generation.cpython-312.pyc +0 -0
- api/__pycache__/main.cpython-312.pyc +0 -0
- api/__pycache__/retrieval.cpython-312.pyc +0 -0
- api/ask.py +19 -0
- api/generation.py +21 -0
- api/main.py +34 -0
- api/retrieval.py +19 -0
- api/search.py +0 -0
- app.py +31 -45
- app_v1.py +45 -0
- data/index/docs.npy +3 -0
- data/index/exam_db/b2606c9f-ce01-45ef-b1f9-52c07138f09d/data_level0.bin +3 -0
- data/index/exam_db/b2606c9f-ce01-45ef-b1f9-52c07138f09d/header.bin +3 -0
- data/index/exam_db/b2606c9f-ce01-45ef-b1f9-52c07138f09d/index_metadata.pickle +3 -0
- data/index/exam_db/b2606c9f-ce01-45ef-b1f9-52c07138f09d/length.bin +3 -0
- data/index/exam_db/b2606c9f-ce01-45ef-b1f9-52c07138f09d/link_lists.bin +3 -0
- data/index/exam_db/chroma.sqlite3 +3 -0
- data/index/index.faiss +3 -0
- data/index/law_db/cb176df0-95bf-46b3-b7bc-bdbef0408cfb/data_level0.bin +3 -0
- data/index/law_db/cb176df0-95bf-46b3-b7bc-bdbef0408cfb/header.bin +3 -0
- data/index/law_db/cb176df0-95bf-46b3-b7bc-bdbef0408cfb/length.bin +3 -0
- data/index/law_db/cb176df0-95bf-46b3-b7bc-bdbef0408cfb/link_lists.bin +0 -0
- data/index/law_db/chroma.sqlite3 +3 -0
- data/index/law_db/f2c1d7dc-c9a9-4f2f-a66e-892865f49d38/data_level0.bin +3 -0
- data/index/law_db/f2c1d7dc-c9a9-4f2f-a66e-892865f49d38/header.bin +3 -0
- data/index/law_db/f2c1d7dc-c9a9-4f2f-a66e-892865f49d38/index_metadata.pickle +3 -0
- data/index/law_db/f2c1d7dc-c9a9-4f2f-a66e-892865f49d38/length.bin +3 -0
- data/index/law_db/f2c1d7dc-c9a9-4f2f-a66e-892865f49d38/link_lists.bin +3 -0
- generator/__pycache__/llm_inference.cpython-312.pyc +0 -0
- generator/__pycache__/prompt_builder.cpython-312.pyc +0 -0
- generator/llm_inference.py +27 -0
- generator/prompt_builder.py +19 -0
- requirements.txt +6 -6
- retriever/__pycache__/reranker.cpython-312.pyc +0 -0
- retriever/__pycache__/vectordb.cpython-312.pyc +0 -0
- retriever/__pycache__/vectordb_rerank.cpython-312.pyc +0 -0
- retriever/build_index.py +58 -0
- retriever/reranker.py +30 -0
- retriever/vectordb.py +50 -0
- retriever/vectordb_rerank.py +37 -0
- services/__pycache__/rag_pipeline.cpython-312.pyc +0 -0
- services/rag_pipeline.py +21 -0
.gitattributes
CHANGED
@@ -33,3 +33,6 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+data/index/exam_db/chroma.sqlite3 filter=lfs diff=lfs merge=lfs -text
+data/index/index.faiss filter=lfs diff=lfs merge=lfs -text
+data/index/law_db/chroma.sqlite3 filter=lfs diff=lfs merge=lfs -text
api/__pycache__/ask.cpython-312.pyc
ADDED
Binary file (1.13 kB).
api/__pycache__/generation.cpython-312.pyc
ADDED
Binary file (1.26 kB).
api/__pycache__/main.cpython-312.pyc
ADDED
Binary file (1.42 kB).
api/__pycache__/retrieval.cpython-312.pyc
ADDED
Binary file (1.16 kB).
api/ask.py
ADDED
@@ -0,0 +1,19 @@
+from fastapi import APIRouter
+from pydantic import BaseModel
+from services.rag_pipeline import rag_pipeline
+
+router = APIRouter()
+
+# Request schema
+class AskRequest(BaseModel):
+    query: str
+    top_k: int = 5
+
+# Response schema
+class AskResponse(BaseModel):
+    output: str
+
+@router.post("/ask", response_model=AskResponse)
+async def ask(request: AskRequest):
+    output = rag_pipeline(request.query, top_k=request.top_k)
+    return {"output": output}
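Note: a minimal client call against this route might look like the sketch below. It assumes the server is running locally on port 8000 and the router is mounted under the /api prefix as in api/main.py; the requests dependency is an assumption here, as it is not in requirements.txt.

import requests

# POST /api/ask; top_k falls back to 5 on the server side if omitted
resp = requests.post(
    "http://localhost:8000/api/ask",
    json={"query": "Summarize the broker registration requirements.", "top_k": 3},
)
print(resp.json()["output"])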
api/generation.py
ADDED
@@ -0,0 +1,21 @@
+from fastapi import APIRouter
+from pydantic import BaseModel
+from generator.prompt_builder import build_prompt
+from generator.llm_inference import generate_answer
+
+router = APIRouter()
+
+# Request schema
+class GenerateRequest(BaseModel):
+    query: str
+    context_docs: list
+
+# Response schema
+class GenerateResponse(BaseModel):
+    output: str
+
+@router.post("/generate", response_model=GenerateResponse)
+async def generate(request: GenerateRequest):
+    prompt = build_prompt(request.query, request.context_docs)
+    output = generate_answer(prompt)
+    return {"output": output}
api/main.py
ADDED
@@ -0,0 +1,34 @@
+from fastapi import FastAPI
+from fastapi.middleware.cors import CORSMiddleware
+from api.retrieval import router as retrieval_router
+from api.generation import router as generation_router
+from api.ask import router as ask_router
+
+app = FastAPI(
+    title="RAG-based Question Generation System",
+    description="Automatic question generation API for the Korean licensed real estate agent exam",
+    version="1.0.0",
+)
+
+# (Optional) CORS settings - needed later when connecting a Next.js frontend
+app.add_middleware(
+    CORSMiddleware,
+    allow_origins=["*"],  # in production, allowing only the frontend domain is recommended
+    allow_credentials=True,
+    allow_methods=["*"],
+    allow_headers=["*"],
+)
+
+# Health-check endpoint
+@app.get("/health")
+async def health_check():
+    return {"status": "ok"}
+
+# Root route
+@app.get("/")
+async def root():
+    return {"message": "Welcome to the RAG Question Generator API!"}
+
+app.include_router(retrieval_router, prefix="/api")
+app.include_router(generation_router, prefix="/api")
+app.include_router(ask_router, prefix="/api")
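Note: the app can be smoke-tested without a running server, roughly as below (a sketch; FastAPI's TestClient requires httpx, which is not listed in requirements.txt).

from fastapi.testclient import TestClient
from api.main import app

client = TestClient(app)
assert client.get("/health").json() == {"status": "ok"}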
api/retrieval.py
ADDED
@@ -0,0 +1,19 @@
+from fastapi import APIRouter
+from pydantic import BaseModel
+from retriever.vectordb import search_documents
+
+router = APIRouter()
+
+# Request data format
+class SearchRequest(BaseModel):
+    query: str
+    top_k: int = 5  # retrieve the top 5 by default
+
+# Response data format
+class SearchResponse(BaseModel):
+    documents: list
+
+@router.post("/search", response_model=SearchResponse)
+async def search(request: SearchRequest):
+    docs = search_documents(request.query, top_k=request.top_k)
+    return {"documents": docs}
api/search.py
ADDED
File without changes
app.py
CHANGED
@@ -1,45 +1,31 @@
-import gradio as gr
-import spaces
-import torch
-from transformers import AutoModelForCausalLM, AutoTokenizer
-
-model_name = "dasomaru/gemma-3-4bit-it-demo"
-
-# 🚀
-tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
-
-
-
-
-    trust_remote_code=True,
-)
-
-@spaces.GPU(duration=300)
-def generate_response(prompt):
-
-
-
-
-
-
-
-
-
-
-
-
-    demo
-demo.launch()
-
-
-
-# zero = torch.Tensor([0]).cuda()
-# print(zero.device) # <-- 'cpu' 🤔
-
-# @spaces.GPU
-# def greet(n):
-#     print(zero.device) # <-- 'cuda:0' 🤗
-#     return f"Hello {zero + n} Tensor"
-
-# demo = gr.Interface(fn=greet, inputs=gr.Number(), outputs=gr.Text())
-# demo.launch()
+import gradio as gr
+import spaces
+import torch
+from transformers import AutoModelForCausalLM, AutoTokenizer
+
+model_name = "dasomaru/gemma-3-4bit-it-demo"
+
+# 🚀 Load the model and tokenizer only once, at server start
+tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
+model = AutoModelForCausalLM.from_pretrained(
+    model_name,
+    torch_dtype=torch.float16,
+    device_map="auto",  # on ZeroGPU the GPU is assigned automatically
+    trust_remote_code=True,
+)
+
+@spaces.GPU(duration=300)
+def generate_response(prompt):
+    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
+    outputs = model.generate(
+        **inputs,
+        max_new_tokens=512,
+        temperature=0.7,
+        top_p=0.9,
+        top_k=50,
+        do_sample=True,
+    )
+    return tokenizer.decode(outputs[0], skip_special_tokens=True)
+
+demo = gr.Interface(fn=generate_response, inputs="text", outputs="text")
+demo.launch()
app_v1.py
ADDED
@@ -0,0 +1,45 @@
+import gradio as gr
+import spaces
+import torch
+from transformers import AutoModelForCausalLM, AutoTokenizer
+
+model_name = "dasomaru/gemma-3-4bit-it-demo"
+
+# 🚀 The tokenizer can be preloaded on the CPU
+tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
+# 🚀 Load the model onto the CPU first (no GPU yet)
+model = AutoModelForCausalLM.from_pretrained(
+    model_name,
+    torch_dtype=torch.float16,  # since it is a 4-bit model
+    trust_remote_code=True,
+)
+
+@spaces.GPU(duration=300)
+def generate_response(prompt):
+    # Model and tokenizer loading is performed inside the function
+    tokenizer = AutoTokenizer.from_pretrained("dasomaru/gemma-3-4bit-it-demo")
+    model = AutoModelForCausalLM.from_pretrained("dasomaru/gemma-3-4bit-it-demo")
+    model.to("cuda")
+
+    inputs = tokenizer(prompt, return_tensors="pt").to("cuda")
+    outputs = model.generate(**inputs, max_new_tokens=512, temperature=0.7,
+                             top_p=0.9,
+                             top_k=50,
+                             do_sample=True,)
+    return tokenizer.decode(outputs[0], skip_special_tokens=True)
+
+demo = gr.Interface(fn=generate_response, inputs="text", outputs="text")
+demo.launch()
+
+
+
+# zero = torch.Tensor([0]).cuda()
+# print(zero.device) # <-- 'cpu' 🤔
+
+# @spaces.GPU
+# def greet(n):
+#     print(zero.device) # <-- 'cuda:0' 🤗
+#     return f"Hello {zero + n} Tensor"
+
+# demo = gr.Interface(fn=greet, inputs=gr.Number(), outputs=gr.Text())
+# demo.launch()
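Note: app_v1.py reloads the model and tokenizer inside generate_response and moves the model to "cuda" by hand, while the rewritten app.py above loads them once at module level with device_map="auto"; the rewrite avoids paying the loading cost on every request.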
data/index/docs.npy
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:920adcfc1f2972387acc6ccbb28748cbd6fac18bfce18b593667bedaafd59a1f
+size 3009728
data/index/exam_db/b2606c9f-ce01-45ef-b1f9-52c07138f09d/data_level0.bin
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3ade3c2875730097a196c1411680ef487dd1a2d8c7a736a6e2686769cbccfb87
+size 42360000
data/index/exam_db/b2606c9f-ce01-45ef-b1f9-52c07138f09d/header.bin
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0b83b314b04a5821a8b898c42848dee892ae700907983325de177d26e7cd0f27
+size 100
data/index/exam_db/b2606c9f-ce01-45ef-b1f9-52c07138f09d/index_metadata.pickle
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2d69ef1a7671665a27366fbcef9b12bfafc5085dea30ec686dead0ba8e9970db
+size 72200
data/index/exam_db/b2606c9f-ce01-45ef-b1f9-52c07138f09d/length.bin
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0d8561f29f57731a703e7fa40308ff25bd0997bccbc44836a2c0e5e980263594
+size 40000
data/index/exam_db/b2606c9f-ce01-45ef-b1f9-52c07138f09d/link_lists.bin
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f20d900ea11271e836c4f3ac19fddf990a8f68ca09ddad8ff2ada76640fe5c3e
+size 7580
data/index/exam_db/chroma.sqlite3
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ab1270442e19db5a1c0ec0217101b32e3d5ce379d9cf0a4278f7b4edac2489fb
+size 14610432
data/index/index.faiss
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:afd28a8ec27caf932aa2f40eadc4d2213567db1ff3aaae320fdb707c9bcf82f4
+size 3379245
data/index/law_db/cb176df0-95bf-46b3-b7bc-bdbef0408cfb/data_level0.bin
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2679902f7ee9902bd54e85a1e4b822cccb4a163c0d49ae93b57d42d40edf49d0
+size 42360000
data/index/law_db/cb176df0-95bf-46b3-b7bc-bdbef0408cfb/header.bin
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f14d42069445548e1fceb9acb767255a21e1e9d11c021b2d5999d5cbf4d2b705
+size 100
data/index/law_db/cb176df0-95bf-46b3-b7bc-bdbef0408cfb/length.bin
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:decfb869a98e71a64885be65a562bd9aaa173e8b0008338a9fc149c91527113c
+size 40000
data/index/law_db/cb176df0-95bf-46b3-b7bc-bdbef0408cfb/link_lists.bin
ADDED
File without changes
data/index/law_db/chroma.sqlite3
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6dbbf1eed4fb2a85649ef2d22fdce84b1c10a268a59279dbb4a9e0d8141e1e55
+size 38465536
data/index/law_db/f2c1d7dc-c9a9-4f2f-a66e-892865f49d38/data_level0.bin
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2b200c007a3efa458f364e90f4f2fc57a51d3800cdc7b9da0a472e3c2be0f516
+size 42360000
data/index/law_db/f2c1d7dc-c9a9-4f2f-a66e-892865f49d38/header.bin
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8998d4105cfc3bb8d0767c2d5f5d5b862eebf1e18753cc27a8e1edabbd16e5dd
+size 100
data/index/law_db/f2c1d7dc-c9a9-4f2f-a66e-892865f49d38/index_metadata.pickle
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7be6b19c0dea5d3a3ade705a1de0afd97ad2ea0098fdd90c2ffaae8548079944
+size 266000
data/index/law_db/f2c1d7dc-c9a9-4f2f-a66e-892865f49d38/length.bin
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:73f7b190c3b790c39f0c2dd1fec9bfcc844a5ec5d6b5b77c035bb627e8683fce
+size 40000
data/index/law_db/f2c1d7dc-c9a9-4f2f-a66e-892865f49d38/link_lists.bin
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ba2a1c508243b08e82d3aea180a7acb8537c8eab5d0e7670bd32dc0299b60962
+size 26124
generator/__pycache__/llm_inference.cpython-312.pyc
ADDED
Binary file (1.01 kB).
generator/__pycache__/prompt_builder.cpython-312.pyc
ADDED
Binary file (901 Bytes).
generator/llm_inference.py
ADDED
@@ -0,0 +1,27 @@
+from transformers import pipeline
+
+# 1. Load the model (loaded only once)
+generator = pipeline(
+    "text-generation",
+    model="dasomaru/gemma-3-4bit-it-demo",  # the uploaded model name
+    tokenizer="dasomaru/gemma-3-4bit-it-demo",
+    device=0,  # use CUDA:0 (GPU); use device=-1 for CPU only
+    max_new_tokens=512,
+    temperature=0.7,
+    top_p=0.9,
+    repetition_penalty=1.1
+)
+
+# 2. Answer-generation function
+def generate_answer(prompt: str) -> str:
+    """
+    Generate an answer from the given prompt.
+    """
+    print(f"🔵 Prompt Length: {len(prompt)} characters")  # added!
+    outputs = generator(
+        prompt,
+        do_sample=True,
+        top_k=50,
+        num_return_sequences=1
+    )
+    return outputs[0]["generated_text"].strip()
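Note: by default the transformers text-generation pipeline returns the prompt concatenated with the completion in generated_text, so callers of generate_answer receive the prompt echoed back. A sketch of returning only the completion (return_full_text is a standard pipeline argument, though worth verifying against the installed transformers version):

outputs = generator(prompt, do_sample=True, top_k=50, return_full_text=False)
answer = outputs[0]["generated_text"].strip()  # completion only, prompt stripped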
generator/prompt_builder.py
ADDED
@@ -0,0 +1,19 @@
+def build_prompt(query: str, context_docs: list) -> str:
+    """
+    Combine the user query and the retrieved documents into an LLM input prompt.
+    """
+    context_text = "\n".join([f"- {doc}" for doc in context_docs])
+
+    prompt = f"""You are an expert at writing Korean licensed real estate agent exam questions.
+
+The following are past exam questions and related statute information:
+{context_text}
+
+Answer the user's request using this information.
+
+[Question]
+{query}
+
+[Answer]
+"""
+    return prompt
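Note: for a sense of the output shape, a quick invocation with hypothetical inputs:

docs = ["Article 9: brokerage registration requirements ...", "2023 exam, question 12 ..."]
print(build_prompt("Draft a question about brokerage registration.", docs))
# -> the template above, with each doc rendered as a "- " bullet under the context header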
requirements.txt
CHANGED
@@ -1,6 +1,6 @@
-gradio
-
-
-
-
-
+gradio
+torch
+transformers
+sentence-transformers
+faiss-cpu
+tqdm
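Note: the routers under api/ import fastapi, and serving them needs an ASGI server such as uvicorn, yet neither is pinned here. In practice gradio pulls both in transitively, but listing them explicitly would make the API deployable without gradio.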
retriever/__pycache__/reranker.cpython-312.pyc
ADDED
Binary file (1.73 kB).
retriever/__pycache__/vectordb.cpython-312.pyc
ADDED
Binary file (1.77 kB).
retriever/__pycache__/vectordb_rerank.cpython-312.pyc
ADDED
Binary file (1.85 kB).
retriever/build_index.py
ADDED
@@ -0,0 +1,58 @@
+import os
+import json
+import faiss
+import numpy as np
+from sentence_transformers import SentenceTransformer
+from tqdm import tqdm
+
+# 1. Data paths
+source_paths = [
+    r"data/real_estate_agent/raw/past_papers/brokerage_law.jsonl",
+    r"data/real_estate_agent/raw/past_papers/civil_law.jsonl",
+    r"data/real_estate_agent/raw/past_papers/disclosure_taxation.jsonl",
+    r"data/real_estate_agent/raw/past_papers/introduction.jsonl",
+    r"data/real_estate_agent/raw/past_papers/public_law.jsonl",
+]
+
+INDEX_PATH = "data/index/index.faiss"
+DOCS_PATH = "data/index/docs.npy"
+
+# 2. Load the embedding model
+embedding_model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")
+
+def init_faiss():
+    questions = []
+
+    # 3. Read the JSONL files
+    for path in source_paths:
+        with open(path, "r", encoding="utf-8") as f:
+            for line in f:
+                data = json.loads(line)
+                question_text = data.get("question", "")
+                if question_text:  # keep only non-empty questions
+                    questions.append(question_text)
+
+    print(f"✅ Loaded {len(questions)} questions")
+
+    # 4. Generate embeddings
+    embeddings = embedding_model.encode(
+        questions,
+        batch_size=32,
+        show_progress_bar=True
+    )
+    embeddings = np.array(embeddings).astype('float32')
+
+    # 5. Build the FAISS index
+    dimension = embeddings.shape[1]
+    index = faiss.IndexFlatL2(dimension)  # L2-distance index
+    index.add(embeddings)
+
+    # 6. Save
+    os.makedirs(os.path.dirname(INDEX_PATH), exist_ok=True)
+    faiss.write_index(index, INDEX_PATH)
+    np.save(DOCS_PATH, questions)
+
+    print(f"✅ FAISS index and documents saved!")
+
+if __name__ == "__main__":
+    init_faiss()
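Note: init_faiss() consumes only the "question" field of each JSONL record; everything else is ignored. A hypothetical record, for illustration:

import json

sample = {"question": "공인중개사법령상 중개대상물에 해당하지 않는 것은?", "answer": 3}  # "answer" is ignored here
print(json.dumps(sample, ensure_ascii=False))  # one such object per line of the .jsonl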
retriever/reranker.py
ADDED
@@ -0,0 +1,30 @@
+from transformers import AutoTokenizer, AutoModelForSequenceClassification
+import torch
+
+# 1. Load the reranker model
+reranker_tokenizer = AutoTokenizer.from_pretrained("BAAI/bge-reranker-base")
+reranker_model = AutoModelForSequenceClassification.from_pretrained("BAAI/bge-reranker-base")
+
+def rerank_documents(query: str, docs: list, top_k: int = 5) -> list:
+    """
+    Re-sort the retrieved documents by relevance to the query.
+    """
+    pairs = [(query, doc) for doc in docs]
+
+    inputs = reranker_tokenizer.batch_encode_plus(
+        pairs,
+        padding=True,
+        truncation=True,
+        return_tensors="pt",
+        max_length=512
+    )
+
+    with torch.no_grad():
+        scores = reranker_model(**inputs).logits.squeeze(-1)  # (batch_size,)
+
+    scores = scores.tolist()
+
+    # Sort by descending score
+    sorted_docs = [doc for _, doc in sorted(zip(scores, docs), key=lambda x: x[0], reverse=True)]
+
+    return sorted_docs[:top_k]
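Note: the reranker can be sanity-checked on its own with hypothetical inputs:

docs = [
    "Deposit protection under the Housing Lease Protection Act ...",
    "Registration requirements for opening a brokerage office ...",
]
top = rerank_documents("How do I register a brokerage office?", docs, top_k=1)
print(top[0])  # the registration document should score higher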
retriever/vectordb.py
ADDED
@@ -0,0 +1,50 @@
+import faiss
+import numpy as np
+import os
+from sentence_transformers import SentenceTransformer
+from retriever.reranker import rerank_documents
+
+# 1. Load the embedding model
+embedding_model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")
+
+# 2. Initialize the vector DB (FAISS index)
+INDEX_PATH = "data/index/index.faiss"
+DOCS_PATH = "data/index/docs.npy"
+
+if os.path.exists(INDEX_PATH) and os.path.exists(DOCS_PATH):
+    index = faiss.read_index(INDEX_PATH)
+    documents = np.load(DOCS_PATH, allow_pickle=True)
+else:
+    index = None
+    documents = None
+    print("No FAISS index or docs found. Please build the index first.")
+
+# 3. Search function
+def search_documents(query: str, top_k: int = 5):
+    if index is None or documents is None:
+        raise ValueError("Index or documents not loaded. Build the FAISS index first.")
+
+    query_vector = embedding_model.encode([query])
+    query_vector = np.array(query_vector).astype('float32')
+
+    distances, indices = index.search(query_vector, top_k)
+    results = []
+
+    for idx in indices[0]:
+        if idx < len(documents):
+            results.append(documents[idx])
+
+    return results
+
+# # 1. Rough FAISS search
+# query_embedding = embedding_model.encode([query], convert_to_tensor=True).cpu().detach().numpy()
+# distances, indices = index.search(query_embedding, top_k)
+# results = [documents[idx] for idx in indices[0] if idx != -1]
+
+# # 2. Fine-grained reranking
+# reranked_results = rerank_documents(query, results, top_k=top_k)
+
+# return reranked_results
ADDED
@@ -0,0 +1,37 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import faiss
|
2 |
+
import numpy as np
|
3 |
+
import os
|
4 |
+
from sentence_transformers import SentenceTransformer
|
5 |
+
from retriever.reranker import rerank_documents
|
6 |
+
|
7 |
+
# 1. ์๋ฒ ๋ฉ ๋ชจ๋ธ ๋ก๋
|
8 |
+
embedding_model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")
|
9 |
+
|
10 |
+
# 2. ๋ฒกํฐDB (FAISS Index) ์ด๊ธฐํ
|
11 |
+
INDEX_PATH = "data/index/index.faiss"
|
12 |
+
DOCS_PATH = "data/index/docs.npy"
|
13 |
+
|
14 |
+
if os.path.exists(INDEX_PATH) and os.path.exists(DOCS_PATH):
|
15 |
+
index = faiss.read_index(INDEX_PATH)
|
16 |
+
documents = np.load(DOCS_PATH, allow_pickle=True)
|
17 |
+
else:
|
18 |
+
index = None
|
19 |
+
documents = None
|
20 |
+
print("No FAISS index or docs found. Please build the index first.")
|
21 |
+
|
22 |
+
# 3. ๊ฒ์ ํจ์
|
23 |
+
def search_documents(query: str, top_k: int = 5):
|
24 |
+
if index is None or documents is None:
|
25 |
+
raise ValueError("Index or documents not loaded. Build the FAISS index first.")
|
26 |
+
|
27 |
+
# 1. FAISS rough ๊ฒ์
|
28 |
+
query_embedding = embedding_model.encode([query], convert_to_tensor=True).cpu().detach().numpy()
|
29 |
+
distances, indices = index.search(query_embedding, top_k)
|
30 |
+
results = [documents[idx] for idx in indices[0] if idx != -1]
|
31 |
+
|
32 |
+
# 2. Reranking ์ ์ฉ
|
33 |
+
reranked_results = rerank_documents(query, results, top_k=top_k)
|
34 |
+
|
35 |
+
return reranked_results
|
36 |
+
|
37 |
+
|
services/__pycache__/rag_pipeline.cpython-312.pyc
ADDED
Binary file (857 Bytes).
services/rag_pipeline.py
ADDED
@@ -0,0 +1,21 @@
+# from retriever.vectordb import search_documents
+from retriever.vectordb_rerank import search_documents
+from generator.prompt_builder import build_prompt
+from generator.llm_inference import generate_answer
+
+def rag_pipeline(query: str, top_k: int = 5) -> str:
+    """
+    1. Retrieve documents relevant to the user query
+    2. Build a prompt from the retrieved documents
+    3. Generate an answer from the prompt
+    """
+    # 1. Retrieve
+    context_docs = search_documents(query, top_k=top_k)
+
+    # 2. Assemble the prompt
+    prompt = build_prompt(query, context_docs)
+
+    # 3. Model inference
+    output = generate_answer(prompt)
+
+    return output
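Note: the full pipeline can be exercised directly, without the HTTP layer (a sketch; assumes the FAISS index built by retriever/build_index.py exists under data/index/):

from services.rag_pipeline import rag_pipeline

# retrieve top-3 questions, rerank, build the prompt, generate
answer = rag_pipeline("Write a practice question about brokerage contracts.", top_k=3)
print(answer)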