dasomaru committed on
Commit 9b14ff1 · verified · 1 Parent(s): 901a928

Upload folder using huggingface_hub

Files changed (44)
  1. .gitattributes +3 -0
  2. api/__pycache__/ask.cpython-312.pyc +0 -0
  3. api/__pycache__/generation.cpython-312.pyc +0 -0
  4. api/__pycache__/main.cpython-312.pyc +0 -0
  5. api/__pycache__/retrieval.cpython-312.pyc +0 -0
  6. api/ask.py +19 -0
  7. api/generation.py +21 -0
  8. api/main.py +34 -0
  9. api/retrieval.py +19 -0
  10. api/search.py +0 -0
  11. app.py +31 -45
  12. app_v1.py +45 -0
  13. data/index/docs.npy +3 -0
  14. data/index/exam_db/b2606c9f-ce01-45ef-b1f9-52c07138f09d/data_level0.bin +3 -0
  15. data/index/exam_db/b2606c9f-ce01-45ef-b1f9-52c07138f09d/header.bin +3 -0
  16. data/index/exam_db/b2606c9f-ce01-45ef-b1f9-52c07138f09d/index_metadata.pickle +3 -0
  17. data/index/exam_db/b2606c9f-ce01-45ef-b1f9-52c07138f09d/length.bin +3 -0
  18. data/index/exam_db/b2606c9f-ce01-45ef-b1f9-52c07138f09d/link_lists.bin +3 -0
  19. data/index/exam_db/chroma.sqlite3 +3 -0
  20. data/index/index.faiss +3 -0
  21. data/index/law_db/cb176df0-95bf-46b3-b7bc-bdbef0408cfb/data_level0.bin +3 -0
  22. data/index/law_db/cb176df0-95bf-46b3-b7bc-bdbef0408cfb/header.bin +3 -0
  23. data/index/law_db/cb176df0-95bf-46b3-b7bc-bdbef0408cfb/length.bin +3 -0
  24. data/index/law_db/cb176df0-95bf-46b3-b7bc-bdbef0408cfb/link_lists.bin +0 -0
  25. data/index/law_db/chroma.sqlite3 +3 -0
  26. data/index/law_db/f2c1d7dc-c9a9-4f2f-a66e-892865f49d38/data_level0.bin +3 -0
  27. data/index/law_db/f2c1d7dc-c9a9-4f2f-a66e-892865f49d38/header.bin +3 -0
  28. data/index/law_db/f2c1d7dc-c9a9-4f2f-a66e-892865f49d38/index_metadata.pickle +3 -0
  29. data/index/law_db/f2c1d7dc-c9a9-4f2f-a66e-892865f49d38/length.bin +3 -0
  30. data/index/law_db/f2c1d7dc-c9a9-4f2f-a66e-892865f49d38/link_lists.bin +3 -0
  31. generator/__pycache__/llm_inference.cpython-312.pyc +0 -0
  32. generator/__pycache__/prompt_builder.cpython-312.pyc +0 -0
  33. generator/llm_inference.py +27 -0
  34. generator/prompt_builder.py +19 -0
  35. requirements.txt +6 -6
  36. retriever/__pycache__/reranker.cpython-312.pyc +0 -0
  37. retriever/__pycache__/vectordb.cpython-312.pyc +0 -0
  38. retriever/__pycache__/vectordb_rerank.cpython-312.pyc +0 -0
  39. retriever/build_index.py +58 -0
  40. retriever/reranker.py +30 -0
  41. retriever/vectordb.py +50 -0
  42. retriever/vectordb_rerank.py +37 -0
  43. services/__pycache__/rag_pipeline.cpython-312.pyc +0 -0
  44. services/rag_pipeline.py +21 -0
.gitattributes CHANGED
@@ -33,3 +33,6 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+data/index/exam_db/chroma.sqlite3 filter=lfs diff=lfs merge=lfs -text
+data/index/index.faiss filter=lfs diff=lfs merge=lfs -text
+data/index/law_db/chroma.sqlite3 filter=lfs diff=lfs merge=lfs -text
api/__pycache__/ask.cpython-312.pyc ADDED
Binary file (1.13 kB)

api/__pycache__/generation.cpython-312.pyc ADDED
Binary file (1.26 kB)

api/__pycache__/main.cpython-312.pyc ADDED
Binary file (1.42 kB)

api/__pycache__/retrieval.cpython-312.pyc ADDED
Binary file (1.16 kB)
 
api/ask.py ADDED
@@ -0,0 +1,19 @@
+from fastapi import APIRouter
+from pydantic import BaseModel
+from services.rag_pipeline import rag_pipeline
+
+router = APIRouter()
+
+# Request schema
+class AskRequest(BaseModel):
+    query: str
+    top_k: int = 5
+
+# Response schema
+class AskResponse(BaseModel):
+    output: str
+
+@router.post("/ask", response_model=AskResponse)
+async def ask(request: AskRequest):
+    output = rag_pipeline(request.query, top_k=request.top_k)
+    return {"output": output}
api/generation.py ADDED
@@ -0,0 +1,21 @@
+from fastapi import APIRouter
+from pydantic import BaseModel
+from generator.prompt_builder import build_prompt
+from generator.llm_inference import generate_answer
+
+router = APIRouter()
+
+# Request schema
+class GenerateRequest(BaseModel):
+    query: str
+    context_docs: list
+
+# Response schema
+class GenerateResponse(BaseModel):
+    output: str
+
+@router.post("/generate", response_model=GenerateResponse)
+async def generate(request: GenerateRequest):
+    prompt = build_prompt(request.query, request.context_docs)
+    output = generate_answer(prompt)
+    return {"output": output}
api/main.py ADDED
@@ -0,0 +1,34 @@
+from fastapi import FastAPI
+from fastapi.middleware.cors import CORSMiddleware
+from api.retrieval import router as retrieval_router
+from api.generation import router as generation_router
+from api.ask import router as ask_router
+
+app = FastAPI(
+    title="RAG-based question generation system",
+    description="API for automatically generating Licensed Real Estate Agent exam questions",
+    version="1.0.0",
+)
+
+# (Optional) CORS settings - needed later when connecting a Next.js frontend
+app.add_middleware(
+    CORSMiddleware,
+    allow_origins=["*"],  # in production, allow only the frontend domain
+    allow_credentials=True,
+    allow_methods=["*"],
+    allow_headers=["*"],
+)
+
+# Health-check endpoint
+@app.get("/health")
+async def health_check():
+    return {"status": "ok"}
+
+# Root index
+@app.get("/")
+async def root():
+    return {"message": "Welcome to the RAG Question Generator API!"}
+
+app.include_router(retrieval_router, prefix="/api")
+app.include_router(generation_router, prefix="/api")
+app.include_router(ask_router, prefix="/api")
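One way to serve this app locally is a short uvicorn sketch; the host, port, and reload flag are assumptions, not part of this commit:

import uvicorn

# Serve the FastAPI app defined in api/main.py
uvicorn.run("api.main:app", host="0.0.0.0", port=8000, reload=True)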
api/retrieval.py ADDED
@@ -0,0 +1,19 @@
+from fastapi import APIRouter
+from pydantic import BaseModel
+from retriever.vectordb import search_documents
+
+router = APIRouter()
+
+# Request payload format
+class SearchRequest(BaseModel):
+    query: str
+    top_k: int = 5  # retrieve the top 5 by default
+
+# Response payload format
+class SearchResponse(BaseModel):
+    documents: list
+
+@router.post("/search", response_model=SearchResponse)
+async def search(request: SearchRequest):
+    docs = search_documents(request.query, top_k=request.top_k)
+    return {"documents": docs}
api/search.py ADDED
File without changes
app.py CHANGED
@@ -1,45 +1,31 @@
-import gradio as gr
-import spaces
-import torch
-from transformers import AutoModelForCausalLM, AutoTokenizer
-
-model_name = "dasomaru/gemma-3-4bit-it-demo"
-
-# 🚀 the tokenizer can be loaded ahead of time, even on CPU
-tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
-# 🚀 load the model on CPU first (no GPU yet)
-model = AutoModelForCausalLM.from_pretrained(
-    model_name,
-    torch_dtype=torch.float16,  # since it is a 4-bit model
-    trust_remote_code=True,
-)
-
-@spaces.GPU(duration=300)
-def generate_response(prompt):
-    # load the model and tokenizer inside the function
-    tokenizer = AutoTokenizer.from_pretrained("dasomaru/gemma-3-4bit-it-demo")
-    model = AutoModelForCausalLM.from_pretrained("dasomaru/gemma-3-4bit-it-demo")
-    model.to("cuda")
-
-    inputs = tokenizer(prompt, return_tensors="pt").to("cuda")
-    outputs = model.generate(**inputs, max_new_tokens=512, temperature=0.7,
-                             top_p=0.9,
-                             top_k=50,
-                             do_sample=True,)
-    return tokenizer.decode(outputs[0], skip_special_tokens=True)
-
-demo = gr.Interface(fn=generate_response, inputs="text", outputs="text")
-demo.launch()
-
-
-
-# zero = torch.Tensor([0]).cuda()
-# print(zero.device)  # <-- 'cpu' 🤔
-
-# @spaces.GPU
-# def greet(n):
-#     print(zero.device)  # <-- 'cuda:0' 🤗
-#     return f"Hello {zero + n} Tensor"
-
-# demo = gr.Interface(fn=greet, inputs=gr.Number(), outputs=gr.Text())
-# demo.launch()
+import gradio as gr
+import spaces
+import torch
+from transformers import AutoModelForCausalLM, AutoTokenizer
+
+model_name = "dasomaru/gemma-3-4bit-it-demo"
+
+# 🚀 load the model and tokenizer only once, at server startup
+tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
+model = AutoModelForCausalLM.from_pretrained(
+    model_name,
+    torch_dtype=torch.float16,
+    device_map="auto",  # on ZeroGPU a GPU is assigned automatically
+    trust_remote_code=True,
+)
+
+@spaces.GPU(duration=300)
+def generate_response(prompt):
+    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
+    outputs = model.generate(
+        **inputs,
+        max_new_tokens=512,
+        temperature=0.7,
+        top_p=0.9,
+        top_k=50,
+        do_sample=True,
+    )
+    return tokenizer.decode(outputs[0], skip_special_tokens=True)
+
+demo = gr.Interface(fn=generate_response, inputs="text", outputs="text")
+demo.launch()
app_v1.py ADDED
@@ -0,0 +1,45 @@
+import gradio as gr
+import spaces
+import torch
+from transformers import AutoModelForCausalLM, AutoTokenizer
+
+model_name = "dasomaru/gemma-3-4bit-it-demo"
+
+# 🚀 the tokenizer can be loaded ahead of time, even on CPU
+tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
+# 🚀 load the model on CPU first (no GPU yet)
+model = AutoModelForCausalLM.from_pretrained(
+    model_name,
+    torch_dtype=torch.float16,  # since it is a 4-bit model
+    trust_remote_code=True,
+)
+
+@spaces.GPU(duration=300)
+def generate_response(prompt):
+    # load the model and tokenizer inside the function
+    tokenizer = AutoTokenizer.from_pretrained("dasomaru/gemma-3-4bit-it-demo")
+    model = AutoModelForCausalLM.from_pretrained("dasomaru/gemma-3-4bit-it-demo")
+    model.to("cuda")
+
+    inputs = tokenizer(prompt, return_tensors="pt").to("cuda")
+    outputs = model.generate(**inputs, max_new_tokens=512, temperature=0.7,
+                             top_p=0.9,
+                             top_k=50,
+                             do_sample=True,)
+    return tokenizer.decode(outputs[0], skip_special_tokens=True)
+
+demo = gr.Interface(fn=generate_response, inputs="text", outputs="text")
+demo.launch()
+
+
+
+# zero = torch.Tensor([0]).cuda()
+# print(zero.device)  # <-- 'cpu' 🤔
+
+# @spaces.GPU
+# def greet(n):
+#     print(zero.device)  # <-- 'cuda:0' 🤗
+#     return f"Hello {zero + n} Tensor"
+
+# demo = gr.Interface(fn=greet, inputs=gr.Number(), outputs=gr.Text())
+# demo.launch()
data/index/docs.npy ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:920adcfc1f2972387acc6ccbb28748cbd6fac18bfce18b593667bedaafd59a1f
+size 3009728
data/index/exam_db/b2606c9f-ce01-45ef-b1f9-52c07138f09d/data_level0.bin ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3ade3c2875730097a196c1411680ef487dd1a2d8c7a736a6e2686769cbccfb87
+size 42360000
data/index/exam_db/b2606c9f-ce01-45ef-b1f9-52c07138f09d/header.bin ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0b83b314b04a5821a8b898c42848dee892ae700907983325de177d26e7cd0f27
+size 100
data/index/exam_db/b2606c9f-ce01-45ef-b1f9-52c07138f09d/index_metadata.pickle ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2d69ef1a7671665a27366fbcef9b12bfafc5085dea30ec686dead0ba8e9970db
+size 72200
data/index/exam_db/b2606c9f-ce01-45ef-b1f9-52c07138f09d/length.bin ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0d8561f29f57731a703e7fa40308ff25bd0997bccbc44836a2c0e5e980263594
+size 40000
data/index/exam_db/b2606c9f-ce01-45ef-b1f9-52c07138f09d/link_lists.bin ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f20d900ea11271e836c4f3ac19fddf990a8f68ca09ddad8ff2ada76640fe5c3e
+size 7580
data/index/exam_db/chroma.sqlite3 ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ab1270442e19db5a1c0ec0217101b32e3d5ce379d9cf0a4278f7b4edac2489fb
+size 14610432
data/index/index.faiss ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:afd28a8ec27caf932aa2f40eadc4d2213567db1ff3aaae320fdb707c9bcf82f4
+size 3379245
data/index/law_db/cb176df0-95bf-46b3-b7bc-bdbef0408cfb/data_level0.bin ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2679902f7ee9902bd54e85a1e4b822cccb4a163c0d49ae93b57d42d40edf49d0
+size 42360000
data/index/law_db/cb176df0-95bf-46b3-b7bc-bdbef0408cfb/header.bin ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f14d42069445548e1fceb9acb767255a21e1e9d11c021b2d5999d5cbf4d2b705
+size 100
data/index/law_db/cb176df0-95bf-46b3-b7bc-bdbef0408cfb/length.bin ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:decfb869a98e71a64885be65a562bd9aaa173e8b0008338a9fc149c91527113c
+size 40000
data/index/law_db/cb176df0-95bf-46b3-b7bc-bdbef0408cfb/link_lists.bin ADDED
File without changes
data/index/law_db/chroma.sqlite3 ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6dbbf1eed4fb2a85649ef2d22fdce84b1c10a268a59279dbb4a9e0d8141e1e55
+size 38465536
data/index/law_db/f2c1d7dc-c9a9-4f2f-a66e-892865f49d38/data_level0.bin ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2b200c007a3efa458f364e90f4f2fc57a51d3800cdc7b9da0a472e3c2be0f516
+size 42360000
data/index/law_db/f2c1d7dc-c9a9-4f2f-a66e-892865f49d38/header.bin ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8998d4105cfc3bb8d0767c2d5f5d5b862eebf1e18753cc27a8e1edabbd16e5dd
+size 100
data/index/law_db/f2c1d7dc-c9a9-4f2f-a66e-892865f49d38/index_metadata.pickle ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7be6b19c0dea5d3a3ade705a1de0afd97ad2ea0098fdd90c2ffaae8548079944
+size 266000
data/index/law_db/f2c1d7dc-c9a9-4f2f-a66e-892865f49d38/length.bin ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:73f7b190c3b790c39f0c2dd1fec9bfcc844a5ec5d6b5b77c035bb627e8683fce
+size 40000
data/index/law_db/f2c1d7dc-c9a9-4f2f-a66e-892865f49d38/link_lists.bin ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ba2a1c508243b08e82d3aea180a7acb8537c8eab5d0e7670bd32dc0299b60962
+size 26124
generator/__pycache__/llm_inference.cpython-312.pyc ADDED
Binary file (1.01 kB)

generator/__pycache__/prompt_builder.cpython-312.pyc ADDED
Binary file (901 Bytes)
 
generator/llm_inference.py ADDED
@@ -0,0 +1,27 @@
+from transformers import pipeline
+
+# 1. Load the model (loaded only once, at import time)
+generator = pipeline(
+    "text-generation",
+    model="dasomaru/gemma-3-4bit-it-demo",  # the model uploaded to the Hub
+    tokenizer="dasomaru/gemma-3-4bit-it-demo",
+    device=0,  # use CUDA:0 (GPU); if CPU only, device=-1
+    max_new_tokens=512,
+    temperature=0.7,
+    top_p=0.9,
+    repetition_penalty=1.1
+)
+
+# 2. Answer generation function
+def generate_answer(prompt: str) -> str:
+    """
+    Generate the model's answer from the given prompt.
+    """
+    print(f"🔵 Prompt Length: {len(prompt)} characters")  # added!
+    outputs = generator(
+        prompt,
+        do_sample=True,
+        top_k=50,
+        num_return_sequences=1
+    )
+    return outputs[0]["generated_text"].strip()
generator/prompt_builder.py ADDED
@@ -0,0 +1,19 @@
+def build_prompt(query: str, context_docs: list) -> str:
+    """
+    Combine the user question and the retrieved documents into an LLM input prompt.
+    """
+    context_text = "\n".join([f"- {doc}" for doc in context_docs])
+
+    prompt = f"""You are an expert at setting Licensed Real Estate Agent exam questions.
+
+The following are past exam questions and related statute information:
+{context_text}
+
+Refer to this information when answering the user's request.
+
+[Question]
+{query}
+
+[Answer]
+"""
+    return prompt
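A quick sketch of what the builder produces, run from the repository root; the query and document strings are made-up placeholders:

from generator.prompt_builder import build_prompt

docs = ["Past question: ...", "Statute excerpt: ..."]  # illustrative context
print(build_prompt("Draft a new question about brokerage contracts.", docs))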
requirements.txt CHANGED
@@ -1,6 +1,6 @@
-gradio
-transformers
-torch
-peft
-bitsandbytes
-spaces
+gradio
+torch
+transformers
+sentence-transformers
+faiss-cpu
+tqdm
retriever/__pycache__/reranker.cpython-312.pyc ADDED
Binary file (1.73 kB)

retriever/__pycache__/vectordb.cpython-312.pyc ADDED
Binary file (1.77 kB)

retriever/__pycache__/vectordb_rerank.cpython-312.pyc ADDED
Binary file (1.85 kB)
 
retriever/build_index.py ADDED
@@ -0,0 +1,58 @@
+import os
+import json
+import faiss
+import numpy as np
+from sentence_transformers import SentenceTransformer
+from tqdm import tqdm
+
+# 1. Set data paths
+source_paths = [
+    r"data/real_estate_agent/raw/past_papers/brokerage_law.jsonl",
+    r"data/real_estate_agent/raw/past_papers/civil_law.jsonl",
+    r"data/real_estate_agent/raw/past_papers/disclosure_taxation.jsonl",
+    r"data/real_estate_agent/raw/past_papers/introduction.jsonl",
+    r"data/real_estate_agent/raw/past_papers/public_law.jsonl",
+]
+
+INDEX_PATH = "data/index/index.faiss"
+DOCS_PATH = "data/index/docs.npy"
+
+# 2. Load the embedding model
+embedding_model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")
+
+def init_faiss():
+    questions = []
+
+    # 3. Read the JSONL files
+    for path in source_paths:
+        with open(path, "r", encoding="utf-8") as f:
+            for line in f:
+                data = json.loads(line)
+                question_text = data.get("question", "")
+                if question_text:  # add only non-empty questions
+                    questions.append(question_text)
+
+    print(f"✅ Loaded {len(questions)} questions in total")
+
+    # 4. Create embeddings
+    embeddings = embedding_model.encode(
+        questions,
+        batch_size=32,
+        show_progress_bar=True
+    )
+    embeddings = np.array(embeddings).astype('float32')
+
+    # 5. Build the FAISS index
+    dimension = embeddings.shape[1]
+    index = faiss.IndexFlatL2(dimension)  # L2-distance-based index
+    index.add(embeddings)
+
+    # 6. Save
+    os.makedirs(os.path.dirname(INDEX_PATH), exist_ok=True)
+    faiss.write_index(index, INDEX_PATH)
+    np.save(DOCS_PATH, questions)
+
+    print("✅ FAISS index and documents saved!")
+
+if __name__ == "__main__":
+    init_faiss()
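init_faiss assumes each JSONL line is an object with at least a "question" field; a minimal sketch of writing one such record (the record text is illustrative, not from this commit):

import json

record = {"question": "Which statement about brokerage contracts is correct?"}  # illustrative
with open("data/real_estate_agent/raw/past_papers/brokerage_law.jsonl", "a", encoding="utf-8") as f:
    f.write(json.dumps(record, ensure_ascii=False) + "\n")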
retriever/reranker.py ADDED
@@ -0,0 +1,30 @@
+from transformers import AutoTokenizer, AutoModelForSequenceClassification
+import torch
+
+# 1. Load the reranker model
+reranker_tokenizer = AutoTokenizer.from_pretrained("BAAI/bge-reranker-base")
+reranker_model = AutoModelForSequenceClassification.from_pretrained("BAAI/bge-reranker-base")
+
+def rerank_documents(query: str, docs: list, top_k: int = 5) -> list:
+    """
+    Re-rank the retrieved documents by their relevance to the query.
+    """
+    pairs = [(query, doc) for doc in docs]
+
+    inputs = reranker_tokenizer.batch_encode_plus(
+        pairs,
+        padding=True,
+        truncation=True,
+        return_tensors="pt",
+        max_length=512
+    )
+
+    with torch.no_grad():
+        scores = reranker_model(**inputs).logits.squeeze(-1)  # (batch_size,)
+
+    scores = scores.tolist()
+
+    # Sort documents by score, highest first
+    sorted_docs = [doc for _, doc in sorted(zip(scores, docs), key=lambda x: x[0], reverse=True)]
+
+    return sorted_docs[:top_k]
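A small usage sketch; the query and candidate documents are made up:

from retriever.reranker import rerank_documents

candidates = [
    "A brokerage contract must be made in writing.",  # illustrative
    "Property tax is assessed annually.",             # illustrative
]
best = rerank_documents("What form must a brokerage contract take?", candidates, top_k=1)
print(best[0])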
retriever/vectordb.py ADDED
@@ -0,0 +1,50 @@
+import faiss
+import numpy as np
+import os
+from sentence_transformers import SentenceTransformer
+from retriever.reranker import rerank_documents
+
+# 1. Load the embedding model
+embedding_model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")
+
+# 2. Initialize the vector DB (FAISS index)
+INDEX_PATH = "data/index/index.faiss"
+DOCS_PATH = "data/index/docs.npy"
+
+if os.path.exists(INDEX_PATH) and os.path.exists(DOCS_PATH):
+    index = faiss.read_index(INDEX_PATH)
+    documents = np.load(DOCS_PATH, allow_pickle=True)
+else:
+    index = None
+    documents = None
+    print("No FAISS index or docs found. Please build the index first.")
+
+# 3. Search function
+def search_documents(query: str, top_k: int = 5):
+    if index is None or documents is None:
+        raise ValueError("Index or documents not loaded. Build the FAISS index first.")
+
+    query_vector = embedding_model.encode([query])
+    query_vector = np.array(query_vector).astype('float32')
+
+    distances, indices = index.search(query_vector, top_k)
+    results = []
+
+    for idx in indices[0]:
+        if idx < len(documents):
+            results.append(documents[idx])
+
+    return results
+
+    # # 1. Rough FAISS search
+    # query_embedding = embedding_model.encode([query], convert_to_tensor=True).cpu().detach().numpy()
+    # distances, indices = index.search(query_embedding, top_k)
+    # results = [documents[idx] for idx in indices[0] if idx != -1]
+
+    # # 2. Precise reranking
+    # reranked_results = rerank_documents(query, results, top_k=top_k)
+
+    # return reranked_results
retriever/vectordb_rerank.py ADDED
@@ -0,0 +1,37 @@
+import faiss
+import numpy as np
+import os
+from sentence_transformers import SentenceTransformer
+from retriever.reranker import rerank_documents
+
+# 1. Load the embedding model
+embedding_model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")
+
+# 2. Initialize the vector DB (FAISS index)
+INDEX_PATH = "data/index/index.faiss"
+DOCS_PATH = "data/index/docs.npy"
+
+if os.path.exists(INDEX_PATH) and os.path.exists(DOCS_PATH):
+    index = faiss.read_index(INDEX_PATH)
+    documents = np.load(DOCS_PATH, allow_pickle=True)
+else:
+    index = None
+    documents = None
+    print("No FAISS index or docs found. Please build the index first.")
+
+# 3. Search function
+def search_documents(query: str, top_k: int = 5):
+    if index is None or documents is None:
+        raise ValueError("Index or documents not loaded. Build the FAISS index first.")
+
+    # 1. Rough FAISS search
+    query_embedding = embedding_model.encode([query], convert_to_tensor=True).cpu().detach().numpy()
+    distances, indices = index.search(query_embedding, top_k)
+    results = [documents[idx] for idx in indices[0] if idx != -1]
+
+    # 2. Apply reranking
+    reranked_results = rerank_documents(query, results, top_k=top_k)
+
+    return reranked_results
services/__pycache__/rag_pipeline.cpython-312.pyc ADDED
Binary file (857 Bytes)
 
services/rag_pipeline.py ADDED
@@ -0,0 +1,21 @@
+# from retriever.vectordb import search_documents
+from retriever.vectordb_rerank import search_documents
+from generator.prompt_builder import build_prompt
+from generator.llm_inference import generate_answer
+
+def rag_pipeline(query: str, top_k: int = 5) -> str:
+    """
+    1. Retrieve documents relevant to the user question
+    2. Build a prompt with the retrieved documents
+    3. Generate an answer from the prompt
+    """
+    # 1. Retrieve
+    context_docs = search_documents(query, top_k=top_k)
+
+    # 2. Assemble the prompt
+    prompt = build_prompt(query, context_docs)
+
+    # 3. Run model inference
+    output = generate_answer(prompt)
+
+    return output
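Putting it together, a hedged end-to-end sketch; it assumes the FAISS index from retriever/build_index.py already exists on disk, and the query string is illustrative:

from services.rag_pipeline import rag_pipeline

# retrieval -> reranking -> prompt building -> generation in one call
answer = rag_pipeline("Create a practice question about disclosure taxation.", top_k=5)
print(answer)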