dasomaru committed on
Commit 2c5f455 · verified · 1 Parent(s): d3411be

Upload folder using huggingface_hub
app.py CHANGED
@@ -1,53 +1,79 @@
- import gradio as gr
- import spaces
- import torch
- from transformers import AutoModelForCausalLM, AutoTokenizer
- # from retriever.vectordb_rerank import search_documents  # 🧠 load the RAG retriever
- from services.rag_pipeline import rag_pipeline
-
- model_name = "dasomaru/gemma-3-4bit-it-demo"
-
-
- # 1. Load the model/tokenizer once
- tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
- # 🚀 Load the model onto CPU first (no GPU yet)
- model = AutoModelForCausalLM.from_pretrained(
-     model_name,
-     torch_dtype=torch.float16,  # it is a 4-bit model
-     trust_remote_code=True,
- )
-
- # 2. Cache management
- search_cache = {}
-
- @spaces.GPU(duration=300)
- def generate_response(query: str):
-     tokenizer = AutoTokenizer.from_pretrained("dasomaru/gemma-3-4bit-it-demo")
-     model = AutoModelForCausalLM.from_pretrained("dasomaru/gemma-3-4bit-it-demo")
-     model.to("cuda")
-
-     if query in search_cache:
-         print(f"⚡ Cache hit: '{query}'")
-         return search_cache[query]
-
-     # 🔥 Call rag_pipeline to retrieve and generate
-     results = rag_pipeline(query)
-
-     # Join the results if they come back as a list
-     if isinstance(results, list):
-         results = "\n\n".join(results)
-
-     search_cache[query] = results
-     return results
-
- # 3. Gradio interface
- demo = gr.Interface(
-     fn=generate_response,
-     inputs=gr.Textbox(lines=2, placeholder="Enter a question"),
-     outputs="text",
-     title="Law RAG Assistant",
-     description="Statute-based RAG pipeline test",
- )
-
- # demo.launch(server_name="0.0.0.0", server_port=7860)  # 🚀 ready for API deployment
- demo.launch()
+ import gradio as gr
+ import spaces
+ import torch
+ from transformers import AutoModelForCausalLM, AutoTokenizer
+ # from retriever.vectordb_rerank import search_documents  # 🧠 load the RAG retriever
+ from services.rag_pipeline import rag_pipeline
+
+ model_name = "dasomaru/gemma-3-4bit-it-demo"
+
+
+ # 1. Load the model/tokenizer once
+ tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
+ # 🚀 Load the model onto CPU first (no GPU yet)
+ model = AutoModelForCausalLM.from_pretrained(
+     model_name,
+     torch_dtype=torch.float16,  # it is a 4-bit model
+     device_map="auto",          # ✅ important: assign devices automatically
+     trust_remote_code=True,
+ )
+
+ # 2. Cache management
+ search_cache = {}
+
+ @spaces.GPU(duration=300)
+ def generate_response(query: str):
+     tokenizer = AutoTokenizer.from_pretrained(
+         "dasomaru/gemma-3-4bit-it-demo",
+         trust_remote_code=True,
+     )
+     model = AutoModelForCausalLM.from_pretrained(
+         "dasomaru/gemma-3-4bit-it-demo",
+         torch_dtype=torch.float16,  # it is a 4-bit model
+         device_map="auto",          # ✅ important: assign devices automatically
+         trust_remote_code=True,
+     )
+     # model.to("cuda")  # fix: redundant (and can raise) once device_map="auto" has placed the model
+
+     if query in search_cache:
+         print(f"⚡ Cache hit: '{query}'")
+         return search_cache[query]
+
+     # 🔥 Call rag_pipeline to retrieve and generate
+     # Retrieval
+     top_k = 5
+     results = rag_pipeline(query, top_k=top_k)
+
+     # Join the results if they come back as a list
+     if isinstance(results, list):
+         results = "\n\n".join(results)
+
+     search_cache[query] = results
+     # return results
+
+     inputs = tokenizer(results, return_tensors="pt").to(model.device)  # ✅ model.device
+     outputs = model.generate(
+         **inputs,
+         max_new_tokens=512,
+         temperature=0.7,
+         top_p=0.9,
+         top_k=50,
+         do_sample=True,
+     )
+
+     return tokenizer.decode(outputs[0], skip_special_tokens=True)
+
+
+ # 3. Gradio interface
+ demo = gr.Interface(
+     fn=generate_response,
+     # inputs=gr.Textbox(lines=2, placeholder="Enter a question"),
+     inputs="text",
+     outputs="text",
+     title="Law RAG Assistant",
+     description="Statute-based RAG pipeline test",
+ )
+
+ # demo.launch(server_name="0.0.0.0", server_port=7860)  # 🚀 ready for API deployment
+ demo.launch()
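
Note: on ZeroGPU Spaces, the spaces.GPU decorator only grants CUDA inside the decorated call, which appears to be why the model is re-loaded per request above. A minimal alternative sketch (assuming the decorator behaves as documented) reuses the module-level CPU copy and only moves it to the GPU inside the handler, avoiding a fresh from_pretrained per call:

    # Sketch, not the committed code: reuse module-level CPU weights and move
    # them to CUDA inside the GPU-scoped handler instead of re-loading per call.
    import spaces
    import torch
    from transformers import AutoModelForCausalLM, AutoTokenizer

    model_name = "dasomaru/gemma-3-4bit-it-demo"
    tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
    model = AutoModelForCausalLM.from_pretrained(
        model_name, torch_dtype=torch.float16, trust_remote_code=True
    )

    @spaces.GPU(duration=300)
    def generate_response(query: str) -> str:
        model.to("cuda")  # weights already in process memory; only the transfer repeats
        inputs = tokenizer(query, return_tensors="pt").to("cuda")
        outputs = model.generate(**inputs, max_new_tokens=512, do_sample=True)
        return tokenizer.decode(outputs[0], skip_special_tokens=True)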
app_v2.py ADDED
@@ -0,0 +1,61 @@
+ import gradio as gr
+ import spaces
+ import torch
+ from transformers import AutoModelForCausalLM, AutoTokenizer
+ from retriever.vectordb import search_documents  # 🧠 load the RAG retriever
+
+ model_name = "dasomaru/gemma-3-4bit-it-demo"
+
+
+ # 🚀 The tokenizer can be preloaded even on CPU
+ tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
+ # 🚀 Load the model onto CPU first (no GPU yet)
+ model = AutoModelForCausalLM.from_pretrained(
+     model_name,
+     torch_dtype=torch.float16,  # it is a 4-bit model
+     trust_remote_code=True,
+ )
+
+ @spaces.GPU(duration=300)
+ def generate_response(query):
+     # 🚀 Reloaded on every call inside generate_response
+     # tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
+     # model = AutoModelForCausalLM.from_pretrained(
+     #     model_name,
+     #     torch_dtype=torch.float16,
+     #     device_map="auto",  # ✅ important: assign devices automatically
+     #     trust_remote_code=True,
+     # )
+     tokenizer = AutoTokenizer.from_pretrained("dasomaru/gemma-3-4bit-it-demo")
+     model = AutoModelForCausalLM.from_pretrained("dasomaru/gemma-3-4bit-it-demo")
+     model.to("cuda")
+
+     # 1. Retrieval
+     top_k = 5
+     retrieved_docs = search_documents(query, top_k=top_k)
+
+     # 2. Prompt assembly
+     prompt = (
+         "You are an expert at writing questions for the Licensed Real Estate Agent exam.\n\n"
+         "The following are past exam questions and related statute information:\n"
+     )
+     for idx, doc in enumerate(retrieved_docs, 1):
+         prompt += f"- {doc}\n"
+     prompt += "\nPlease answer the user's request using this information.\n\n"
+     prompt += f"[Question]\n{query}\n\n[Answer]\n"
+
+     # 3. Answer generation
+     inputs = tokenizer(prompt, return_tensors="pt").to(model.device)  # ✅ model.device
+     outputs = model.generate(
+         **inputs,
+         max_new_tokens=512,
+         temperature=0.7,
+         top_p=0.9,
+         top_k=50,
+         do_sample=True,
+     )
+
+     return tokenizer.decode(outputs[0], skip_special_tokens=True)
+
+ demo = gr.Interface(fn=generate_response, inputs="text", outputs="text")
+ demo.launch()
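
Note: for a decoder-only model, generate() returns the prompt tokens followed by the continuation, so tokenizer.decode(outputs[0], ...) echoes the whole assembled prompt back into the Gradio output. A small helper (a sketch; the function name is illustrative) slices the prompt off first:

    # Sketch: keep only the newly generated tokens, not the echoed prompt.
    from transformers import PreTrainedModel, PreTrainedTokenizerBase

    def decode_answer_only(model: PreTrainedModel, tokenizer: PreTrainedTokenizerBase, prompt: str) -> str:
        inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
        outputs = model.generate(**inputs, max_new_tokens=512, do_sample=True)
        prompt_len = inputs["input_ids"].shape[1]  # number of prompt tokens to skip
        return tokenizer.decode(outputs[0][prompt_len:], skip_special_tokens=True)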
data/index/exam_db/chroma.sqlite3 CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:fb44775f377f843c14055fc9590377051b9b63a5d78f41573c4c3cd73c8e8cbb
- size 20951040
+ oid sha256:ab1270442e19db5a1c0ec0217101b32e3d5ce379d9cf0a4278f7b4edac2489fb
+ size 14610432
data/index/law_db/chroma.sqlite3 CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:a5d91eaf7e130a1793a46f82d0c9951ace88ffa8610ba2b60352ae171b9b725f
- size 33640448
+ oid sha256:6dbbf1eed4fb2a85649ef2d22fdce84b1c10a268a59279dbb4a9e0d8141e1e55
+ size 38465536
generator/llm_inference.py CHANGED
@@ -1,30 +1,29 @@
- from transformers import pipeline
- import spaces
-
- # 1. Load the model (loaded only once)
- generator = pipeline(
-     "text-generation",
-     model="dasomaru/gemma-3-4bit-it-demo",  # the model you uploaded
-     tokenizer="dasomaru/gemma-3-4bit-it-demo",
-     device=0,  # use CUDA:0 (GPU); set device=-1 for CPU only
-     max_new_tokens=2048,
-     temperature=0.7,
-     top_p=0.9,
-     repetition_penalty=1.1
- )
-
- # 2. Answer generation function
- @spaces.GPU(duration=300)
- def generate_answer(prompt: str) -> str:
-     """
-     Generate an answer from the given prompt.
-     """
-     print(f"🔵 Prompt Length: {len(prompt)} characters")  # added!
-     outputs = generator(
-         prompt,
-         do_sample=True,
-         top_k=50,
-         num_return_sequences=1
-     )
-     return outputs[0]["generated_text"].strip()
-
+ from transformers import pipeline
+ import spaces
+
+ # 1. Load the model (loaded only once)
+ generator = pipeline(
+     "text-generation",
+     model="dasomaru/gemma-3-4bit-it-demo",  # the model you uploaded
+     tokenizer="dasomaru/gemma-3-4bit-it-demo",
+     device=0,  # use CUDA:0 (GPU); set device=-1 for CPU only
+     max_new_tokens=512,
+     temperature=0.7,
+     top_p=0.9,
+     repetition_penalty=1.1
+ )
+
+ # 2. Answer generation function
+ @spaces.GPU(duration=300)
+ def generate_answer(prompt: str) -> str:
+     """
+     Generate an answer from the given prompt.
+     """
+     print(f"🔵 Prompt Length: {len(prompt)} characters")  # added!
+     outputs = generator(
+         prompt,
+         do_sample=True,
+         top_k=50,
+         num_return_sequences=1
+     )
+     return outputs[0]["generated_text"].strip()
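
Note: the only functional change in this file is max_new_tokens 2048 → 512. In transformers pipelines, generation kwargs given at construction act as defaults and kwargs passed per call are merged on top, so a longer completion can still be requested for one call without rebuilding the pipeline. A usage sketch:

    # Per-call kwargs override the pipeline's constructor defaults (sketch).
    from generator.llm_inference import generator

    outputs = generator("sample prompt", max_new_tokens=1024, do_sample=True)
    print(outputs[0]["generated_text"])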
 
generator/prompt_builder.py CHANGED
@@ -1,19 +1,19 @@
- def build_prompt(query: str, context_docs: list) -> str:
-     """
-     Combine the user query and the retrieved documents into a prompt for the LLM.
-     """
-     context_text = "\n".join([f"- {doc}" for doc in context_docs])
-
-     prompt = f"""You are an expert at writing questions for the Licensed Real Estate Agent exam.
-
- The following are past exam questions and related statute information:
- {context_text}
-
- Please answer the user's request using this information.
-
- [Question]
- {query}
-
- [Answer]
- """
+ def build_prompt(query: str, law_docs: list, exam_docs: list) -> str:
+     prompt = (
+         "You are an expert at writing questions for the Licensed Real Estate Agent exam.\n\n"
+         "Below are the relevant statutes and past exam questions:\n\n"
+         "[Statute information]\n"
+     )
+
+     for doc in law_docs:
+         prompt += f"- {doc}\n"
+
+     prompt += "\n[Past exam question information]\n"
+
+     for doc in exam_docs:
+         prompt += f"- {doc}\n"
+
+     prompt += "\nAnswer the user's request accurately and clearly, referring to the information above.\n\n"
+     prompt += f"[Question]\n{query}\n\n[Answer]\n"
+
      return prompt
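
Note: the signature changed from build_prompt(query, context_docs) to build_prompt(query, law_docs, exam_docs), so any caller still on the old two-argument form will raise a TypeError. A quick usage sketch of the new form (the document strings are placeholders):

    from generator.prompt_builder import build_prompt

    prompt = build_prompt(
        query="Summarize the broker's duty to verify and explain.",
        law_docs=["Licensed Real Estate Agents Act, Article 25 ..."],
        exam_docs=["2022 exam, question 12 ..."],
    )
    print(prompt)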
generator/prompt_builder_v1.py ADDED
@@ -0,0 +1,19 @@
+ def build_prompt(query: str, context_docs: list) -> str:
+     """
+     Combine the user query and the retrieved documents into a prompt for the LLM.
+     """
+     context_text = "\n".join([f"- {doc}" for doc in context_docs])
+
+     prompt = f"""You are an expert at writing questions for the Licensed Real Estate Agent exam.
+
+ The following are past exam questions and related statute information:
+ {context_text}
+
+ Please answer the user's request using this information.
+
+ [Question]
+ {query}
+
+ [Answer]
+ """
+     return prompt
requirements.txt CHANGED
@@ -1,7 +1,6 @@
  gradio
  torch
  transformers
  sentence-transformers
  faiss-cpu
  tqdm
- accelerate
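
Note: this commit removes accelerate, but the new app.py loads the model with device_map="auto", which transformers dispatches through accelerate. If that flag stays, accelerate likely needs to stay too; a startup guard (hypothetical, not in the repo) would surface the mismatch early:

    # Hypothetical guard: device_map="auto" requires the accelerate package,
    # which this commit drops from requirements.txt.
    import importlib.util

    if importlib.util.find_spec("accelerate") is None:
        raise RuntimeError(
            'accelerate is not installed but app.py passes device_map="auto"; '
            "add accelerate back to requirements.txt"
        )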
 
retriever/vectordb_rerank.py CHANGED
@@ -1,37 +1,56 @@
+ # vectordb_rerank.py
  import faiss
  import numpy as np
  import os
+ from chromadb import PersistentClient
+ from chromadb.utils import embedding_functions
  from sentence_transformers import SentenceTransformer
  from retriever.reranker import rerank_documents

- # 1. Load the embedding model
- embedding_model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")
-
- # 2. Initialize the vector DB (FAISS index)
- INDEX_PATH = "data/index/index.faiss"
- DOCS_PATH = "data/index/docs.npy"
-
- if os.path.exists(INDEX_PATH) and os.path.exists(DOCS_PATH):
-     index = faiss.read_index(INDEX_PATH)
-     documents = np.load(DOCS_PATH, allow_pickle=True)
- else:
-     index = None
-     documents = None
-     print("No FAISS index or docs found. Please build the index first.")
-
- # 3. Search function
- def search_documents(query: str, top_k: int = 5):
-     if index is None or documents is None:
-         raise ValueError("Index or documents not loaded. Build the FAISS index first.")
-
-     # 1. Coarse FAISS search
-     query_embedding = embedding_model.encode([query], convert_to_tensor=True).cpu().detach().numpy()
-     distances, indices = index.search(query_embedding, top_k)
-     results = [documents[idx] for idx in indices[0] if idx != -1]
-
-     # 2. Apply reranking
-     reranked_results = rerank_documents(query, results, top_k=top_k)
-
-     return reranked_results
+ # chroma vector config v2
+ embedding_models = [
+     "upskyy/bge-m3-korean",
+     "jhgan/ko-sbert-sts",
+     "BM-K/KoSimCSE-roberta",
+     "BM-K/KoSimCSE-v2-multitask",
+     "snunlp/KR-SBERT-V40K-klueNLI-augSTS",
+     "beomi/KcELECTRA-small-v2022",
+ ]
+ # law_db config v2
+ CHROMA_PATH = os.path.abspath("data/index/law_db")
+ COLLECTION_NAME = "law_all"
+ EMBEDDING_MODEL_NAME = embedding_models[4]  # pick the model to use
+
+ # 1. Load the embedding model (v2)
+ # embedding_model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")
+ embedding_model = SentenceTransformer(EMBEDDING_MODEL_NAME)
+
+ # 2. Set up the embedding function
+ embedding_fn = embedding_functions.SentenceTransformerEmbeddingFunction(model_name=EMBEDDING_MODEL_NAME)
+
+ # 3. Load the Chroma client and collection
+ client = PersistentClient(path=CHROMA_PATH)
+ collection = client.get_collection(name=COLLECTION_NAME, embedding_function=embedding_fn)
+
+ # 4. Search function
+ def search_documents(query: str, top_k: int = 5):
+     print(f"\n🔍 Query: '{query}'")
+     results = collection.query(
+         query_texts=[query],
+         n_results=top_k,
+         include=["documents", "metadatas", "distances"]
+     )
+
+     for i, (doc, meta, dist) in enumerate(zip(
+         results['documents'][0],
+         results['metadatas'][0],
+         results['distances'][0]
+     )):
+         print(f"\n📄 Result {i+1} (similarity: {1 - dist:.2f})")
+         print(f"Document: {doc[:150]}...")
+         print("Metadata:")
+         print(meta)
+
+     return results['documents'][0]  # fix: return the documents so callers get usable context
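
Note: the printed similarity 1 - dist only holds if the collection was built with the cosine metric; Chroma's default space is l2, where 1 - dist can go negative. A hedged helper, assuming the metric is known at call time:

    # Convert a Chroma distance to a similarity score. Chroma's cosine and
    # inner-product "distances" are already 1 - score; l2 needs a squash.
    def to_similarity(dist: float, space: str = "cosine") -> float:
        if space in ("cosine", "ip"):
            return 1.0 - dist
        return 1.0 / (1.0 + dist)  # l2: monotonic map into (0, 1]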
retriever/vectordb_rerank_exam.py ADDED
@@ -0,0 +1,55 @@
+ # vectordb_rerank_exam.py
+ import faiss
+ import numpy as np
+ import os
+ from chromadb import PersistentClient
+ from chromadb.utils import embedding_functions
+ from sentence_transformers import SentenceTransformer
+ from retriever.reranker import rerank_documents
+
+ # chroma vector config v2
+ embedding_models = [
+     "upskyy/bge-m3-korean",
+     "jhgan/ko-sbert-sts",
+     "BM-K/KoSimCSE-roberta",
+     "BM-K/KoSimCSE-v2-multitask",
+     "snunlp/KR-SBERT-V40K-klueNLI-augSTS",
+     "beomi/KcELECTRA-small-v2022",
+ ]
+ # exam_db config v2
+ CHROMA_PATH = os.path.abspath("data/index/exam_db")
+ COLLECTION_NAME = "exam_all"
+ EMBEDDING_MODEL_NAME = embedding_models[4]  # pick the model to use
+
+ # 1. Load the embedding model (v2)
+ # embedding_model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")
+ embedding_model = SentenceTransformer(EMBEDDING_MODEL_NAME)
+
+ # 2. Set up the embedding function
+ embedding_fn = embedding_functions.SentenceTransformerEmbeddingFunction(model_name=EMBEDDING_MODEL_NAME)
+
+ # 3. Load the Chroma client and collection
+ client = PersistentClient(path=CHROMA_PATH)
+ collection = client.get_collection(name=COLLECTION_NAME, embedding_function=embedding_fn)
+
+ # 4. Search function
+ def search_documents(query: str, top_k: int = 5):
+     print(f"\n🔍 Query: '{query}'")
+     results = collection.query(
+         query_texts=[query],
+         n_results=top_k,
+         include=["documents", "metadatas", "distances"]
+     )
+
+     # rerank documents
+     # reranked_results = rerank_documents(query, results, top_k=top_k)
+
+     for i, (doc, meta, dist) in enumerate(zip(
+         results['documents'][0],
+         results['metadatas'][0],
+         results['distances'][0]
+     )):
+         print(f"\n📄 Result {i+1} (similarity: {1 - dist:.2f})")
+         print(f"Document: {doc[:150]}...")
+         print("Metadata:")
+         print(meta)
+
+     return results['documents'][0]  # fix: return the documents so callers get usable context
retriever/vectordb_rerank_law.py ADDED
@@ -0,0 +1,68 @@
+ # vectordb_rerank_law.py
+ import faiss
+ import numpy as np
+ import os
+ from chromadb import PersistentClient
+ from chromadb.utils import embedding_functions
+ from sentence_transformers import SentenceTransformer
+ from retriever.reranker import rerank_documents
+
+ # chroma vector config v2
+ embedding_models = [
+     "upskyy/bge-m3-korean",
+     "jhgan/ko-sbert-sts",
+     "BM-K/KoSimCSE-roberta",
+     "BM-K/KoSimCSE-v2-multitask",
+     "snunlp/KR-SBERT-V40K-klueNLI-augSTS",
+     "beomi/KcELECTRA-small-v2022",
+ ]
+ # law_db config v2
+ CHROMA_PATH = os.path.abspath("data/index/law_db")
+ COLLECTION_NAME = "law_all"
+ EMBEDDING_MODEL_NAME = embedding_models[4]  # pick the model to use
+
+
+ # 1. Load the embedding model (v2)
+ # embedding_model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")
+ embedding_model = SentenceTransformer(EMBEDDING_MODEL_NAME)
+
+
+ # 2. Set up the embedding function
+ embedding_fn = embedding_functions.SentenceTransformerEmbeddingFunction(model_name=EMBEDDING_MODEL_NAME)
+
+ # 3. Load the Chroma client and collection
+ client = PersistentClient(path=CHROMA_PATH)
+ collection = client.get_collection(name=COLLECTION_NAME, embedding_function=embedding_fn)
+
+
+ # 4. Search function
+ def search_documents(query: str, top_k: int = 5):
+     print(f"\n🔍 Query: '{query}'")
+     results = collection.query(
+         query_texts=[query],
+         n_results=top_k,
+         include=["documents", "metadatas", "distances"]
+     )
+
+     # Extract just the document lists
+     docs = results['documents'][0]
+     metadatas = results['metadatas'][0]
+     distances = results['distances'][0]
+
+     # Rerank the documents
+     reranked_docs = rerank_documents(query, docs, top_k=top_k)
+
+     # Re-align metadata and distances to the reranked documents
+     reranked_data = []
+     for doc in reranked_docs:
+         idx = docs.index(doc)
+         reranked_data.append((doc, metadatas[idx], distances[idx]))
+
+     for i, (doc, meta, dist) in enumerate(reranked_data):
+         print(f"\n📄 Result {i+1} (similarity: {1 - dist:.2f})")
+         print(f"Document: {doc[:150]}...")
+         print("Metadata:")
+         print(meta)
+
+     return reranked_data  # return for callers that need it
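
Note: retriever/reranker.py is not part of this commit, so only the call sites define the rerank_documents contract (query, candidate docs, top_k → reordered docs). A typical cross-encoder implementation might look like the sketch below; the model name is an assumption, not taken from the repo:

    # Hypothetical reranker matching the call sites in this commit.
    from sentence_transformers import CrossEncoder

    _cross_encoder = CrossEncoder("cross-encoder/ms-marco-MiniLM-L-6-v2")  # assumed model

    def rerank_documents(query: str, docs: list, top_k: int = 5) -> list:
        # Score each (query, document) pair and keep the top_k documents.
        scores = _cross_encoder.predict([(query, doc) for doc in docs])
        ranked = sorted(zip(docs, scores), key=lambda pair: pair[1], reverse=True)
        return [doc for doc, _score in ranked[:top_k]]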
services/rag_pipeline_v2.py ADDED
@@ -0,0 +1,33 @@
+ # from retriever.vectordb import search_documents
+ # from retriever.vectordb_rerank import search_documents
+ from retriever.vectordb_rerank_law import search_documents as search_law
+ from retriever.vectordb_rerank_exam import search_documents as search_exam
+ from generator.prompt_builder import build_prompt
+ from generator.llm_inference import generate_answer
+
+ def rag_pipeline(query: str, top_k: int = 5) -> str:
+     """
+     1. Retrieve documents relevant to the user query
+     2. Build a prompt from the retrieved documents
+     3. Generate an answer from the prompt
+     """
+     # 1. Search statutes and past exam questions separately
+     # context_docs = search_documents(query, top_k=top_k)
+     laws_docs = search_law(query, top_k=top_k)
+     exam_docs = search_exam(query, top_k=top_k)
+
+     # 2. Build the prompt
+     # prompt = build_prompt(query, context_docs)
+     prompt = build_prompt(query, laws_docs, exam_docs)
+
+     # 3. Generate questions with the LLM
+     # output = generate_answer(prompt)
+     questions = generate_answer(prompt)
+
+     # 4. Save the results
+     # save_to_exam_vector_db(questions)
+
+     return questions
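
Note: search_law (vectordb_rerank_law.py) returns (doc, metadata, distance) tuples, while build_prompt iterates its arguments as plain strings, so the prompt would interpolate whole tuples. A small shim (sketch; the helper name is illustrative) keeps the two compatible:

    # Sketch: unpack reranked tuples into plain document strings for build_prompt.
    from retriever.vectordb_rerank_law import search_documents as search_law

    def law_docs_for_prompt(query: str, top_k: int = 5) -> list:
        return [doc for doc, _meta, _dist in search_law(query, top_k=top_k)]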