# RAG_voice/config.py
"""
๋ฒกํ„ฐ ์Šคํ† ์–ด, ์ž„๋ฒ ๋”ฉ ๋ชจ๋ธ, LLM ๋“ฑ ๊ตฌ์„ฑ ์š”์†Œ ์„ค์ •
"""
import os
from dotenv import load_dotenv
# Load a .env file if present (for local development)
load_dotenv(verbose=True)

# Environment detection: Hugging Face Spaces sets the SPACE_ID variable
IS_HUGGINGFACE = os.getenv("SPACE_ID") is not None
# API keys and environment settings (provide these via environment variables or a .env file;
# do not hard-code secrets as defaults)
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY", "")
LANGFUSE_PUBLIC_KEY = os.getenv("LANGFUSE_PUBLIC_KEY", "")
LANGFUSE_SECRET_KEY = os.getenv("LANGFUSE_SECRET_KEY", "")
LANGFUSE_HOST = os.getenv("LANGFUSE_HOST", "https://cloud.langfuse.com")
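
# Illustrative sketch, not called anywhere in this file: one way the Langfuse
# credentials above could be turned into a tracing client. The function name is
# hypothetical; the app may wire Langfuse up differently elsewhere.
def _example_langfuse_client():
    """Build a Langfuse client from the settings above (sketch only)."""
    from langfuse import Langfuse  # local import keeps config.py free of hard dependencies
    return Langfuse(
        public_key=LANGFUSE_PUBLIC_KEY,
        secret_key=LANGFUSE_SECRET_KEY,
        host=LANGFUSE_HOST,
    )
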
# Naver Clova STT API settings (no defaults; set these via environment variables)
NAVER_CLIENT_ID = os.getenv("NAVER_CLIENT_ID", "")
NAVER_CLIENT_SECRET = os.getenv("NAVER_CLIENT_SECRET", "")
# Check that the Naver Clova API keys are present
if NAVER_CLIENT_ID and NAVER_CLIENT_SECRET:
    print("Naver Clova STT API keys are configured.")
else:
    print("Warning: Naver Clova STT API keys are not configured.")
    print("Set the NAVER_CLIENT_ID and NAVER_CLIENT_SECRET environment variables to enable STT.")
# Milvus vector DB settings
MILVUS_HOST = os.getenv("MILVUS_HOST", "localhost")
MILVUS_PORT = os.getenv("MILVUS_PORT", "19530")
MILVUS_COLLECTION = "pdf_documents"
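
# Illustrative sketch, not used by the app itself: connecting to Milvus with the
# host, port, and collection name defined above. Assumes pymilvus is installed;
# the function name is hypothetical.
def _example_milvus_connect():
    """Open a Milvus connection and report whether the collection exists (sketch only)."""
    from pymilvus import connections, utility
    connections.connect(alias="default", host=MILVUS_HOST, port=MILVUS_PORT)
    return utility.has_collection(MILVUS_COLLECTION)
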
# Embedding model settings
EMBEDDING_MODEL = "Alibaba-NLP/gte-multilingual-base"  # multilingual embedding model
RERANKER_MODEL = "Alibaba-NLP/gte-multilingual-reranker-base"  # multilingual reranker
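
# Illustrative sketch, not executed on import: loading the embedding model and
# reranker named above with sentence-transformers. Passing trust_remote_code=True
# is an assumption about how these GTE checkpoints are published; verify against
# the loader the app actually uses.
def _example_load_models():
    """Load the embedding model and cross-encoder reranker (sketch only)."""
    from sentence_transformers import CrossEncoder, SentenceTransformer
    embedder = SentenceTransformer(EMBEDDING_MODEL, trust_remote_code=True)
    reranker = CrossEncoder(RERANKER_MODEL, trust_remote_code=True)
    return embedder, reranker
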
# LLM model settings (selected automatically based on the environment)
if IS_HUGGINGFACE:
    # Use OpenAI in the Hugging Face Spaces environment
    USE_OPENAI = True
    LLM_MODEL = "gpt-3.5-turbo"  # or another suitable model
    print("Hugging Face Spaces environment detected: using an OpenAI model")
else:
    # Use Ollama in a local environment (or OpenAI if USE_OPENAI=true)
    USE_OPENAI = os.getenv("USE_OPENAI", "False").lower() == "true"
    LLM_MODEL = os.getenv("LLM_MODEL", "gpt-3.5-turbo" if USE_OPENAI else "gemma3:latest")
    OLLAMA_HOST = os.getenv("OLLAMA_HOST", "http://localhost:11434")
    print(f"Local environment: using {'OpenAI' if USE_OPENAI else 'Ollama'} model")
# App settings
CHUNK_SIZE = 1000
CHUNK_OVERLAP = 200
TOP_K_RETRIEVAL = 5  # number of vector search results to retrieve
TOP_K_RERANK = 3  # number of results to keep after reranking
PDF_DIRECTORY = "documents"  # directory containing the PDF documents
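
# Illustrative sketch, not run on import: feeding CHUNK_SIZE and CHUNK_OVERLAP into
# a LangChain text splitter. Which splitter the app actually uses is an assumption;
# this only shows where the two parameters plug in.
def _example_split(text: str):
    """Split raw text into overlapping chunks (sketch only)."""
    from langchain_text_splitters import RecursiveCharacterTextSplitter
    splitter = RecursiveCharacterTextSplitter(chunk_size=CHUNK_SIZE, chunk_overlap=CHUNK_OVERLAP)
    return splitter.split_text(text)
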
# Speech-to-text settings
STT_LANGUAGE = "Kor"  # default language (Kor, Eng, Jpn, Chn, etc.)
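
# Illustrative sketch, not called here: sending audio to Naver Clova Speech
# Recognition (CSR) with the client ID/secret and STT_LANGUAGE above. The endpoint
# and header names follow NCP's public CSR documentation; verify them against the
# project's actual STT module.
def _example_clova_stt(audio_bytes: bytes) -> str:
    """Transcribe raw audio bytes with the Clova CSR REST API (sketch only)."""
    import requests
    url = f"https://naveropenapi.apigw.ntruss.com/recog/v1/stt?lang={STT_LANGUAGE}"
    headers = {
        "X-NCP-APIGW-API-KEY-ID": NAVER_CLIENT_ID,
        "X-NCP-APIGW-API-KEY": NAVER_CLIENT_SECRET,
        "Content-Type": "application/octet-stream",
    }
    response = requests.post(url, headers=headers, data=audio_bytes)
    return response.json().get("text", "")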