Spaces:
Sleeping
Sleeping
# Application settings exposed as module-level constants: model selection,
# API credentials, storage locations and text-chunking parameters.
import os

from dotenv import load_dotenv

# Load the .env file before anything below reads the environment.
# override=True lets entries from .env replace variables that are already
# set in the process environment.
load_dotenv(override=True)

# --- Model configuration ---------------------------------------------------
EMBEDDING_MODEL: str = "sentence-transformers/all-MiniLM-L6-v2"
LLM_MODEL: str = "z-ai/glm-4.5-air:free"
LLM_BASE_URL: str = "https://openrouter.ai/api/v1"
LLM_TEMPERATURE: float = 0.7

# --- API configuration -----------------------------------------------------
# Evaluates to None when OPENROUTER_API_KEY is not defined in the environment.
OPENROUTER_API_KEY = os.environ.get("OPENROUTER_API_KEY")

# --- Vector store configuration --------------------------------------------
VECTOR_STORE_PATH: str = "chroma_db"

# --- Hugging Face dataset configuration ------------------------------------
HF_REPO_ID: str = "ikReza/traffic_rules"
HF_FILENAME: str = "traffic_rules.pdf"
HF_REPO_TYPE: str = "dataset"

# --- Text chunking configuration -------------------------------------------
# Controls how the PDF document is split into smaller pieces.

# Upper bound on the number of characters per chunk. Bigger chunks keep more
# surrounding context but may be less precise.
CHUNK_SIZE: int = 500

# Number of characters shared by neighbouring chunks, so that important
# information is not cut in half at a chunk boundary.
CHUNK_OVERLAP: int = 50

# How many chunks are processed at once. Batching improves efficiency and
# helps prevent timeouts.
BATCH_SIZE: int = 100