# Ibrahim Kaiser
# fixed pdf download issue-1
# 3478980
import os
from dotenv import load_dotenv
# Load variables from a .env file into the process environment before the
# environment is read further down in this module. override=True is
# python-dotenv's switch for letting values from the .env file replace
# variables that are already set (see the python-dotenv documentation).
load_dotenv(override=True)
# --- Models -----------------------------------------------------------------
# Identifiers of the embedding model and the LLM, together with the base URL
# and temperature configured for the LLM.
EMBEDDING_MODEL: str = "sentence-transformers/all-MiniLM-L6-v2"
LLM_MODEL: str = "z-ai/glm-4.5-air:free"
LLM_BASE_URL: str = "https://openrouter.ai/api/v1"
LLM_TEMPERATURE: float = 0.7

# --- API credentials --------------------------------------------------------
# Taken from the environment; the value is None when the variable is unset.
OPENROUTER_API_KEY = os.environ.get("OPENROUTER_API_KEY")

# --- Vector store -----------------------------------------------------------
# Relative path used for the vector store.
VECTOR_STORE_PATH: str = "chroma_db"

# --- Hugging Face dataset ---------------------------------------------------
# Repository id, file name and repository type of the source PDF.
HF_REPO_ID: str = "ikReza/traffic_rules"
HF_FILENAME: str = "traffic_rules.pdf"
HF_REPO_TYPE: str = "dataset"

# --- Text chunking ----------------------------------------------------------
# Controls how the PDF text is cut into pieces.
#
# CHUNK_SIZE    Upper bound on characters per chunk. Bigger chunks keep more
#               context together at the cost of precision.
# CHUNK_OVERLAP Characters shared by neighbouring chunks, so that information
#               near a boundary is not split between two chunks.
# BATCH_SIZE    How many chunks are processed in one go. Batching improves
#               efficiency and helps avoid timeouts.
CHUNK_SIZE: int = 500
CHUNK_OVERLAP: int = 50
BATCH_SIZE: int = 100