Spaces:

ikReza
/

bangladesh-traffic-rules

Sleeping

Ibrahim Kaiser

fixed pdf download issue-1

3478980 3 months ago

1.09 kB

	import os
	from dotenv import load_dotenv

	# Load variables from a .env file into the process environment before any
	# setting below is read. override=True lets values from the .env file
	# replace variables that are already set in the environment (python-dotenv
	# leaves existing variables untouched by default).
	load_dotenv(override=True)

	# ---------------------------------------------------------------------
	# Models
	# ---------------------------------------------------------------------
	# Identifier of the embedding model.
	EMBEDDING_MODEL: str = "sentence-transformers/all-MiniLM-L6-v2"
	# Identifier of the chat model, the base URL it is requested from, and
	# the sampling temperature.
	LLM_MODEL: str = "z-ai/glm-4.5-air:free"
	LLM_BASE_URL: str = "https://openrouter.ai/api/v1"
	LLM_TEMPERATURE: float = 0.7

	# ---------------------------------------------------------------------
	# Credentials
	# ---------------------------------------------------------------------
	# Read from the environment; the value is None when the variable is unset.
	OPENROUTER_API_KEY = os.environ.get("OPENROUTER_API_KEY")

	# ---------------------------------------------------------------------
	# Vector store
	# ---------------------------------------------------------------------
	VECTOR_STORE_PATH: str = "chroma_db"

	# ---------------------------------------------------------------------
	# Hugging Face dataset
	# ---------------------------------------------------------------------
	HF_REPO_ID: str = "ikReza/traffic_rules"
	HF_FILENAME: str = "traffic_rules.pdf"
	HF_REPO_TYPE: str = "dataset"

	# ---------------------------------------------------------------------
	# Text chunking
	# ---------------------------------------------------------------------
	# Controls how the PDF document is cut into pieces.

	# Upper bound, in characters, on the length of one chunk. Bigger chunks
	# keep more surrounding context at the cost of precision.
	CHUNK_SIZE: int = 500

	# Characters shared between neighbouring chunks, so that information
	# sitting on a chunk boundary is not split apart.
	CHUNK_OVERLAP: int = 50

	# How many chunks are handled per batch. Batching improves efficiency
	# and prevents timeouts.
	BATCH_SIZE: int = 100