aamirhameed committed on
Commit
ebe9d57
·
verified ·
1 Parent(s): aae29e6

Update config.py

Browse files
Files changed (1) hide show
  1. config.py +60 -23
config.py CHANGED
@@ -1,23 +1,60 @@
1
- from pathlib import Path
2
-
3
class Config:
    """Configuration class for all system settings"""

    # File paths
    KNOWLEDGE_DIR = Path("knowledge_base")            # directory holding all knowledge files
    VECTOR_STORE_PATH = Path("vector_store")          # directory for the FAISS index
    BM25_STORE_PATH = Path("vector_store/bm25.pkl")   # serialized BM25 retriever

    # Text processing
    CHUNK_SIZE = 1000        # balances context size against retrieval precision
    CHUNK_OVERLAP = 200
    MAX_CONTEXT_CHUNKS = 5   # how many chunks are retrieved per query

    # Performance
    CACHE_EXPIRY_HOURS = 24
    RELEVANCE_THRESHOLD = 0.72  # strict similarity cut-off

    @classmethod
    def setup_dirs(cls):
        """Ensure required directories exist"""
        for directory in (cls.KNOWLEDGE_DIR, cls.VECTOR_STORE_PATH):
            directory.mkdir(exist_ok=True)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import tempfile
3
+ from pathlib import Path
4
+
5
class Config:
    """Runtime configuration for the RAG pipeline on HF Spaces.

    All state lives under a per-boot temporary tree so the app works on
    read-only deployments; helpers create and tear down that tree and
    resolve the HuggingFace API token from the environment.
    """

    # Root of the temporary working tree (HF Spaces only guarantees /tmp is writable)
    BASE_DIR = Path(tempfile.gettempdir()) / "sirraya_xbrain"

    # Core directories
    KNOWLEDGE_DIR = BASE_DIR / "knowledge"
    VECTOR_STORE_PATH = BASE_DIR / "vector_store"
    BM25_STORE_PATH = BASE_DIR / "bm25_store.pkl"

    # Processing parameters
    CHUNK_SIZE = 1000
    CHUNK_OVERLAP = 200
    MAX_CONTEXT_CHUNKS = 5

    # Model settings
    EMBEDDING_MODEL = "sentence-transformers/all-mpnet-base-v2"
    FALLBACK_EMBEDDING_MODEL = "sentence-transformers/all-MiniLM-L6-v2"

    # HF Models (in order of preference)
    PRIMARY_LLM = "mistralai/Mistral-7B-Instruct-v0.1"
    FALLBACK_LLMS = [
        "microsoft/DialoGPT-medium",
        "google/flan-t5-base",
        "huggingface/CodeBERTa-small-v1",
    ]

    # API settings
    LLM_TEMPERATURE = 0.1
    MAX_NEW_TOKENS = 512
    REQUEST_TIMEOUT = 60

    @classmethod
    def setup_dirs(cls):
        """Create every directory the pipeline writes into (idempotent)."""
        for directory in (cls.BASE_DIR, cls.KNOWLEDGE_DIR, cls.VECTOR_STORE_PATH):
            directory.mkdir(parents=True, exist_ok=True)

    @classmethod
    def get_hf_token(cls):
        """Return the HuggingFace API token from the environment, or None.

        Prefers HUGGINGFACEHUB_API_TOKEN; falls back to HF_TOKEN when the
        primary variable is unset or empty.
        """
        primary = os.getenv("HUGGINGFACEHUB_API_TOKEN")
        if primary:
            return primary
        return os.getenv("HF_TOKEN")

    @classmethod
    def cleanup_temp_dirs(cls):
        """Best-effort removal of the temporary tree; failures are reported, not raised."""
        try:
            import shutil
            if cls.BASE_DIR.exists():
                shutil.rmtree(cls.BASE_DIR)
        except Exception as e:
            print(f"Cleanup error: {e}")

    # Environment-specific settings (SPACE_ID is set by the HF Spaces runtime)
    IS_HF_SPACES = os.getenv("SPACE_ID") is not None
    IS_LOCAL = not IS_HF_SPACES