masx-openchat-llm / model_loader.py
ateetvatan
formatting + removed cpu parallelism
e6a07cc
raw
history blame contribute delete
808 Bytes
import os
from dotenv import load_dotenv
from ctransformers import AutoModelForCausalLM
# Load variables from .env first. By default load_dotenv() does not overwrite
# variables that already exist in the process environment, so it has to run
# before the fallback below; otherwise an HF_HOME entry in .env would be
# silently ignored.
load_dotenv()
# Optional: fall back to a per-user Hugging Face cache dir when HF_HOME was
# supplied neither by the real environment nor by .env.
# NOTE(review): if the Hugging Face libraries read HF_HOME at import time, this
# also needs to run before `ctransformers` is imported -- confirm.
os.environ.setdefault("HF_HOME", os.path.expanduser("~/.hf_home"))
# === High-Precision GGUF Model Configuration ===
# Every setting can be overridden through the environment (or .env); the
# defaults point at the Q8_0 GGUF file of OpenChat 3.5.
MODEL_REPO = os.getenv("MODEL_REPO", "TheBloke/openchat-3.5-0106-GGUF")
MODEL_FILE = os.getenv("MODEL_FILE", "openchat-3.5-0106.Q8_0.gguf")
MODEL_TYPE = os.getenv("MODEL_TYPE", "mistral")  # OpenChat 3.5 is Mistral-compatible
# An empty CTX_LEN (e.g. a bare `CTX_LEN=` line in .env) falls back to the
# default instead of crashing inside int(). Any other non-integer value still
# raises ValueError, but the message now names the offending variable.
_ctx_len_raw = os.getenv("CTX_LEN") or "8192"  # Use full 8K context by default
try:
    CTX_LEN = int(_ctx_len_raw)
except ValueError as err:
    raise ValueError(
        f"CTX_LEN must be an integer, got {_ctx_len_raw!r}"
    ) from err
# === Load Model ===
# Keyword settings handed to ctransformers alongside the repo id. The model is
# created once, at import time, and exposed as the module-level `model`.
_load_options = {
    "model_file": MODEL_FILE,
    "model_type": MODEL_TYPE,
    "context_length": CTX_LEN,
    "gpu_layers": 0,  # zero layers requested on the GPU
    "local_files_only": False,  # not restricted to files already on disk
}
model = AutoModelForCausalLM.from_pretrained(MODEL_REPO, **_load_options)