ffreemt committed · Commit 2327177
1 Parent(s): 7e27981

Update threads=psutil.cpu_count(logical=False)

Files changed:
- app.py (+19, -36)
- requirements.txt (+2, -1)
app.py
CHANGED
```diff
@@ -5,10 +5,11 @@ import os
 import time
 from dataclasses import asdict, dataclass
 from pathlib import Path
-from urllib.parse import urlparse
 from types import SimpleNamespace
+from urllib.parse import urlparse

 import gradio as gr
+import psutil
 from about_time import about_time

 # from ctransformers import AutoConfig, AutoModelForCausalLM
```
```diff
@@ -16,6 +17,12 @@ from ctransformers import AutoModelForCausalLM
 from huggingface_hub import hf_hub_download
 from loguru import logger

+URL = "https://huggingface.co/TheBloke/Wizard-Vicuna-7B-Uncensored-GGML/raw/main/Wizard-Vicuna-7B-Uncensored.ggmlv3.q4_K_M.bin"
+MODEL_FILENAME = Path(URL).name
+REPO_ID = "/".join(urlparse(URL).path.strip('/').split('/')[:2])  # TheBloke/Wizard-Vicuna-7B-Uncensored-GGML
+
+DESTINATION_FOLDER = "models"
+
 os.environ["TZ"] = "Asia/Shanghai"
 try:
     time.tzset()  # type: ignore  # pylint: disable=no-member
```
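The new `REPO_ID` expression recovers the `owner/repo` pair from the file URL by splitting the URL path, so the repo id, the model filename, and the download URL all derive from one constant. A standalone sketch of what that expression evaluates to (stdlib only, values taken from the diff):

```python
from urllib.parse import urlparse

URL = "https://huggingface.co/TheBloke/Wizard-Vicuna-7B-Uncensored-GGML/raw/main/Wizard-Vicuna-7B-Uncensored.ggmlv3.q4_K_M.bin"

# urlparse(URL).path is "/TheBloke/Wizard-Vicuna-7B-Uncensored-GGML/raw/main/...";
# strip("/") drops the leading slash, split("/") yields the path segments,
# and the first two segments are the repo owner and repo name.
segments = urlparse(URL).path.strip("/").split("/")
repo_id = "/".join(segments[:2])
print(repo_id)  # TheBloke/Wizard-Vicuna-7B-Uncensored-GGML
```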
```diff
@@ -230,36 +237,6 @@ def generate(
     **asdict(generation_config),
 )

-_ = '''
-_ = """full url: https://huggingface.co/TheBloke/mpt-30B-chat-GGML/blob/main/mpt-30b-chat.ggmlv0.q4_1.bin"""
-
-# https://huggingface.co/TheBloke/mpt-30B-chat-GGML
-_ = """
-mpt-30b-chat.ggmlv0.q4_0.bin q4_0 4 16.85 GB 19.35 GB 4-bit.
-mpt-30b-chat.ggmlv0.q4_1.bin q4_1 4 18.73 GB 21.23 GB 4-bit. Higher accuracy than q4_0 but not as high as q5_0. However has quicker inference than q5 models.
-mpt-30b-chat.ggmlv0.q5_0.bin q5_0 5 20.60 GB 23.10 GB
-mpt-30b-chat.ggmlv0.q5_1.bin q5_1 5 22.47 GB 24.97 GB
-mpt-30b-chat.ggmlv0.q8_0.bin q8_0 8 31.83 GB 34.33 GB
-"""
-MODEL_FILENAME = "mpt-30b-chat.ggmlv0.q4_1.bin"
-MODEL_FILENAME = "WizardCoder-15B-1.0.ggmlv3.q4_0.bin"  # 10.7G
-MODEL_FILENAME = "WizardCoder-15B-1.0.ggmlv3.q4_1.bin"  # 11.9G
-MODEL_FILENAME = "WizardCoder-15B-1.0.ggmlv3.q4_1.bin"  # 11.9G
-
-# https://huggingface.co/TheBloke/WizardLM-13B-V1.0-Uncensored-GGML
-MODEL_FILENAME = "wizardlm-13b-v1.0-uncensored.ggmlv3.q4_1.bin"  # 8.4G
-# '''
-
-URL = "https://huggingface.co/TheBloke/Wizard-Vicuna-7B-Uncensored-GGML/raw/main/Wizard-Vicuna-7B-Uncensored.ggmlv3.q4_K_M.bin"
-MODEL_FILENAME = Path(URL).name
-REPO_ID = "/".join(urlparse(url).path.strip('/').split('/')[:2])  # TheBloke/Wizard-Vicuna-7B-Uncensored-GGML
-
-DESTINATION_FOLDER = "models"
-
-logger.info(f"start dl, {REPO_ID=}, {MODEL_FILENAME=}, {DESTINATION_FOLDER=}")
-download_quant(DESTINATION_FOLDER, REPO_ID, MODEL_FILENAME)
-logger.info("done dl")
-
 # if "mpt" in model_filename:
 #     config = AutoConfig.from_pretrained("mosaicml/mpt-30b-cha t", context_length=8192)
 #     llm = AutoModelForCausalLM.from_pretrained(
```
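This hunk deletes the old mid-file block: the commented-out model-selection notes, the module-level constants (re-added at the top of the file in the previous hunk), and the download calls (re-added in the next hunk). `download_quant` itself is defined elsewhere in app.py and is not shown in this diff; a hypothetical sketch of such a helper built on `hf_hub_download`, with the signature assumed from the call site:

```python
from huggingface_hub import hf_hub_download


def download_quant(destination_folder: str, repo_id: str, model_filename: str) -> str:
    """Hypothetical sketch: fetch one quantized GGML file into a local folder."""
    # hf_hub_download caches the file and returns a local path;
    # local_dir additionally materializes it under destination_folder.
    return hf_hub_download(
        repo_id=repo_id,
        filename=model_filename,
        local_dir=destination_folder,
    )
```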
```diff
@@ -278,7 +255,16 @@ llm = AutoModelForCausalLM.from_pretrained(
 )
 # """

-logger.
+logger.info(f"start dl, {REPO_ID=}, {MODEL_FILENAME=}, {DESTINATION_FOLDER=}")
+download_quant(DESTINATION_FOLDER, REPO_ID, MODEL_FILENAME)
+logger.info("done dl")
+
+logger.debug(f"{os.cpu_count()=} {psutil.cpu_count(logical=False)=}")
+cpu_count = os.cpu_count() // 2  # type: ignore
+cpu_count = psutil.cpu_count(logical=False)
+
+logger.debug(f"{cpu_count=}")
+
 logger.info("load llm")

 _ = Path("models", MODEL_FILENAME).absolute().as_posix()
@@ -288,14 +274,11 @@ LLM = AutoModelForCausalLM.from_pretrained(
     REPO_ID,  # DESTINATION_FOLDER,  # model_path_or_repo_id: str required
     model_file=_,
     model_type="llama",  # "starcoder", AutoConfig.from_pretrained("TheBloke/WizardLM-13B-V1.0-Uncensored-GGML")
-    threads=
+    threads=cpu_count,
 )

 logger.info("done load llm")

-cpu_count = os.cpu_count() // 2  # type: ignore
-logger.debug(f"{cpu_count=}")
-
 GENERATION_CONFIG = GenerationConfig(
     temperature=0.2,
     top_k=0,
```
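This is the change the commit message names: `threads=cpu_count` is now passed to `AutoModelForCausalLM.from_pretrained`, and `cpu_count` comes from `psutil.cpu_count(logical=False)` (physical cores) rather than the old `os.cpu_count() // 2` heuristic (logical CPUs halved). For CPU-bound GGML inference one thread per physical core is the usual sweet spot, and the two values differ on machines without SMT. A standalone sketch comparing the counts; note that `psutil.cpu_count(logical=False)` can return `None` when the physical count is unknown:

```python
import os

import psutil

logical = os.cpu_count()                    # logical CPUs, hyper-threads included
physical = psutil.cpu_count(logical=False)  # physical cores, or None if unknown

print(f"{logical=} {physical=}")
# On a 4-core/8-thread machine: logical=8, physical=4, so the old
# heuristic (8 // 2) and the new call agree; on a 4-core machine
# without SMT the old heuristic yields 2 while the new call yields 4.
```

Note that the diff keeps the old `cpu_count = os.cpu_count() // 2` assignment and immediately overwrites it on the next line; only the psutil value ever reaches `threads=`.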
requirements.txt
CHANGED
```diff
@@ -3,4 +3,5 @@ transformers==4.30.2
 huggingface_hub
 gradio
 loguru
-about-time
+about-time
+psutil
```
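`psutil` is a third-party package, hence the new requirements.txt entry. Since `psutil.cpu_count(logical=False)` can return `None`, a defensive variant of the thread count (hypothetical, not in this commit) would fall back to the old heuristic:

```python
import os

import psutil

# Not in the commit: guard against psutil reporting None for physical cores.
physical = psutil.cpu_count(logical=False)
cpu_count = physical or max(1, (os.cpu_count() or 2) // 2)
print(f"{cpu_count=}")
```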