ffreemt committed · Commit 2327177
1 Parent(s): 7e27981

Update threads=psutil.cpu_count(logical=False)

Files changed:
- app.py (+19, -36)
- requirements.txt (+2, -1)
app.py
CHANGED
```diff
@@ -5,10 +5,11 @@ import os
 import time
 from dataclasses import asdict, dataclass
 from pathlib import Path
-from urllib.parse import urlparse
 from types import SimpleNamespace
+from urllib.parse import urlparse

 import gradio as gr
+import psutil
 from about_time import about_time

 # from ctransformers import AutoConfig, AutoModelForCausalLM
```
```diff
@@ -16,6 +17,12 @@ from ctransformers import AutoModelForCausalLM
 from huggingface_hub import hf_hub_download
 from loguru import logger

+URL = "https://huggingface.co/TheBloke/Wizard-Vicuna-7B-Uncensored-GGML/raw/main/Wizard-Vicuna-7B-Uncensored.ggmlv3.q4_K_M.bin"
+MODEL_FILENAME = Path(URL).name
+REPO_ID = "/".join(urlparse(URL).path.strip('/').split('/')[:2])  # TheBloke/Wizard-Vicuna-7B-Uncensored-GGML
+
+DESTINATION_FOLDER = "models"
+
 os.environ["TZ"] = "Asia/Shanghai"
 try:
     time.tzset()  # type: ignore  # pylint: disable=no-member
```
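The new `REPO_ID` expression recovers the `owner/repo` pair from the file URL by splitting the URL path, so the repo id, the model filename, and the download URL all derive from one constant. A standalone sketch of what that expression evaluates to (stdlib only, values taken from the diff):

```python
from urllib.parse import urlparse

URL = "https://huggingface.co/TheBloke/Wizard-Vicuna-7B-Uncensored-GGML/raw/main/Wizard-Vicuna-7B-Uncensored.ggmlv3.q4_K_M.bin"

# urlparse(URL).path is "/TheBloke/Wizard-Vicuna-7B-Uncensored-GGML/raw/main/...";
# strip("/") drops the leading slash, split("/") yields the path segments,
# and the first two segments are the repo owner and repo name.
segments = urlparse(URL).path.strip("/").split("/")
repo_id = "/".join(segments[:2])
print(repo_id)  # TheBloke/Wizard-Vicuna-7B-Uncensored-GGML
```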
```diff
@@ -230,36 +237,6 @@ def generate(
     **asdict(generation_config),
 )

-_ = '''
-_ = """full url: https://huggingface.co/TheBloke/mpt-30B-chat-GGML/blob/main/mpt-30b-chat.ggmlv0.q4_1.bin"""
-
-# https://huggingface.co/TheBloke/mpt-30B-chat-GGML
-_ = """
-mpt-30b-chat.ggmlv0.q4_0.bin q4_0 4 16.85 GB 19.35 GB 4-bit.
-mpt-30b-chat.ggmlv0.q4_1.bin q4_1 4 18.73 GB 21.23 GB 4-bit. Higher accuracy than q4_0 but not as high as q5_0. However has quicker inference than q5 models.
-mpt-30b-chat.ggmlv0.q5_0.bin q5_0 5 20.60 GB 23.10 GB
-mpt-30b-chat.ggmlv0.q5_1.bin q5_1 5 22.47 GB 24.97 GB
-mpt-30b-chat.ggmlv0.q8_0.bin q8_0 8 31.83 GB 34.33 GB
-"""
-MODEL_FILENAME = "mpt-30b-chat.ggmlv0.q4_1.bin"
-MODEL_FILENAME = "WizardCoder-15B-1.0.ggmlv3.q4_0.bin"  # 10.7G
-MODEL_FILENAME = "WizardCoder-15B-1.0.ggmlv3.q4_1.bin"  # 11.9G
-MODEL_FILENAME = "WizardCoder-15B-1.0.ggmlv3.q4_1.bin"  # 11.9G
-
-# https://huggingface.co/TheBloke/WizardLM-13B-V1.0-Uncensored-GGML
-MODEL_FILENAME = "wizardlm-13b-v1.0-uncensored.ggmlv3.q4_1.bin"  # 8.4G
-# '''
-
-URL = "https://huggingface.co/TheBloke/Wizard-Vicuna-7B-Uncensored-GGML/raw/main/Wizard-Vicuna-7B-Uncensored.ggmlv3.q4_K_M.bin"
-MODEL_FILENAME = Path(URL).name
-REPO_ID = "/".join(urlparse(url).path.strip('/').split('/')[:2])  # TheBloke/Wizard-Vicuna-7B-Uncensored-GGML
-
-DESTINATION_FOLDER = "models"
-
-logger.info(f"start dl, {REPO_ID=}, {MODEL_FILENAME=}, {DESTINATION_FOLDER=}")
-download_quant(DESTINATION_FOLDER, REPO_ID, MODEL_FILENAME)
-logger.info("done dl")
-
 # if "mpt" in model_filename:
 #     config = AutoConfig.from_pretrained("mosaicml/mpt-30b-cha t", context_length=8192)
 #     llm = AutoModelForCausalLM.from_pretrained(
```
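This hunk deletes the old mid-file block: the commented-out model-selection notes, the module-level constants (re-added at the top of the file in the previous hunk), and the download calls (re-added in the next hunk). `download_quant` itself is defined elsewhere in app.py and is not shown in this diff; a hypothetical sketch of such a helper built on `hf_hub_download`, with the signature assumed from the call site:

```python
from huggingface_hub import hf_hub_download


def download_quant(destination_folder: str, repo_id: str, model_filename: str) -> str:
    """Hypothetical sketch: fetch one quantized GGML file into a local folder."""
    # hf_hub_download caches the file and returns a local path;
    # local_dir additionally materializes it under destination_folder.
    return hf_hub_download(
        repo_id=repo_id,
        filename=model_filename,
        local_dir=destination_folder,
    )
```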
```diff
@@ -278,7 +255,16 @@ llm = AutoModelForCausalLM.from_pretrained(
 )
 # """

-logger.
+logger.info(f"start dl, {REPO_ID=}, {MODEL_FILENAME=}, {DESTINATION_FOLDER=}")
+download_quant(DESTINATION_FOLDER, REPO_ID, MODEL_FILENAME)
+logger.info("done dl")
+
+logger.debug(f"{os.cpu_count()=} {psutil.cpu_count(logical=False)=}")
+cpu_count = os.cpu_count() // 2  # type: ignore
+cpu_count = psutil.cpu_count(logical=False)
+
+logger.debug(f"{cpu_count=}")
+
 logger.info("load llm")

 _ = Path("models", MODEL_FILENAME).absolute().as_posix()
@@ -288,14 +274,11 @@ LLM = AutoModelForCausalLM.from_pretrained(
     REPO_ID,  # DESTINATION_FOLDER,  # model_path_or_repo_id: str required
     model_file=_,
     model_type="llama",  # "starcoder", AutoConfig.from_pretrained("TheBloke/WizardLM-13B-V1.0-Uncensored-GGML")
-    threads=
+    threads=cpu_count,
 )

 logger.info("done load llm")

-cpu_count = os.cpu_count() // 2  # type: ignore
-logger.debug(f"{cpu_count=}")
-
 GENERATION_CONFIG = GenerationConfig(
     temperature=0.2,
     top_k=0,
```
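This is the change the commit message names: `threads=cpu_count` is now passed to `AutoModelForCausalLM.from_pretrained`, and `cpu_count` comes from `psutil.cpu_count(logical=False)` (physical cores) rather than the old `os.cpu_count() // 2` heuristic (logical CPUs halved). For CPU-bound GGML inference one thread per physical core is the usual sweet spot, and the two values differ on machines without SMT. A standalone sketch comparing the counts; note that `psutil.cpu_count(logical=False)` can return `None` when the physical count is unknown:

```python
import os

import psutil

logical = os.cpu_count()                    # logical CPUs, hyper-threads included
physical = psutil.cpu_count(logical=False)  # physical cores, or None if unknown

print(f"{logical=} {physical=}")
# On a 4-core/8-thread machine: logical=8, physical=4, so the old
# heuristic (8 // 2) and the new call agree; on a 4-core machine
# without SMT the old heuristic yields 2 while the new call yields 4.
```

Note that the diff keeps the old `cpu_count = os.cpu_count() // 2` assignment and immediately overwrites it on the next line; only the psutil value ever reaches `threads=`.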
requirements.txt
CHANGED
```diff
@@ -3,4 +3,5 @@ transformers==4.30.2
 huggingface_hub
 gradio
 loguru
-about-time
+about-time
+psutil
```
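`psutil` is a third-party package, hence the new requirements.txt entry. Since `psutil.cpu_count(logical=False)` can return `None`, a defensive variant of the thread count (hypothetical, not in this commit) would fall back to the old heuristic:

```python
import os

import psutil

# Not in the commit: guard against psutil reporting None for physical cores.
physical = psutil.cpu_count(logical=False)
cpu_count = physical or max(1, (os.cpu_count() or 2) // 2)
print(f"{cpu_count=}")
```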