from ctransformers import AutoModelForCausalLM

# Load the quantized Zephyr 7B alpha GGUF weights once at import time and
# expose the resulting model object as the module-level ``llm_model``.
llm_model = AutoModelForCausalLM.from_pretrained(
    # Model repository id passed as the first positional argument.
    "TheBloke/zephyr-7B-alpha-GGUF",
    # Specific weights file to pick from that repository.
    model_file="zephyr-7b-alpha.Q4_K_M.gguf",
    # NOTE(review): "llama" is presumably used because Zephyr shares a
    # llama-compatible architecture -- confirm against the ctransformers docs.
    model_type="llama",
    # Generation settings forwarded as keyword arguments to the loader.
    max_new_tokens=256,
    temperature=0.7,
)