# --- Configuration -----------------------------------------------------------
# Directory containing the HuggingFace safetensor model to convert.
MODEL_DIR="maykeye_tinyllama"
# Authorship/override metadata consumed by convert_hf_to_gguf.py --metadata.
METADATA_FILE="maykeye_tinyllama-metadata.json"
###############################################################################
# Pull both model folder, llamafile (for the engine) and llama.cpp (for the
# conversion script).
echo "== Prep Environment =="
git submodule update --init
###############################################################################
echo "== Build and prep the llamafile engine executable =="
pushd llamafile || exit 1
# Parallel build first for speed; the follow-up serial make catches any
# targets the parallel pass may have skipped or failed on.
make -j8
make
# This is where each executable is located for reference purposes for now as of 2024-04-05
# and was determined by running `sudo make install PREFIX=/usr/local`
# ./o/llamafile/zipalign --> /usr/local/bin/zipalign
# ./o/llama.cpp/main/main --> /usr/local/bin/llamafile
# ./o/llama.cpp/imatrix/imatrix --> /usr/local/bin/llamafile-imatrix
# ./o/llama.cpp/quantize/quantize --> /usr/local/bin/llamafile-quantize
# ./build/llamafile-convert --> /usr/local/bin/llamafile-convert
# ./o/llama.cpp/perplexity/perplexity --> /usr/local/bin/llamafile-perplexity
# ./o/llama.cpp/llava/llava-quantize --> /usr/local/bin/llava-quantize
popd || exit 1
###############################################################################
echo "== What is our llamafile name going to be? =="
# Dry-run the converter ONCE to learn the output path it would generate, then
# echo the captured result. (Previously the converter was invoked twice: once
# for display and once for capture — redundant work for the same answer.)
# stderr is intentionally suppressed: only the printed output path is wanted.
OUTFILE_PATH=$(./llama.cpp/convert_hf_to_gguf.py "${MODEL_DIR}" --metadata "${METADATA_FILE}" --outtype f16 --dry-run 2>/dev/null)
echo "${OUTFILE_PATH}"
OUTFILE_FILE=$(basename "${OUTFILE_PATH}")
OUTFILE="${OUTFILE_FILE%.gguf}"   # strip the .gguf extension
echo "We will be aiming to generate ${OUTFILE}.llamafile"
###############################################################################
echo "== Convert from safetensor to gguf =="
# Full conversion; verbose output (stdout + stderr) is appended to a log file
# for later debugging rather than cluttering the console.
./llama.cpp/convert_hf_to_gguf.py "${MODEL_DIR}" --metadata "${METADATA_FILE}" --outtype f16 --verbose >> convert_hf_to_gguf.output.txt 2>&1
# The converter writes the gguf inside the model directory; move it up here.
mv "${MODEL_DIR}/${OUTFILE}.gguf" "${OUTFILE}.gguf"
# Generate Diagnostics Dumpfile
./llama.cpp/gguf-py/scripts/gguf_dump.py --markdown "${OUTFILE}.gguf" > "${OUTFILE}.dump.md"
###############################################################################
echo "== Generating Llamafile =="
# Seed the llamafile with the engine binary built earlier.
cp ./llamafile/o/llama.cpp/main/main "${OUTFILE}.llamafile"
# Create an .args file with settings defaults (model weights to load).
cat >.args <<EOF
-m
${OUTFILE}.gguf
EOF
# zip align engine, gguf and default args into one self-contained executable
./llamafile/o/llamafile/zipalign -j0 "${OUTFILE}.llamafile" "${OUTFILE}.gguf" .args
###############################################################################
echo "== Test Output ./${OUTFILE}.llamafile =="
# Smoke-test the generated llamafile; append stdout + stderr to a log file.
"./${OUTFILE}.llamafile" --cli -p "hello world the gruff man said" >> llamafile_output_example.output.txt 2>&1