Tyler Williams committed
Commit 3c4ec38 · Parent: 1b84476

chore: rename GGUF to apollo_astralis_8b.gguf and update docs

MERGE_GUIDE.md CHANGED
@@ -144,7 +144,7 @@ python convert_hf_to_gguf.py ../apollo-astralis-8b-merged/ \
 
 # Quantize to Q4_K_M (recommended)
 ./llama-quantize apollo-astralis-8b-f16.gguf \
-  apollo-astralis-8b-Q4_K_M.gguf Q4_K_M
+  apollo_astralis_8b.gguf Q4_K_M
 ```
 
 ### Step 3: Deploy with Ollama
@@ -152,7 +152,7 @@ python convert_hf_to_gguf.py ../apollo-astralis-8b-merged/ \
 ```bash
 # Create Modelfile
 cat > Modelfile <<EOF
-from ./apollo-astralis-8b-Q4_K_M.gguf
+from ./apollo_astralis_8b.gguf
 
 template """<|im_start|>system
 {{ .System }}<|im_end|>
@@ -263,7 +263,7 @@ Available quantization formats:
 
 ```bash
 # Quantize to different formats
-./llama-quantize apollo-astralis-8b-f16.gguf apollo-astralis-8b-Q4_K_M.gguf Q4_K_M
+./llama-quantize apollo-astralis-8b-f16.gguf apollo_astralis_8b.gguf Q4_K_M
 ./llama-quantize apollo-astralis-8b-f16.gguf apollo-astralis-8b-Q5_K_M.gguf Q5_K_M
 ./llama-quantize apollo-astralis-8b-f16.gguf apollo-astralis-8b-Q8_0.gguf Q8_0
 ```
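After this rename, the Ollama deploy step in MERGE_GUIDE.md would finish roughly as below; a minimal sketch assuming the Modelfile written above sits in the working directory and reusing the `apollo-astralis-8b` model name that appears in the README:

```bash
# Build the Ollama model from the Modelfile that references the renamed GGUF
ollama create apollo-astralis-8b -f Modelfile

# Smoke-test the deployment with a short prompt
ollama run apollo-astralis-8b "If x + 7 = 15, what is x?"
```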
README.md CHANGED
@@ -124,7 +124,7 @@ ollama run apollo-astralis-8b
 
 **Modelfile (Conservative - 256 tokens)**:
 ```dockerfile
-from ./apollo_astralis_8b_v5_conservative.Q4_K_M.gguf
+from ./apollo_astralis_8b.gguf
 
 template """<|im_start|>system
 {{ .System }}<|im_end|>
@@ -146,7 +146,7 @@ system """You are Apollo, a collaborative AI assistant specializing in reasoning
 
 **Modelfile (Unlimited - for complex reasoning)**:
 ```dockerfile
-from ./apollo_astralis_8b_v5_conservative.Q4_K_M.gguf
+from ./apollo_astralis_8b.gguf
 
 template """<|im_start|>system
 {{ .System }}<|im_end|>
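The diff only touches the `from` lines, so how the Conservative and Unlimited Modelfiles actually differ is not shown here. If the distinction is the 256-token cap named in the heading, it would plausibly be expressed through Ollama's `num_predict` parameter; a hypothetical sketch, not taken from the repo:

```bash
# Hypothetical: the two variants presumably differ only in the generation cap.
# In Ollama Modelfile syntax that cap is num_predict (-1 = no limit).
cat > Modelfile-conservative <<'EOF'
from ./apollo_astralis_8b.gguf
PARAMETER num_predict 256
EOF

cat > Modelfile-unlimited <<'EOF'
from ./apollo_astralis_8b.gguf
PARAMETER num_predict -1
EOF
```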
UPLOAD_CHECKLIST.md CHANGED
@@ -107,13 +107,13 @@ If you want to include the quantized GGUF directly:
 
 ```bash
 # Copy GGUF to package directory
-cp /home/vanta/proving-ground/apollo_astralis_8b_v5_conservative.Q4_K_M.gguf .
+cp /home/vanta/proving-ground/apollo_astralis_8b.gguf .
 
 # Track with Git LFS
 git lfs track "*.gguf"
 
 # Add and push
-git add apollo_astralis_8b_v5_conservative.Q4_K_M.gguf
+git add apollo_astralis_8b.gguf
 git commit -m "Add Q4_K_M quantized GGUF model (4.7GB)"
 git push
 ```
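Before the `git push` above, it may be worth confirming that the renamed file is genuinely handled by Git LFS rather than committed as a plain blob; a small sketch using standard git-lfs commands, not part of the original checklist:

```bash
# Confirm the renamed GGUF is stored as an LFS pointer, not a regular blob
git lfs ls-files | grep apollo_astralis_8b.gguf

# Sanity-check the on-disk size (~4.7GB per the commit message)
ls -lh apollo_astralis_8b.gguf
```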
USAGE_GUIDE.md CHANGED
@@ -22,11 +22,11 @@ The simplest way to use Apollo Astralis:
 curl -fsSL https://ollama.ai/install.sh | sh
 
 # Download the GGUF model file
-wget https://huggingface.co/vanta-research/apollo-astralis-8b/resolve/main/apollo_astralis_8b_v5_conservative.Q4_K_M.gguf
+wget https://huggingface.co/vanta-research/apollo-astralis-8b/resolve/main/apollo_astralis_8b.gguf
 
 # Create Modelfile
 cat > Modelfile-apollo-astralis <<EOF
-from ./apollo_astralis_8b_v5_conservative.Q4_K_M.gguf
+from ./apollo_astralis_8b.gguf
 
 template """<|im_start|>system
 {{ .System }}<|im_end|>
@@ -98,10 +98,10 @@ cd llama.cpp
 make
 
 # Download model
-wget https://huggingface.co/vanta-research/apollo-astralis-8b/resolve/main/apollo_astralis_8b_v5_conservative.Q4_K_M.gguf
+wget https://huggingface.co/vanta-research/apollo-astralis-8b/resolve/main/apollo_astralis_8b.gguf
 
 # Run inference
-./main -m apollo_astralis_8b_v5_conservative.Q4_K_M.gguf \
+./main -m apollo_astralis_8b.gguf \
   --prompt "Solve this problem: If x + 7 = 15, what is x?" \
   --temp 0.7 \
   --top-p 0.9 \
@@ -117,7 +117,7 @@ Best for most tasks with balanced response length:
 
 ```dockerfile
 # Modelfile
-from ./apollo_astralis_8b_v5_conservative.Q4_K_M.gguf
+from ./apollo_astralis_8b.gguf
 
 template """<|im_start|>system
 {{ .System }}<|im_end|>
@@ -143,7 +143,7 @@ For multi-step reasoning requiring extended chain-of-thought:
 
 ```dockerfile
 # Modelfile-unlimited
-from ./apollo_astralis_8b_v5_conservative.Q4_K_M.gguf
+from ./apollo_astralis_8b.gguf
 
 template """<|im_start|>system
 {{ .System }}<|im_end|>
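Since the rename invalidates the old `resolve/main/...` download URL that USAGE_GUIDE.md previously pointed at, it may be worth verifying the new link before updating any scripts; a minimal sketch using only plain curl and wget:

```bash
# Follow the Hub's CDN redirects and print each hop's status;
# the final line should read HTTP/2 200 if the renamed file resolves
curl -sIL https://huggingface.co/vanta-research/apollo-astralis-8b/resolve/main/apollo_astralis_8b.gguf | grep HTTP

# Then download and confirm the size matches the expected ~4.7GB
wget https://huggingface.co/vanta-research/apollo-astralis-8b/resolve/main/apollo_astralis_8b.gguf
ls -lh apollo_astralis_8b.gguf
```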
apollo_astralis_8b_v5_conservative.Q4_K_M.gguf → apollo_astralis_8b.gguf RENAMED
File without changes