Update README.md
--- a/README.md
+++ b/README.md
@@ -189,9 +189,9 @@ apiVersion: serving.kserve.io/v1beta1
 kind: InferenceService
 metadata:
   annotations:
-    openshift.io/display-name:
+    openshift.io/display-name: mistral-small-3-1-24b-instruct-2503-quantized-w4a16 # OPTIONAL CHANGE
     serving.kserve.io/deploymentMode: RawDeployment
-  name:
+  name: mistral-small-3-1-24b-instruct-2503-quantized-w4a16 # specify model name. This value will be used to invoke the model in the payload
   labels:
     opendatahub.io/dashboard: 'true'
 spec:
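
With the metadata filled in, the manifest can be applied and the service watched until it reports ready. A minimal sketch, assuming the manifest is saved as qwen-inferenceservice.yaml (the filename shown in the next hunk's context line) and an active oc login:

# Apply the updated InferenceService manifest
oc apply -f qwen-inferenceservice.yaml

# Watch the service until READY is True; the name matches metadata.name above
oc get inferenceservice mistral-small-3-1-24b-instruct-2503-quantized-w4a16 -w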
@@ -240,7 +240,7 @@ oc apply -f qwen-inferenceservice.yaml
 curl https://<inference-service-name>-predictor-default.<domain>/v1/chat/completions \
   -H "Content-Type: application/json" \
   -d '{
-    "model": "
+    "model": "mistral-small-3-1-24b-instruct-2503-quantized-w4a16",
     "stream": true,
     "stream_options": {
       "include_usage": true
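
The hunk cuts off before the request body closes; an OpenAI-compatible /v1/chat/completions endpoint also requires a "messages" array. A complete sketch of the call, with a placeholder prompt and the same placeholder host (substitute your inference service name and cluster domain):

curl https://<inference-service-name>-predictor-default.<domain>/v1/chat/completions \
  -H "Content-Type: application/json" \
  -d '{
    "model": "mistral-small-3-1-24b-instruct-2503-quantized-w4a16",
    "messages": [{"role": "user", "content": "Hello!"}],
    "stream": true,
    "stream_options": {"include_usage": true}
  }'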