# Metadata and runtime settings for the model named below.
model_info:
  name: anemll-Llama-3.1-Nemotron-Nano-8B-v1-ctx512
  # Quoted so the value is read as a string by every YAML parser.
  version: '0.3.0'
  # Human-readable summary. The figures in it repeat context_length,
  # batch_size and num_chunks from `parameters`; keep them in sync by hand.
  description: |
    Demonstrates running Llama-3.1-Nemotron-Nano-8B-v1 on Apple Neural Engine
    Context length: 512
    Batch size: 64
    Chunks: 16
  license: MIT
  author: Anemll
  framework: Core ML
  language: Python
  parameters:
    context_length: 512
    batch_size: 64
    # NOTE: `none` is a plain string here, not YAML null (`null` / `~`).
    # Presumably it means "no LUT quantization" for that component; confirm
    # against the consumer before changing the spelling.
    lut_embeddings: none
    lut_ffn: none
    lut_lmhead: none
    num_chunks: 16
    # The prefix already ends in `_`, which is why the file names below
    # contain a double underscore. NOTE(review): looks intentional
    # (prefix + `_` + component); verify before "fixing" it.
    model_prefix: nemo_
    embeddings: nemo__embeddings.mlmodelc
    lm_head: nemo__lm_head.mlmodelc
    # NOTE(review): num_chunks is 16 but a single FFN file name is listed;
    # presumably the consumer derives per-chunk names. TODO confirm.
    ffn: nemo__FFN_PF.mlmodelc