magicunicorn's picture
Upload large-v3 NPU model - 220x speedup
554083a verified
{
"model_family": "whisper",
"variant": "large-v3",
"hardware_target": "amd_npu",
"precision": "int8",
"quantization": {
"method": "INT8",
"calibration_dataset": "librispeech_100h",
"calibration_samples": 10000,
"symmetric": true,
"per_channel": true
},
"performance": {
"speedup": "220x",
"rtf": 0.0045,
"accuracy": "99%",
"tokens_per_sec": 4789,
"power": "10W"
},
"unicorn_engine": {
"version": "1.0.0",
"backend": "amd_npu",
"kernel": "mlir_aie2",
"optimization_level": 3
},
"hardware_requirements": {
"npu": "AMD XDNA 16 TOPS",
"min_driver": "1.0.0",
"supported_cpus": [
"7040",
"7045",
"8040",
"8045"
]
}
}