whisper-large-v3-turbo-int4-ov-npu

Description

This is the whisper-large-v3-turbo model converted to the OpenVINO™ IR (Intermediate Representation) format, with weights compressed to INT4 and exported for NPU deployment.

Compatibility

The provided OpenVINO™ IR model is compatible with:

  • OpenVINO version 2025.2.0 and higher
  • Optimum Intel 1.23.0 and higher

The model was exported with the Optimum CLI command below; --disable-stateful produces the stateless graph variant used for NPU inference:

optimum-cli export openvino --trust-remote-code --model openai/whisper-large-v3-turbo --weight-format int4 --disable-stateful whisper-large-v3-turbo-int4-ov
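
The converted model can also be run through Optimum Intel. The snippet below is a minimal transcription sketch, assuming the exported directory whisper-large-v3-turbo-int4-ov produced by the command above and a local sample.wav (both paths are illustrative):

from optimum.intel import OVModelForSpeechSeq2Seq
from transformers import AutoProcessor
import librosa

model_dir = "whisper-large-v3-turbo-int4-ov"  # illustrative: output of the export command above

processor = AutoProcessor.from_pretrained(model_dir)
model = OVModelForSpeechSeq2Seq.from_pretrained(model_dir)

# Whisper expects 16 kHz mono audio
speech, _ = librosa.load("sample.wav", sr=16000)
inputs = processor(speech, sampling_rate=16000, return_tensors="pt")

generated_ids = model.generate(inputs.input_features)
print(processor.batch_decode(generated_ids, skip_special_tokens=True)[0])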

Usage with OpenVINO GenAI

The benchmark script below downloads the model from the Hugging Face Hub, fetches public speech samples, transcribes them on the NPU, and reports the real-time factor (RTF) per file and overall.

#!/usr/bin/env python3
"""Benchmark Whisper INT4 speech recognition on an Intel NPU with OpenVINO GenAI."""
import time
import requests
import openvino_genai
import librosa
from pathlib import Path
from huggingface_hub import snapshot_download


def download_model(model_id="FluidInference/whisper-large-v3-turbo-int4-ov-npu"):
    """Download model from HuggingFace Hub"""
    local_dir = Path("models") / model_id.split("/")[-1]

    if local_dir.exists() and any(local_dir.iterdir()):
        return str(local_dir)

    print("Downloading model...")
    snapshot_download(
        repo_id=model_id,
        local_dir=str(local_dir)  # local_dir_use_symlinks is deprecated in recent huggingface_hub and no longer needed
    )
    return str(local_dir)


def download_hf_audio_samples():
    """Download audio samples from Hugging Face"""
    samples_dir = Path("sample_audios")
    samples_dir.mkdir(exist_ok=True)

    downloaded = []
    whisper_samples = [
        ("https://cdn-media.huggingface.co/speech_samples/sample1.flac", "sample1.flac"),
        ("https://cdn-media.huggingface.co/speech_samples/sample2.flac", "sample2.flac"),
    ]

    for url, filename in whisper_samples:
        filepath = samples_dir / filename
        if filepath.exists():
            downloaded.append(str(filepath))
            continue

        try:
            response = requests.get(url, headers={"User-Agent": "Mozilla/5.0"}, timeout=30)
            response.raise_for_status()

            with open(filepath, 'wb') as f:
                f.write(response.content)

            downloaded.append(str(filepath))
        except Exception as e:
            print(f"Error downloading {filename}: {e}")

    return downloaded


def read_audio(filepath):
    """Read audio file and convert to 16kHz"""
    try:
        raw_speech, _ = librosa.load(filepath, sr=16000)
        return raw_speech.tolist()
    except Exception as e:
        print(f"Error reading {filepath}: {e}")
        return None


def test_whisper_on_file(pipe, filepath):
    """Test Whisper on a single audio file"""
    config = pipe.get_generation_config()
    config.language = "<|en|>"
    config.task = "transcribe"
    config.return_timestamps = True
    config.max_new_tokens = 448

    raw_speech = read_audio(filepath)
    if raw_speech is None:
        return None

    duration = len(raw_speech) / 16000

    start_time = time.time()
    result = pipe.generate(raw_speech, config)
    inference_time = time.time() - start_time

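    # Real-time factor (RTF): inference time divided by audio duration; below 1.0 is faster than real time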
    return {
        "file": filepath,
        "duration": duration,
        "inference_time": inference_time,
        "rtf": inference_time/duration,
        "transcription": str(result)
    }


def main():
    # Download model
    model_path = download_model()

    # Initialize pipeline on NPU
    print("\nInitializing Whisper pipeline on NPU...")
    start_time = time.time()
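    # Constructing the pipeline compiles the model for the NPU, so the first load can take a while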
    pipe = openvino_genai.WhisperPipeline(model_path, "NPU")
    init_time = time.time() - start_time

    results = []

    # Collect test files
    test_files = []
    test_files.extend(Path(".").glob("*.wav"))

    if Path("samples/c/whisper_speech_recognition").exists():
        test_files.extend(Path("samples/c/whisper_speech_recognition").glob("*.wav"))

    # Download HF samples
    hf_samples = download_hf_audio_samples()
    test_files.extend([Path(f) for f in hf_samples])

    # Test all files
    print(f"\nTesting {len(test_files)} files...")
    for audio_file in test_files:
        result = test_whisper_on_file(pipe, str(audio_file))
        if result:
            results.append(result)
            print(f"[OK] {Path(result['file']).name}: RTF={result['rtf']:.2f}x")

    # Print summary
    if results:
        total_duration = sum(r["duration"] for r in results)
        total_inference = sum(r["inference_time"] for r in results)
        avg_rtf = total_inference / total_duration

        print(f"\n{'='*50}")
        print("NPU Performance Summary")
        print(f"{'='*50}")
        print(f"Model load time: {init_time:.1f}s")
        print(f"Files tested: {len(results)}")
        print(f"Total audio: {total_duration:.1f}s")
        print(f"Total inference: {total_inference:.1f}s")
        print(f"Average RTF: {avg_rtf:.2f}x {'[Faster than real-time]' if avg_rtf < 1 else '[Slower than real-time]'}")

        print("\nResults:")
        for r in results:
            trans = r['transcription'].strip()
            if len(trans) > 60:
                trans = trans[:57] + "..."
            print(f"- {Path(r['file']).name}: \"{trans}\"")


if __name__ == "__main__":
    main()
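
To try the benchmark, install the dependencies and run the script (the filename here is arbitrary; openvino-genai is the pip package that provides the openvino_genai module):

pip install openvino-genai librosa requests huggingface_hub
python whisper_npu_benchmark.py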