pardi-speech / codec /scripts /compute_wer.py
Mehdi Lakbar
Initial demo of Lina-speech (pardi-speech)
56cfa73
import argparse
import json
import string
from jiwer import wer
def normalize_text(text: str) -> str:
    """
    Return a lowercased copy of the input with punctuation stripped.

    Only the characters listed in ``string.punctuation`` are removed;
    whitespace and every other character are left untouched.

    Args:
        text (str): String to normalize

    Returns:
        str: Lowercased string without ``string.punctuation`` characters
    """
    # A translation table whose third argument lists the characters to delete.
    punctuation_remover = str.maketrans("", "", string.punctuation)
    return text.lower().translate(punctuation_remover)
def load_transcripts(jsonl_path):
    """
    Read original/reconstructed transcript pairs from a JSONL file.

    Each non-blank line must be a JSON object holding the keys
    ``"original_text"`` and ``"reconstructed_text"``. Blank or
    whitespace-only lines are skipped so that stray empty lines do not
    abort the whole run.

    Args:
        jsonl_path: Path of the JSONL file to read (opened as UTF-8)

    Returns:
        tuple: ``(originals, reconstructions)``, two lists of equal length
        where entries at the same index come from the same line

    Raises:
        json.JSONDecodeError: If a non-blank line is not valid JSON
        KeyError: If a record lacks one of the two expected keys
    """
    originals = []
    reconstructions = []
    with open(jsonl_path, "r", encoding="utf-8") as f:
        for line in f:
            # json.loads("") raises, so ignore lines carrying no content.
            if not line.strip():
                continue
            data = json.loads(line)
            originals.append(data["original_text"])
            reconstructions.append(data["reconstructed_text"])
    return originals, reconstructions
def main(args):
    """
    Compute and print the word error rate for a transcript JSONL file.

    Args:
        args: Parsed command-line arguments; only ``args.jsonl`` (path to the
            transcript file) is read
    """
    originals, reconstructions = load_transcripts(args.jsonl)
    # normalize_text works on a single string, so it has to be applied to
    # every transcript individually rather than to the two lists as a whole.
    originals = [normalize_text(text) for text in originals]
    reconstructions = [normalize_text(text) for text in reconstructions]
    # Originals are passed first (the reference), reconstructions second.
    score = wer(originals, reconstructions)
    print(f"WER: {score:.3%}")
if __name__ == "__main__":
    # Command-line entry point: a single mandatory option naming the
    # transcript file, handed to main() as the parsed namespace.
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--jsonl",
        type=str,
        required=True,
        help="Path to the transcript JSONL file",
    )
    args = parser.parse_args()
    main(args)