import argparse
import json
import string

# Translation table that deletes every character in string.punctuation
# (ASCII punctuation only). Built once instead of on every call.
_PUNCTUATION_TABLE = str.maketrans("", "", string.punctuation)


def normalize_text(text: str) -> str:
    """
    Lowercase a string and remove ASCII punctuation from it.

    Whitespace is left untouched, so removing a free-standing punctuation
    mark can leave consecutive spaces behind.

    Args:
        text (str): Input string

    Returns:
        str: Normalized string
    """
    return text.lower().translate(_PUNCTUATION_TABLE)


def load_transcripts(jsonl_path):
    """
    Read original/reconstructed transcript pairs from a JSONL file.

    Each non-blank line must be a JSON object containing the keys
    "original_text" and "reconstructed_text". Blank or whitespace-only
    lines are skipped.

    Args:
        jsonl_path: Path to the UTF-8 encoded JSONL file

    Returns:
        tuple: (originals, reconstructions), two lists of equal length
            holding the raw (un-normalized) values in file order

    Raises:
        json.JSONDecodeError: If a non-blank line is not valid JSON
        KeyError: If a record lacks one of the two expected keys
    """
    originals = []
    reconstructions = []
    with open(jsonl_path, "r", encoding="utf-8") as f:
        for line in f:
            # json.loads() rejects empty input, so tolerate blank separator
            # lines and trailing blank lines instead of crashing on them.
            if not line.strip():
                continue
            data = json.loads(line)
            originals.append(data["original_text"])
            reconstructions.append(data["reconstructed_text"])
    return originals, reconstructions


def main(args):
    """
    Compute and print the word error rate for a transcript JSONL file.

    Args:
        args: Parsed command-line arguments; only ``args.jsonl`` (path to
            the transcript file) is read
    """
    # Imported here so that the helpers above remain importable without
    # the third-party jiwer package installed.
    from jiwer import wer

    originals, reconstructions = load_transcripts(args.jsonl)

    # Normalize every transcript individually. Mapping normalize_text over
    # the (originals, reconstructions) tuple would pass whole lists to it
    # and fail on list.lower().
    originals = [normalize_text(text) for text in originals]
    reconstructions = [normalize_text(text) for text in reconstructions]

    # Originals are the reference, reconstructions the hypothesis.
    score = wer(originals, reconstructions)
    print(f"WER: {score:.3%}")


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--jsonl", type=str, required=True, help="Path to the transcript JSONL file"
    )
    args = parser.parse_args()
    main(args)