Spaces:
Running
on
Zero
Running
on
Zero
| import argparse | |
| import json | |
| import string | |
| from jiwer import wer | |
def normalize_text(text: str) -> str:
    """
    Return the input lowercased with ASCII punctuation stripped out.

    Args:
        text (str): Input string

    Returns:
        str: The lowercased string with every character listed in
            ``string.punctuation`` removed
    """
    # Mapping each punctuation character to None makes translate() delete it.
    punctuation_remover = str.maketrans(dict.fromkeys(string.punctuation))
    return text.lower().translate(punctuation_remover)
def load_transcripts(jsonl_path):
    """
    Read paired transcripts from a JSONL file.

    Each non-blank line is parsed as a JSON object and its
    ``"original_text"`` and ``"reconstructed_text"`` values are collected
    in file order. Blank or whitespace-only lines are skipped.

    Args:
        jsonl_path: Path of the JSONL file, opened as UTF-8 text

    Returns:
        tuple: ``(originals, reconstructions)``, two lists of equal length
            where index ``i`` of each list comes from the same line

    Raises:
        OSError: If the file cannot be opened
        json.JSONDecodeError: If a non-blank line is not valid JSON
        KeyError: If a record lacks one of the two expected keys
    """
    originals = []
    reconstructions = []
    with open(jsonl_path, "r", encoding="utf-8") as f:
        for line in f:
            # A blank line holds no record and would make json.loads raise,
            # so skip it instead of aborting the whole evaluation.
            if not line.strip():
                continue
            data = json.loads(line)
            originals.append(data["original_text"])
            reconstructions.append(data["reconstructed_text"])
    return originals, reconstructions
def main(args):
    """
    Compute and print the word error rate for a transcript JSONL file.

    Loads the original/reconstructed transcript pairs from ``args.jsonl``,
    normalizes every transcript with ``normalize_text``, and prints the
    score returned by ``wer`` as a percentage with three decimals.

    Args:
        args: Parsed command-line arguments; only ``args.jsonl`` is read
    """
    originals, reconstructions = load_transcripts(args.jsonl)
    # normalize_text works on a single string, so apply it to each
    # transcript; mapping it over the (originals, reconstructions) pair
    # would hand it whole lists and fail on ``list.lower``.
    originals = [normalize_text(text) for text in originals]
    reconstructions = [normalize_text(text) for text in reconstructions]
    score = wer(originals, reconstructions)
    print(f"WER: {score:.3%}")
# Script entry point: parse the command line and run the WER evaluation.
if __name__ == "__main__":
    arg_parser = argparse.ArgumentParser()
    # The transcript file is the only input and it is mandatory.
    arg_parser.add_argument(
        "--jsonl",
        required=True,
        type=str,
        help="Path to the transcript JSONL file",
    )
    args = arg_parser.parse_args()
    main(args)