Spaces:

mehdi999
/

pardi-speech

Running on Zero

pardi-speech / codec /scripts /compute_wer_from_refs.py

Mehdi Lakbar

Initial demo of Lina-speech (pardi-speech)

56cfa73 25 days ago

1.65 kB

	import argparse
	import json
	import string
	from pathlib import Path

	from jiwer import cer, wer


	def normalize_text(text: str) -> str:
	    """
	    Return ``text`` in lowercase with ASCII punctuation deleted.

	    Only the characters listed in ``string.punctuation`` are removed;
	    whitespace and every other character are left untouched.

	    Args:
	        text (str): Input string

	    Returns:
	        str: Normalized string
	    """
	    # A translation table that maps a code point to None deletes that character.
	    drop_punctuation = dict.fromkeys(map(ord, string.punctuation))
	    return text.lower().translate(drop_punctuation)


	def load_jsonl_dict(path):
	    """
	    Load a JSONL transcript file into a ``{file name: transcript}`` dict.

	    Each non-blank line must be a JSON object with a ``"file"`` and a
	    ``"transcript"`` key. Entries are keyed by the final path component of
	    ``"file"``; if two entries share that component, the later line replaces
	    the earlier one.

	    Args:
	        path: Path to the JSONL file (opened as UTF-8 text)

	    Returns:
	        dict: Mapping from file name to transcript

	    Raises:
	        json.JSONDecodeError: If a non-blank line is not valid JSON
	        KeyError: If an entry lacks the "file" or "transcript" key
	    """
	    transcripts = {}
	    with open(path, "r", encoding="utf-8") as f:
	        for line in f:
	            # Blank lines (typically a trailing newline at end of file) are
	            # not JSON documents; skip them instead of failing in json.loads.
	            if not line.strip():
	                continue
	            data = json.loads(line)
	            transcripts[Path(data["file"]).name] = data["transcript"]
	    return transcripts


	def main(args):
	    """
	    Print CER and WER for the files present in both transcript sets.

	    Transcripts are loaded from ``args.reference`` and ``args.hypothesis``,
	    matched on file name, normalized with ``normalize_text`` and scored with
	    jiwer. If the two sets share no file name, a message is printed and
	    nothing is scored.

	    Args:
	        args: Object exposing ``reference`` and ``hypothesis`` path attributes
	    """
	    references = load_jsonl_dict(args.reference)
	    hypotheses = load_jsonl_dict(args.hypothesis)

	    # Sort once so references and hypotheses are paired in the same order.
	    shared_names = sorted(references.keys() & hypotheses.keys())
	    if not shared_names:
	        print("No common files between reference and hypothesis.")
	        return

	    refs = []
	    hyps = []
	    for name in shared_names:
	        refs.append(normalize_text(references[name]))
	        hyps.append(normalize_text(hypotheses[name]))

	    char_error = cer(refs, hyps)
	    word_error = wer(refs, hyps)
	    print(f"CER: {char_error:.3%}")
	    print(f"WER: {word_error:.3%}")
	    print(f"Evaluated on {len(shared_names)} files.")


	if __name__ == "__main__":
	    # Both options are required string paths; they differ only in name and help.
	    cli = argparse.ArgumentParser()
	    for flag, description in (
	        ("--reference", "Path to reference JSONL"),
	        ("--hypothesis", "Path to hypothesis JSONL"),
	    ):
	        cli.add_argument(flag, type=str, required=True, help=description)
	    main(cli.parse_args())