Yoad committed on
Commit
2f5cf2f
·
1 Parent(s): c66d9f1

First commit with actual logic

Browse files
.dockerignore ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ .venv
2
+ .streamlit
3
+ .git
4
+ .gitignore
5
+ sample_inputs/
.gitignore ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ .venv
2
+ .streamlit
3
+
4
+ # python
5
+ __pycache__
6
+ *.pyc
7
+ *.pyo
8
+ *.pyd
9
+ *.so
10
+ *.egg-info
11
+ dist
12
+ build
13
+ eggs
.python-version ADDED
@@ -0,0 +1 @@
 
 
1
+ 3.11.9
Dockerfile CHANGED
@@ -1,6 +1,4 @@
1
- FROM python:3.9-slim
2
-
3
- WORKDIR /app
4
 
5
  RUN apt-get update && apt-get install -y \
6
  build-essential \
@@ -9,13 +7,39 @@ RUN apt-get update && apt-get install -y \
9
  git \
10
  && rm -rf /var/lib/apt/lists/*
11
 
12
- COPY requirements.txt ./
13
- COPY src/ ./src/
 
 
 
 
 
 
 
 
14
 
15
- RUN pip3 install -r requirements.txt
 
 
 
 
 
 
 
 
 
 
 
 
 
16
 
17
  EXPOSE 8501
18
 
19
  HEALTHCHECK CMD curl --fail http://localhost:8501/_stcore/health
20
 
21
- ENTRYPOINT ["streamlit", "run", "src/streamlit_app.py", "--server.port=8501", "--server.address=0.0.0.0"]
 
 
 
 
 
 
1
+ FROM ghcr.io/astral-sh/uv:python3.11-bookworm-slim
 
 
2
 
3
  RUN apt-get update && apt-get install -y \
4
  build-essential \
 
7
  git \
8
  && rm -rf /var/lib/apt/lists/*
9
 
10
+ RUN useradd -m -u 1000 user
11
+
12
+ USER user
13
+
14
+ ENV HOME=/home/user \
15
+ PATH=/home/user/.local/bin:$PATH
16
+
17
+ WORKDIR $HOME/app
18
+
19
+ ENV UV_COMPILE_BYTECODE=1
20
 
21
+ # Install the project's dependencies using the lockfile and settings
22
+ RUN --mount=type=cache,target=/root/.cache/uv \
23
+ --mount=type=bind,source=uv.lock,target=uv.lock \
24
+ --mount=type=bind,source=pyproject.toml,target=pyproject.toml \
25
+ uv sync --frozen --no-install-project --no-dev
26
+
27
+ ADD . $HOME/app
28
+
29
+ # Use uv sync to resolve and install dependencies
30
+ RUN --mount=type=cache,target=/root/.cache/uv \
31
+ uv sync --frozen --no-dev
32
+
33
+ # Place executables in the environment at the front of the path
34
+ ENV PATH="$HOME/app/.venv/bin:$PATH"
35
 
36
  EXPOSE 8501
37
 
38
  HEALTHCHECK CMD curl --fail http://localhost:8501/_stcore/health
39
 
40
+ # Reset the entrypoint, don't invoke `uv`
41
+ ENTRYPOINT []
42
+
43
+ ENV PYTHONPATH="$HOME/app/src:$PYTHONPATH"
44
+
45
+ CMD ["uv", "run", "streamlit", "run", "src/app.py", "--server.port=8501", "--server.address=0.0.0.0"]
README.md CHANGED
@@ -1,6 +1,6 @@
1
  ---
2
  title: Visualize Eval Results
3
- emoji: 🚀
4
  colorFrom: red
5
  colorTo: red
6
  sdk: docker
@@ -8,12 +8,21 @@ app_port: 8501
8
  tags:
9
  - streamlit
10
  pinned: false
11
- short_description: Visualize ivrit.ai ASE eval results
12
  ---
13
 
14
- # Welcome to Streamlit!
 
 
 
 
 
 
 
 
 
 
 
 
15
 
16
- Edit `/src/streamlit_app.py` to customize this app to your heart's desire. :heart:
17
 
18
- If you have any questions, checkout our [documentation](https://docs.streamlit.io) and [community
19
- forums](https://discuss.streamlit.io).
 
1
  ---
2
  title: Visualize Eval Results
3
+ emoji: 🔍
4
  colorFrom: red
5
  colorTo: red
6
  sdk: docker
 
8
  tags:
9
  - streamlit
10
  pinned: false
11
+ short_description: Visualize ivrit.ai ASR eval results
12
  ---
13
 
14
+ # Hi There 👋
15
+
16
+ Load ivrit.ai ASR eval results CSV file to visualize the results.
17
+ Known Datasets will also allow loading the Audio directly from the HF Hub.
18
+ Supported known datasets are:
19
+
20
+ | Dataset Repo ID + split + reference text feature name | Dataset Config | CSV Output Name |
21
+ | --------------- | -------------- | ----------- |
22
+ | ivrit-ai/eval-d1:test:text | None | ivrit_ai_eval_d1 |
23
+ | upai-inc/saspeech:test:text | None | saspeech |
24
+ | google/fleurs:test:transcription | he_il | fleurs |
25
+ | mozilla-foundation/common_voice_17_0:test:sentence | he | common_voice_17 |
26
+ | imvladikon/hebrew_speech_kan:validation:sentence | None | hebrew_speech_kan |
27
 
 
28
 
 
 
pyproject.toml ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [project]
2
+ name = "vis-asr-eval-results"
3
+ version = "0.1.0"
4
+ description = "Add your description here"
5
+ readme = "README.md"
6
+ requires-python = ">=3.11.9"
7
+ dependencies = [
8
+ "hebrew>=0.8.1",
9
+ "huggingface-hub>=0.30.2",
10
+ "jiwer>=3.1.0",
11
+ "pandas>=2.2.3",
12
+ "soundfile>=0.13.1",
13
+ "streamlit>=1.45.0",
14
+ "transformers>=4.51.3",
15
+ ]
requirements.txt DELETED
@@ -1,3 +0,0 @@
1
- altair
2
- pandas
3
- streamlit
 
 
 
 
src/app.py ADDED
@@ -0,0 +1,407 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import streamlit as st
3
+ import pandas as pd
4
+ import jiwer
5
+ import requests
6
+ from datetime import datetime
7
+ from pathlib import Path
8
+ from st_fixed_container import st_fixed_container
9
+ from visual_eval.visualization import render_visualize_jiwer_result_html
10
+ from visual_eval.evaluator import HebrewTextNormalizer
11
+
12
# Hugging Face API token: prefer Streamlit secrets, fall back to the environment.
# st.secrets raises FileNotFoundError when no secrets.toml exists, and KeyError
# when the file exists but lacks the key — catch both so startup never crashes.
HF_API_TOKEN = None
try:
    HF_API_TOKEN = st.secrets["HF_API_TOKEN"]
except (FileNotFoundError, KeyError):
    HF_API_TOKEN = os.environ.get("HF_API_TOKEN")
has_api_token = HF_API_TOKEN is not None

# Known datasets as ("repo_id:split:reference_text_feature", config, CSV output name).
known_datasets = [
    ("ivrit-ai/eval-d1:test:text", None, "ivrit_ai_eval_d1"),
    ("upai-inc/saspeech:test:text", None, "saspeech"),
    ("google/fleurs:test:transcription", "he_il", "fleurs"),
    ("mozilla-foundation/common_voice_17_0:test:sentence", "he", "common_voice_17"),
    ("imvladikon/hebrew_speech_kan:validation:sentence", None, "hebrew_speech_kan"),
]

# Initialize session state for audio cache if it doesn't exist
if "audio_cache" not in st.session_state:
    st.session_state.audio_cache = {}

if "audio_preview_active" not in st.session_state:
    st.session_state.audio_preview_active = {}
33
+
34
+
35
def on_file_upload():
    """Reset cached audio state and jump back to the first entry on upload."""
    state = st.session_state
    state.audio_cache = {}
    state.audio_preview_active = {}
    state.selected_entry_idx = 0
39
+
40
+
41
def display_rtl(html):
    """Render an RTL container with the provided HTML string"""
    # Wrap the fragment in a right-to-left, Hebrew-language div before rendering.
    rtl_markup = f"""
        <div dir="rtl" lang="he">
        {html}
        </div>
        """
    st.markdown(rtl_markup, unsafe_allow_html=True)
51
+
52
+
53
@st.cache_data
def calculate_final_metrics(uploaded_file, _df):
    """Calculate final metrics for all entries

    Args:
        uploaded_file: The uploaded file object (For cache hash gen)
        _df: The dataframe containing the evaluation results (not included in cache hash)

    Returns:
        A dictionary containing the final metrics
    """
    # Order by id so references and hypotheses line up deterministically.
    ordered = _df.sort_values(by=["id"])
    for column in ("reference_text", "predicted_text"):
        ordered[column] = ordered[column].fillna("")

    # convert to list of dicts
    records = ordered.to_dict(orient="records")

    normalizer = HebrewTextNormalizer()

    # Compute corpus-level word metrics over normalized texts.
    references = [normalizer(record["reference_text"]) for record in records]
    hypotheses = [normalizer(record["predicted_text"]) for record in records]
    return jiwer.process_words(references, hypotheses)
80
+
81
+
82
def get_known_dataset_by_output_name(output_name, datasets=None):
    """Look up a known-dataset spec by its CSV output name.

    Args:
        output_name: The CSV output name (third element of a spec tuple).
        datasets: Optional iterable of (repo_spec, config, output_name)
            tuples to search; defaults to the module-level ``known_datasets``.

    Returns:
        The matching spec tuple, or None when nothing matches.
    """
    if datasets is None:
        datasets = known_datasets
    for spec in datasets:
        if spec[2] == output_name:
            return spec
    return None
87
+
88
+
89
def get_dataset_entries_audio_urls(dataset, offset=0, max_entries=100):
    """Fetch signed audio URLs for a slice of a known dataset via the HF datasets-server.

    Args:
        dataset: ("repo_id[:split:feature]", config, output_name) tuple, or None.
        offset: Row offset into the split.
        max_entries: Number of rows to request.

    Returns:
        A list of audio URLs, or None when the dataset is unknown, no API
        token is configured, or the server returned no usable rows.
    """
    if dataset is None or not has_api_token:
        return None

    dataset_repo_id, dataset_config, _ = dataset
    if not dataset_config:
        dataset_config = "default"
    # Repo id may be encoded as "repo:split:text_feature".
    if ":" in dataset_repo_id:
        dataset_repo_id, split, _ = dataset_repo_id.split(":")
    else:
        split = "test"

    headers = {"Authorization": f"Bearer {HF_API_TOKEN}"}
    api_query_params = {
        "dataset": dataset_repo_id,
        "config": dataset_config,
        "split": split,
        "offset": offset,
        "length": max_entries,
    }

    # Let requests build and percent-encode the query string instead of
    # joining raw "k=v" pairs by hand (values like repo ids need encoding).
    response = requests.get(
        "https://datasets-server.huggingface.co/rows",
        headers=headers,
        params=api_query_params,
    )
    try:
        data = response.json()
    except ValueError:
        # Non-JSON body (e.g. an HTML error page) — treat as "no audio available".
        return None

    def get_audio_url(row):
        # Each row carries a list of audio variants; use the first one's signed src.
        audio_feature_list = row["row"]["audio"]
        first_audio = audio_feature_list[0]
        return first_audio["src"]

    if "rows" in data and len(data["rows"]) > 0:
        return [get_audio_url(row) for row in data["rows"]]
    else:
        return None
128
+
129
+
130
def get_audio_url_for_entry(
    dataset, entry_idx, cache_neighbors=True, neighbor_range=20
):
    """
    Get audio URL for a specific entry and optionally cache neighbors

    Args:
        dataset: Dataset tuple (repo_id, config, output_name)
        entry_idx: Index of the entry to get audio URL for
        cache_neighbors: Whether to cache audio URLs for neighboring entries
        neighbor_range: Range of neighboring entries to cache

    Returns:
        Audio URL for the specified entry
    """
    # Calculate the range of entries to load; fetching neighbors in one
    # request amortizes the API call across subsequent prev/next navigation.
    if cache_neighbors:
        start_idx = max(0, entry_idx - neighbor_range)
        max_entries = neighbor_range * 2 + 1
    else:
        start_idx = entry_idx
        max_entries = 1

    # Get audio URLs for the range of entries
    audio_urls = get_dataset_entries_audio_urls(dataset, start_idx, max_entries)

    if not audio_urls:
        return None

    # Cache the audio URLs
    for i, url in enumerate(audio_urls):
        idx = start_idx + i
        # Extract expiration time from URL if available
        # NOTE(review): assumes signed URLs carry an "expires=<unix seconds>"
        # query parameter — confirm against the datasets-server URL format.
        expires = None
        if "expires=" in url:
            try:
                expires_param = url.split("expires=")[1].split("&")[0]
                expires = datetime.fromtimestamp(int(expires_param))
            except (ValueError, IndexError):
                expires = None

        st.session_state.audio_cache[idx] = {"url": url, "expires": expires}

    # Return the URL for the requested entry
    relative_idx = entry_idx - start_idx
    if 0 <= relative_idx < len(audio_urls):
        return audio_urls[relative_idx]
    return None
178
+
179
+
180
def get_cached_audio_url(entry_idx):
    """
    Get audio URL from cache if available and not expired

    Args:
        entry_idx: Index of the entry to get audio URL for

    Returns:
        Audio URL if available in cache and not expired, None otherwise
    """
    cache_entry = st.session_state.audio_cache.get(entry_idx)
    if cache_entry is None:
        return None

    # A lapsed signed URL is as good as a miss — the caller must re-fetch.
    expires = cache_entry["expires"]
    if expires and datetime.now() > expires:
        return None

    return cache_entry["url"]
200
+
201
+
202
def main():
    """Streamlit entry point: upload an eval-results CSV, browse entries,
    preview audio for known datasets, and visualize word-level alignment."""
    st.set_page_config(
        page_title="ASR Evaluation Visualizer", page_icon="🎤", layout="wide"
    )

    if not has_api_token:
        st.warning("No Hugging Face API token found. Audio previews will not work.")

    st.title("ASR Evaluation Visualizer")

    # File uploader
    uploaded_file = st.file_uploader(
        "Upload evaluation results CSV", type=["csv"], on_change=on_file_upload
    )

    if uploaded_file is not None:
        # Load the data
        try:
            eval_results = pd.read_csv(uploaded_file)
            st.success("File uploaded successfully!")

            with st.sidebar:
                # Toggle for calculating total metrics
                show_total_metrics = st.toggle("Show total metrics", value=False)

                if show_total_metrics:
                    total_metrics = calculate_final_metrics(uploaded_file, eval_results)

                    # Display total metrics in a nice format
                    with st.container():
                        st.metric("WER", f"{total_metrics.wer * 100:.4f}%")
                        st.table(
                            {
                                "Hits": total_metrics.hits,
                                "Subs": total_metrics.substitutions,
                                "Dels": total_metrics.deletions,
                                "Insrt": total_metrics.insertions,
                            }
                        )

            # Create sidebar for entry selection
            st.sidebar.header("Select Entry")

            # Add Next/Prev buttons at the top of the sidebar
            col1, col2 = st.sidebar.columns(2)

            # Define navigation functions (mutate the radio's session key directly)
            def go_prev():
                if st.session_state.selected_entry_idx > 0:
                    st.session_state.selected_entry_idx -= 1

            def go_next():
                if st.session_state.selected_entry_idx < len(eval_results) - 1:
                    st.session_state.selected_entry_idx += 1

            # Add navigation buttons
            col1.button("← Prev", on_click=go_prev, use_container_width=True)
            col2.button("Next →", on_click=go_next, use_container_width=True)

            # Create a data table with entries and their WER
            entries_data = []
            for i in range(len(eval_results)):
                wer_value = eval_results.iloc[i].get("wer", 0)
                # Format WER as percentage
                wer_formatted = (
                    f"{wer_value*100:.2f}%"
                    if isinstance(wer_value, (int, float))
                    else wer_value
                )
                entries_data.append({"Entry": f"Entry #{i+1}", "WER": wer_formatted})

            # Create a selection mechanism using radio buttons that look like a table
            st.sidebar.write("Select an entry:")

            # Use a container for better styling
            entry_container = st.sidebar.container()

            # Create a radio button for each entry, styled to look like a table row
            entry_container.radio(
                "Select an entry",
                options=list(range(len(eval_results))),
                format_func=lambda i: f"Entry #{i+1} ({entries_data[i]['WER']})",
                label_visibility="collapsed",
                key="selected_entry_idx",
            )

            # Use the selected entry
            selected_entry = st.session_state.selected_entry_idx

            # Toggle for normalized vs raw text
            use_normalized = st.sidebar.toggle("Use normalized text", value=True)

            # Get the text columns based on the toggle
            if use_normalized:
                ref_col, hyp_col = "norm_reference_text", "norm_predicted_text"
            else:
                ref_col, hyp_col = "reference_text", "predicted_text"

            # Get the reference and hypothesis texts
            ref, hyp = eval_results.iloc[selected_entry][[ref_col, hyp_col]].values

            st.header("Visualization")

            # Check if the CSV file is from a known dataset
            dataset_name = None

            # If no dataset column, try to infer from filename
            # NOTE(review): filename actually takes precedence over the
            # "dataset" column below — confirm that is intended.
            if uploaded_file is not None:
                filename_stem = Path(uploaded_file.name).stem
                dataset_name = filename_stem

            if not dataset_name and "dataset" in eval_results.columns:
                dataset_name = eval_results.iloc[selected_entry]["dataset"]

            # Get the known dataset if available
            known_dataset = get_known_dataset_by_output_name(dataset_name)

            # Display audio preview button if from a known dataset
            if known_dataset:
                # Check if we have the audio URL in cache
                audio_url = get_cached_audio_url(selected_entry)

                audio_preview_active = st.session_state.audio_preview_active.get(
                    selected_entry, False
                )

                preview_audio = False
                if not audio_preview_active:
                    # Create a button to preview audio
                    preview_audio = st.button("Preview Audio", key="preview_audio")

                if preview_audio or audio_url:
                    st.session_state.audio_preview_active[selected_entry] = True
                    with st_fixed_container(
                        mode="sticky", position="top", border=True, margin=0
                    ):
                        # If button clicked or we already have the URL, get/use the audio URL
                        if not audio_url:
                            with st.spinner("Loading audio..."):
                                audio_url = get_audio_url_for_entry(
                                    known_dataset, selected_entry
                                )

                        # Display the audio player in the sticky container at the top
                        if audio_url:
                            st.audio(audio_url)
                        else:
                            st.error("Failed to load audio for this entry.")

            # Display the visualization
            html = render_visualize_jiwer_result_html(ref, hyp)
            display_rtl(html)

            # Display metadata
            st.header("Metadata")
            metadata_cols = [
                "metadata_uuid",
                "model",
                "dataset",
                "dataset_split",
                "engine",
            ]
            metadata = eval_results.iloc[selected_entry][metadata_cols]

            # Create a DataFrame for better display
            metadata_df = pd.DataFrame(
                {"Field": metadata_cols, "Value": metadata.values}
            )
            st.table(metadata_df)

            # If we have audio URL, display it in the sticky container
            # NOTE(review): this branch is a no-op (dead code) — candidate for removal.
            if "audio_url" in locals() and audio_url:
                pass  # CSS is now applied globally

        except Exception as e:
            st.error(f"Error processing file: {str(e)}")
    else:
        st.info(
            "Please upload an evaluation results CSV file to visualize the results."
        )
        st.markdown(
            """
            ### Expected CSV Format
            The CSV should have the following columns:
            - id
            - reference_text
            - predicted_text
            - norm_reference_text
            - norm_predicted_text
            - wer
            - wil
            - substitutions
            - deletions
            - insertions
            - hits
            - metadata_uuid
            - model
            - dataset
            - dataset_split
            - engine
            """
        )
404
+
405
+
406
# Script entry point — run the Streamlit app.
if __name__ == "__main__":
    main()
src/sample_inputs/eval_results.csv ADDED
@@ -0,0 +1 @@
 
 
1
+ id,reference_text,predicted_text,norm_reference_text,norm_predicted_text,wer,wil,substitutions,deletions,insertions,hits,metadata_uuid,model,dataset,dataset_split,engine
src/sample_inputs/ivrit_ai_eval_d1.csv ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ id,reference_text,predicted_text,norm_reference_text,norm_predicted_text,wer,wil,substitutions,deletions,insertions,hits,metadata_uuid,model,dataset,dataset_split,engine
2
+ 1,"אני אוהב לתכנת בפייתון","אני אוהב לתכנת בפיתון","אני אוהב לתכנת בפייתון","אני אוהב לתכנת בפיתון",0.2,0.2,1,0,0,4,12345,whisper-large-v3,ivrit-ai/eval-d1,test,whisper
3
+ 2,"שלום עולם","שלום עולם","שלום עולם","שלום עולם",0.0,0.0,0,0,0,2,67890,whisper-large-v3,ivrit-ai/eval-d1,test,whisper
4
+ 3,"ברוכים הבאים לישראל","ברוכים הבאים לישראל","ברוכים הבאים לישראל","ברוכים הבאים לישראל",0.0,0.0,0,0,0,3,13579,whisper-large-v3,ivrit-ai/eval-d1,test,whisper
5
+ 4,"תל אביב היא עיר יפה","תל אביב היא עיר יפה מאוד","תל אביב היא עיר יפה","תל אביב היא עיר יפה מאוד",0.2,0.2,0,0,1,5,24680,whisper-large-v3,ivrit-ai/eval-d1,test,whisper
6
+ 5,"אני גר בירושלים","אני גר בירושלים","אני גר בירושלים","אני גר בירושלים",0.0,0.0,0,0,0,3,97531,whisper-large-v3,ivrit-ai/eval-d1,test,whisper
src/st_fixed_container.py ADDED
@@ -0,0 +1,212 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import Literal
2
+
3
+ import streamlit as st
4
+ from streamlit.components.v1 import html
5
+
6
+ """
7
+ st_fixed_container consist of two parts - fixed container and opaque container.
8
+ Fixed container is a container that is fixed to the top or bottom of the screen.
9
+
10
+ When transparent is set to True, the container is typical `st.container`, which is transparent by default.
11
+ When transparent is set to False, the container is custom opaque_container, that updates its background color to match the background color of the app.
12
+
13
+ Opaque container is a helper class, but can be used to create more custom views. See main for examples.
14
+
15
+ """
16
+ OPAQUE_CONTAINER_CSS = """
17
+
18
+ :root {{
19
+ --background-color: #ffffff; /* Default background color */
20
+ }}
21
+
22
+
23
+ div[data-testid="stVerticalBlockBorderWrapper"]:has(div.opaque-container-{id}):not(:has(div.not-opaque-container)) div[data-testid="stVerticalBlock"]:has(div.opaque-container-{id}):not(:has(div.not-opaque-container)) > div[data-testid="stVerticalBlockBorderWrapper"] {{
24
+ background-color: var(--background-color);
25
+ width: 100%;
26
+ }}
27
+
28
+
29
+
30
+ div[data-testid="stVerticalBlockBorderWrapper"]:has(div.opaque-container-{id}):not(:has(div.not-opaque-container)) div[data-testid="stVerticalBlock"]:has(div.opaque-container-{id}):not(:has(div.not-opaque-container)) > div[data-testid="element-container"] {{
31
+ display: none;
32
+ }}
33
+
34
+
35
+ div[data-testid="stVerticalBlockBorderWrapper"]:has(div.not-opaque-container):not(:has(div[class^='opaque-container-'])) {{
36
+ display: none;
37
+ }}
38
+ """.strip()
39
+
40
+ OPAQUE_CONTAINER_JS = """
41
+ const root = parent.document.querySelector('.stApp');
42
+ let lastBackgroundColor = null;
43
+
44
+
45
+ function updateContainerBackground(currentBackground) {
46
+ parent.document.documentElement.style.setProperty('--background-color', currentBackground);
47
+ ;
48
+ }
49
+
50
+ function checkForBackgroundColorChange() {
51
+ const style = window.getComputedStyle(root);
52
+ const currentBackgroundColor = style.backgroundColor;
53
+ if (currentBackgroundColor !== lastBackgroundColor) {
54
+ lastBackgroundColor = currentBackgroundColor; // Update the last known value
55
+ updateContainerBackground(lastBackgroundColor);
56
+ }
57
+ }
58
+
59
+ const observerCallback = (mutationsList, observer) => {
60
+ for(let mutation of mutationsList) {
61
+ if (mutation.type === 'attributes' && (mutation.attributeName === 'class' || mutation.attributeName === 'style')) {
62
+ checkForBackgroundColorChange();
63
+ }
64
+ }
65
+ };
66
+
67
+ const main = () => {
68
+ checkForBackgroundColorChange();
69
+
70
+ const observer = new MutationObserver(observerCallback);
71
+ observer.observe(root, { attributes: true, childList: false, subtree: false });
72
+ }
73
+
74
+ // main();
75
+ document.addEventListener("DOMContentLoaded", main);
76
+ """.strip()
77
+
78
+
79
def st_opaque_container(
    *,
    height: int | None = None,
    border: bool | None = None,
    key: str | None = None,
):
    """Create a container whose background tracks the app's theme background.

    Renders two stacked containers: the "opaque" one receives the injected
    JS/CSS that keeps its background in sync with the app, and a companion
    "not-opaque" marker container that the injected CSS hides.

    Args:
        height: Fixed pixel height for the inner container, or None.
        border: Whether the inner container draws a border, or None.
        key: Unique id used to scope the generated CSS selectors.

    Returns:
        A Streamlit container to use as a context manager for content.
    """
    # (removed dead `global opaque_counter` — that name is never defined)
    opaque_container = st.container()
    non_opaque_container = st.container()
    css = OPAQUE_CONTAINER_CSS.format(id=key)
    with opaque_container:
        # Inject the background-sync script and the key-scoped CSS.
        html(f"<script>{OPAQUE_CONTAINER_JS}</script>", scrolling=False, height=0)
        st.markdown(f"<style>{css}</style>", unsafe_allow_html=True)
        st.markdown(
            f"<div class='opaque-container-{key}'></div>",
            unsafe_allow_html=True,
        )
    with non_opaque_container:
        # Constant marker div (no interpolation needed) — hidden by the CSS above.
        st.markdown(
            "<div class='not-opaque-container'></div>",
            unsafe_allow_html=True,
        )

    return opaque_container.container(height=height, border=border)
104
+
105
+
106
+ FIXED_CONTAINER_CSS = """
107
+
108
+ div[data-testid="stVerticalBlockBorderWrapper"]:has(div.fixed-container-{id}):not(:has(div.not-fixed-container)){{
109
+ background-color: transparent;
110
+ position: {mode};
111
+ width: inherit;
112
+ background-color: inherit;
113
+ {position}: {margin};
114
+ z-index: 999;
115
+
116
+ }}
117
+
118
+ div[data-testid="stVerticalBlockBorderWrapper"]:has(div.fixed-container-{id}):not(:has(div.not-fixed-container)) div[data-testid="stVerticalBlock"]:has(div.fixed-container-{id}):not(:has(div.not-fixed-container)) > div[data-testid="element-container"] {{
119
+ display: none;
120
+ }}
121
+
122
+
123
+ div[data-testid="stVerticalBlockBorderWrapper"]:has(div.not-fixed-container):not(:has(div[class^='fixed-container-'])) {{
124
+ display: none;
125
+ }}
126
+ """.strip()
127
+
128
+ MARGINS = {
129
+ "top": "2.875rem",
130
+ "bottom": "0",
131
+ }
132
+
133
+
134
def st_fixed_container(
    *,
    height: int | None = None,
    border: bool | None = None,
    mode: Literal["fixed", "sticky"] = "fixed",
    position: Literal["top", "bottom"] = "top",
    margin: str | None = None,
    transparent: bool = False,
    key: str | None = None,
):
    """Create a container pinned to the top or bottom of the viewport.

    Args:
        height: Fixed pixel height for the inner container, or None.
        border: Whether the inner container draws a border, or None.
        mode: CSS position mode — "fixed" or "sticky".
        position: Edge to pin to — "top" or "bottom".
        margin: CSS offset from the chosen edge; defaults per MARGINS.
        transparent: If True, use a plain (transparent) st.container;
            otherwise wrap content in an opaque, theme-matching container.
        key: Unique id used to scope the generated CSS selectors.

    Returns:
        A Streamlit container to use as a context manager for content.
    """
    if margin is None:
        margin = MARGINS[position]
    # (removed dead `global fixed_counter` — that name is never defined)
    fixed_container = st.container()
    non_fixed_container = st.container()
    css = FIXED_CONTAINER_CSS.format(
        mode=mode,
        position=position,
        margin=margin,
        id=key,
    )

    def render_content():
        # Render the actual content container inside the fixed wrapper.
        with fixed_container:
            if transparent:
                return st.container(height=height, border=border)

            return st_opaque_container(
                height=height, border=border, key=f"opaque_{key}"
            )

    def render_non_content():
        # Inject the scoped CSS plus the marker divs the selectors match on.
        with fixed_container:
            st.markdown(f"<style>{css}</style>", unsafe_allow_html=True)
            st.markdown(
                f"<div class='fixed-container-{key}'></div>",
                unsafe_allow_html=True,
            )
        with non_fixed_container:
            # Constant marker div (no interpolation needed).
            st.markdown(
                "<div class='not-fixed-container'></div>",
                unsafe_allow_html=True,
            )

    result = None

    # Render order matters so the pinned block sits on the correct edge.
    if position == "top":
        result = render_content()
        render_non_content()
    else:
        render_non_content()
        result = render_content()

    return result
188
+
189
+
190
+ if __name__ == "__main__":
191
+ for i in range(30):
192
+ st.write(f"Line {i}")
193
+
194
+ # with st_fixed_container(mode="sticky", position="bottom", border=True):
195
+ # with st_fixed_container(mode="sticky", position="top", border=True):
196
+ # with st_fixed_container(mode="fixed", position="bottom", border=True):
197
+ with st_fixed_container(mode="fixed", position="top", border=True):
198
+ st.write("This is a fixed container.")
199
+ st.write("This is a fixed container.")
200
+ st.write("This is a fixed container.")
201
+
202
+ # The following code creates a small control panel on the right side of the screen with two buttons inside it:
203
+ with st_fixed_container(mode="fixed", position="bottom", transparent=True):
204
+ _, right = st.columns([0.7, 0.3])
205
+ with right:
206
+ with st_opaque_container(border=True):
207
+ st.button("Feedback", use_container_width=True)
208
+ st.button("Clean up", use_container_width=True)
209
+
210
+ st.container(border=True).write("This is a regular container.")
211
+ for i in range(30):
212
+ st.write(f"Line {i}")
src/streamlit_app.py DELETED
@@ -1,40 +0,0 @@
1
- import altair as alt
2
- import numpy as np
3
- import pandas as pd
4
- import streamlit as st
5
-
6
- """
7
- # Welcome to Streamlit!
8
-
9
- Edit `/streamlit_app.py` to customize this app to your heart's desire :heart:.
10
- If you have any questions, checkout our [documentation](https://docs.streamlit.io) and [community
11
- forums](https://discuss.streamlit.io).
12
-
13
- In the meantime, below is an example of what you can do with just a few lines of code:
14
- """
15
-
16
- num_points = st.slider("Number of points in spiral", 1, 10000, 1100)
17
- num_turns = st.slider("Number of turns in spiral", 1, 300, 31)
18
-
19
- indices = np.linspace(0, 1, num_points)
20
- theta = 2 * np.pi * num_turns * indices
21
- radius = indices
22
-
23
- x = radius * np.cos(theta)
24
- y = radius * np.sin(theta)
25
-
26
- df = pd.DataFrame({
27
- "x": x,
28
- "y": y,
29
- "idx": indices,
30
- "rand": np.random.randn(num_points),
31
- })
32
-
33
- st.altair_chart(alt.Chart(df, height=700, width=700)
34
- .mark_point(filled=True)
35
- .encode(
36
- x=alt.X("x", axis=None),
37
- y=alt.Y("y", axis=None),
38
- color=alt.Color("idx", legend=None, scale=alt.Scale()),
39
- size=alt.Size("rand", legend=None, scale=alt.Scale(range=[1, 150])),
40
- ))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
src/visual_eval/__init__.py ADDED
File without changes
src/visual_eval/evaluator.py ADDED
@@ -0,0 +1,56 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Evaluator module.
3
+ Provides the HebrewTextNormalizer used to normalize Hebrew reference and
4
+ predicted texts before word-error-rate computation.
5
+ """
6
+
7
+ import concurrent.futures
8
+ import gc
9
+ import io
10
+ import queue
11
+ import threading
12
+ from typing import Dict, Generator, List
13
+
14
+ import soundfile as sf
15
+ from hebrew import Hebrew
16
+ from tqdm import tqdm
17
+ from transformers.models.whisper.english_normalizer import BasicTextNormalizer
18
+
19
+ from visual_eval.visualization import render_visualize_jiwer_result_html
20
+
21
+
22
class HebrewTextNormalizer(BasicTextNormalizer):
    """Normalizer for Hebrew ASR text.

    Extends BasicTextNormalizer by first stripping niqqud, invisible
    Unicode direction/joiner control characters, and quote characters.
    """

    # Invisible control characters removed before base normalization.
    _SUPERFLUOUS_CHARS = (
        "\u061c"  # Arabic letter mark
        "\u200b\u200c\u200d"  # Zero-width space, non-joiner, joiner
        "\u200e\u200f"  # LTR and RTL marks
        "\u202a\u202b\u202c\u202d\u202e"  # LTR/RTL embedding, pop, override
        "\u2066\u2067\u2068\u2069"  # Isolate controls
        "\ufeff"  # Zero-width no-break space
    )

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)

        # Translation tables are built once and reused per call.
        self.superfluous_hebrew_unicode_symbols_translator = str.maketrans(
            {ord(c): None for c in self._SUPERFLUOUS_CHARS}
        )
        self.quotes_translator = str.maketrans({ord(c): None for c in "\"'"})

    def __call__(self, text):
        """Strip niqqud, control characters and quotes, then apply base normalization."""
        stripped = Hebrew(text).no_niqqud().string
        stripped = stripped.translate(
            self.superfluous_hebrew_unicode_symbols_translator
        )
        stripped = stripped.translate(self.quotes_translator)
        return super().__call__(stripped)
src/visual_eval/visualization.py ADDED
@@ -0,0 +1,279 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Visualization module.
3
+ Provides functions to render HTML visualizations of word alignment between reference and hypothesis texts,
4
+ and to generate the complete results HTML page with an embedded audio element and progress status.
5
+ """
6
+
7
+ from itertools import zip_longest
8
+ from jiwer import process_words
9
+ import hashlib
10
+
11
def render_visualize_jiwer_result_html(
    ref: str, hyp: str, title: str = "", model_id: str | None = None
) -> str:
    """
    Generate an HTML visualization of the alignment between reference and hypothesis texts.

    Each aligned word pair is rendered as a two-row cell (reference on top,
    hypothesis below); insertions, deletions and substitutions get distinct
    background colours, and every cell carries a ``data-ref-pos`` attribute so
    the page-level JavaScript can highlight the same reference position across
    multiple model blocks.

    Args:
        ref: The reference text.
        hyp: The hypothesis (transcribed) text.
        title: A title for the evaluation block (e.g., model name).
        model_id: A unique identifier for the model (used in word IDs).
            When ``None``, a stable id is derived from ``title``.

    Returns:
        An HTML string visualizing word-level alignments and error metrics.
    """
    # Derive a short, deterministic model id from the title when not provided.
    if model_id is None:
        model_id = hashlib.md5(title.encode()).hexdigest()[:8]

    # Process word alignment via jiwer (single ref/hyp pair -> alignment [0]).
    word_output = process_words(ref, hyp)
    alignment_chunks = word_output.alignments[0]

    columns = []
    ref_position = 0  # This tracks the position in the reference text

    for chunk in alignment_chunks:
        if chunk.type == "equal":
            words = word_output.references[0][chunk.ref_start_idx : chunk.ref_end_idx]
            for word in words:
                ref_cell = f'<span class="word-item ref-word" data-ref-pos="{ref_position}" data-ref-word="{word}">{word}</span>'
                hyp_cell = f'<span class="word-item hyp-word" data-ref-pos="{ref_position}" data-ref-word="{word}">{word}</span>'
                columns.append((ref_cell, hyp_cell, ref_position))
                ref_position += 1

        elif chunk.type == "delete":
            words = word_output.references[0][chunk.ref_start_idx : chunk.ref_end_idx]
            for word in words:
                ref_cell = f'<span class="word-item ref-word" data-ref-pos="{ref_position}" data-ref-word="{word}">{word}</span>'
                hyp_cell = '<span style="background-color: #ffb3d7; padding: 0 4px;">&nbsp;</span>'
                columns.append((ref_cell, hyp_cell, ref_position))
                ref_position += 1

        elif chunk.type == "insert":
            words = word_output.hypotheses[0][chunk.hyp_start_idx : chunk.hyp_end_idx]
            # Inserted words are linked to the previous reference position;
            # max() already clamps to 0 at the start of the text.
            last_ref_pos = max(0, ref_position - 1)
            for word in words:
                ref_cell = '<span>&nbsp;</span>'
                hyp_cell = f'<span class="word-item hyp-word" data-ref-pos="{last_ref_pos}" data-inserted="true" style="background-color: #99f7c8; padding: 0 4px;">{word}</span>'
                columns.append((ref_cell, hyp_cell, last_ref_pos))
            # Note: ref_position is NOT incremented for inserts

        elif chunk.type == "substitute":
            ref_words = word_output.references[0][chunk.ref_start_idx : chunk.ref_end_idx]
            hyp_words = word_output.hypotheses[0][chunk.hyp_start_idx : chunk.hyp_end_idx]

            for ref_word, hyp_word in zip_longest(ref_words, hyp_words, fillvalue=""):
                if ref_word:  # Only increment position for actual reference words
                    ref_cell = f'<span class="word-item ref-word" data-ref-pos="{ref_position}" data-ref-word="{ref_word}" style="background-color: #dddddd;">{ref_word}</span>'
                    if hyp_word:
                        hyp_cell = f'<span class="word-item hyp-word" data-ref-pos="{ref_position}" data-subst="true" style="background-color: #ffc04d; padding: 0 4px;">{hyp_word}</span>'
                    else:
                        hyp_cell = '<span style="background-color: #ffb3d7; padding: 0 4px;">&nbsp;</span>'
                    columns.append((ref_cell, hyp_cell, ref_position))
                    ref_position += 1
                elif hyp_word:  # Extra hypothesis words with no reference pair
                    # Link to previous reference position
                    last_ref_pos = max(0, ref_position - 1)
                    ref_cell = '<span>&nbsp;</span>'
                    hyp_cell = f'<span class="word-item hyp-word" data-ref-pos="{last_ref_pos}" data-inserted="true" style="background-color: #99f7c8; padding: 0 4px;">{hyp_word}</span>'
                    columns.append((ref_cell, hyp_cell, last_ref_pos))

    # Create HTML visualization
    html_blocks = []
    metrics_results_str = f"WER: {word_output.wer * 100:0.04f}%, WIL: {word_output.wil * 100:0.04f}%"
    summary_operations_str = f"Subs: {word_output.substitutions}, Dels: {word_output.deletions}, Insrt: {word_output.insertions}"

    # Header row: metrics on the left, title centered, operation counts right.
    html_blocks.append(
        f"<div dir='ltr' class='model-result' data-model-id='{model_id}' style='font-size: 1.25em; margin-bottom: 10px; display: flex; justify-content: space-between; gap: 1.5em;'>"
        f"<div style='flex: 0 0 content;'>{metrics_results_str}</div>"
        f"<div>{title}</div>"
        f"<div style='flex: 0 0 content;'>{summary_operations_str}</div></div>"
    )

    flex_container = f'<div class="word-alignment-container" data-model-id="{model_id}" style="display: flex; flex-wrap: wrap; margin-bottom: 10px;">'
    for ref_cell, hyp_cell, ref_pos in columns:
        cell_html = (
            f'<div class="word-pair" data-ref-pos="{ref_pos}" style="display: flex; flex-direction: column; align-items: center; border-bottom: 1px solid grey; '
            'padding-left: 1em; font-family: monospace;">'
            f'<div style="text-align: center;">{ref_cell}</div>'
            f'<div style="text-align: center;">{hyp_cell}</div>'
            '</div>'
        )
        flex_container += cell_html
    flex_container += '</div>'
    html_blocks.append(flex_container)

    html_string = f'<div class="model-block" data-model-id="{model_id}" style="background: white; color: black; margin-bottom: 20px;">' + "\n".join(html_blocks) + '</div>'

    return html_string
111
+
112
+ def generate_results_html(dataset_description: str, html_blocks: list, audio_file: str, timestamp: str, progress: tuple = None) -> str:
113
+ """
114
+ Generate the complete HTML results page including an audio player, all evaluation blocks, and progress status.
115
+
116
+ Args:
117
+ dataset_description: A string describing the dataset.
118
+ html_blocks: A list of HTML strings (one per model evaluation).
119
+ audio_file: The filename of the saved audio sample.
120
+ timestamp: The timestamp string used in titles.
121
+ progress: A tuple (done, total) indicating the number of models evaluated so far.
122
+
123
+ Returns:
124
+ A complete HTML document as a string.
125
+ """
126
+ progress_html = ""
127
+ auto_scroll_to_bottom_on_load = ""
128
+ if progress:
129
+ done, total = progress
130
+ progress_html = f"<div style='margin-bottom:20px;'><strong>Progress:</strong> {done} of {total} models evaluated.</div>"
131
+ if done < total:
132
+ auto_scroll_to_bottom_on_load = """
133
+ <script type="text/javascript">
134
+ document.getElementById('results-container').scrollTop = document.getElementById('results-container').scrollHeight;
135
+ </script>
136
+ """
137
+
138
+ refresh_page_control = """
139
+ <button onclick="location.reload();">Refresh Page</button>
140
+ """
141
+ audio_element = f"""
142
+ <div style="margin-bottom: 20px;">
143
+ <audio controls>
144
+ <source src="{audio_file}" type="audio/mp3">
145
+ Your browser does not support the audio element.
146
+ </audio>
147
+ </div>
148
+ """
149
+
150
+ # Add JavaScript for reference-based word highlighting with sticky functionality
151
+ highlighting_js = """
152
+ <script type="text/javascript">
153
+ document.addEventListener('DOMContentLoaded', function() {
154
+ // Track the currently selected reference position
155
+ let selectedRefPos = null;
156
+
157
+ // Helper function to apply highlighting
158
+ function highlightPosition(refPos, isSticky = false) {
159
+ // Apply highlighting style
160
+ const highlightStyle = 'underline';
161
+
162
+ // Highlight all elements with the matching reference position
163
+ document.querySelectorAll(`.word-item[data-ref-pos="${refPos}"]`).forEach(el => {
164
+ el.style.textDecoration = highlightStyle;
165
+ el.style.textDecorationThickness = '2px';
166
+ el.style.textDecorationColor = isSticky ? 'red' : 'blue';
167
+ });
168
+ }
169
+
170
+ // Helper function to remove highlighting
171
+ function removeHighlighting(refPos) {
172
+ // Don't remove highlighting if this is the selected position
173
+ if (refPos === selectedRefPos) return;
174
+
175
+ document.querySelectorAll(`.word-item[data-ref-pos="${refPos}"]`).forEach(el => {
176
+ el.style.textDecoration = 'none';
177
+ });
178
+ }
179
+
180
+ // Helper function to clear all sticky highlighting
181
+ function clearStickyHighlighting() {
182
+ if (selectedRefPos !== null) {
183
+ document.querySelectorAll(`.word-item[data-ref-pos="${selectedRefPos}"]`).forEach(el => {
184
+ el.style.textDecoration = 'none';
185
+ });
186
+
187
+ selectedRefPos = null;
188
+ }
189
+ }
190
+
191
+ // Use event delegation for all word-alignment-containers
192
+ document.querySelectorAll('.word-alignment-container').forEach(container => {
193
+ // Mouseover (replaces mouseenter on individual elements)
194
+ container.addEventListener('mouseover', function(event) {
195
+ const target = event.target.closest('.word-item');
196
+ if (!target) return;
197
+
198
+ const refPos = target.dataset.refPos;
199
+ if (!refPos) return;
200
+
201
+ highlightPosition(refPos, false);
202
+ });
203
+
204
+ // Mouseout (replaces mouseleave on individual elements)
205
+ container.addEventListener('mouseout', function(event) {
206
+ const target = event.target.closest('.word-item');
207
+ if (!target) return;
208
+
209
+ const refPos = target.dataset.refPos;
210
+ if (!refPos) return;
211
+
212
+ removeHighlighting(refPos);
213
+ });
214
+
215
+ // Click for sticky highlighting
216
+ container.addEventListener('click', function(event) {
217
+ const target = event.target.closest('.word-item');
218
+ if (!target) return;
219
+
220
+ const refPos = target.dataset.refPos;
221
+ if (!refPos) return;
222
+
223
+ // If this position is already selected, clear it
224
+ if (selectedRefPos === refPos) {
225
+ clearStickyHighlighting();
226
+ } else {
227
+ // Clear any existing sticky highlighting
228
+ clearStickyHighlighting();
229
+
230
+ // Set new selected position
231
+ selectedRefPos = refPos;
232
+
233
+ // Apply sticky highlighting
234
+ highlightPosition(refPos, true);
235
+ }
236
+ });
237
+ });
238
+
239
+ // Add a click handler on the document to clear sticky highlighting when clicking elsewhere
240
+ document.addEventListener('click', function(e) {
241
+ // If the click wasn't on a word item or word pair, clear sticky highlighting
242
+ if (!e.target.closest('.word-item') && !e.target.closest('.word-pair') && selectedRefPos !== null) {
243
+ clearStickyHighlighting();
244
+ }
245
+ });
246
+ });
247
+ </script>
248
+ """
249
+
250
+ # Add CSS for hover effects
251
+ highlighting_css = """
252
+ <style>
253
+ .word-item {
254
+ cursor: pointer;
255
+ transition: all 0.2s;
256
+ }
257
+ </style>
258
+ """
259
+
260
+ results_html = f"""
261
+ <html dir="rtl" lang="he">
262
+ <head>
263
+ <meta charset="utf-8">
264
+ <title>Evaluation Results - {dataset_description} - {timestamp}</title>
265
+ {highlighting_css}
266
+ </head>
267
+ <body>
268
+ <h3>Evaluation Results - {dataset_description} - {timestamp}</h3>
269
+ {progress_html}{refresh_page_control}
270
+ {audio_element}
271
+ <div id="results-container" style="max-height: 80vh; overflow-y: auto;">
272
+ {''.join(html_blocks)}
273
+ </div>
274
+ {highlighting_js}
275
+ {auto_scroll_to_bottom_on_load}
276
+ </body>
277
+ </html>
278
+ """
279
+ return results_html
uv.lock ADDED
The diff for this file is too large to render. See raw diff