Spaces:
Running
Running
Yoad
commited on
Commit
·
2f5cf2f
1
Parent(s):
c66d9f1
First commit with actual logic
Browse files- .dockerignore +5 -0
- .gitignore +13 -0
- .python-version +1 -0
- Dockerfile +31 -7
- README.md +15 -6
- pyproject.toml +15 -0
- requirements.txt +0 -3
- src/app.py +407 -0
- src/sample_inputs/eval_results.csv +1 -0
- src/sample_inputs/ivrit_ai_eval_d1.csv +6 -0
- src/st_fixed_container.py +212 -0
- src/streamlit_app.py +0 -40
- src/visual_eval/__init__.py +0 -0
- src/visual_eval/evaluator.py +56 -0
- src/visual_eval/visualization.py +279 -0
- uv.lock +0 -0
.dockerignore
ADDED
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
.venv
|
2 |
+
.streamlit
|
3 |
+
.git
|
4 |
+
.gitignore
|
5 |
+
sample_inputs/
|
.gitignore
ADDED
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
.venv
|
2 |
+
.streamlit
|
3 |
+
|
4 |
+
# python
|
5 |
+
__pycache__
|
6 |
+
*.pyc
|
7 |
+
*.pyo
|
8 |
+
*.pyd
|
9 |
+
*.so
|
10 |
+
*.egg-info
|
11 |
+
dist
|
12 |
+
build
|
13 |
+
eggs
|
.python-version
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
3.11.9
|
Dockerfile
CHANGED
@@ -1,6 +1,4 @@
|
|
1 |
-
FROM
|
2 |
-
|
3 |
-
WORKDIR /app
|
4 |
|
5 |
RUN apt-get update && apt-get install -y \
|
6 |
build-essential \
|
@@ -9,13 +7,39 @@ RUN apt-get update && apt-get install -y \
|
|
9 |
git \
|
10 |
&& rm -rf /var/lib/apt/lists/*
|
11 |
|
12 |
-
|
13 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
14 |
|
15 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
16 |
|
17 |
EXPOSE 8501
|
18 |
|
19 |
HEALTHCHECK CMD curl --fail http://localhost:8501/_stcore/health
|
20 |
|
21 |
-
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
FROM ghcr.io/astral-sh/uv:python3.11-bookworm-slim
|
|
|
|
|
2 |
|
3 |
RUN apt-get update && apt-get install -y \
|
4 |
build-essential \
|
|
|
7 |
git \
|
8 |
&& rm -rf /var/lib/apt/lists/*
|
9 |
|
10 |
+
RUN useradd -m -u 1000 user
|
11 |
+
|
12 |
+
USER user
|
13 |
+
|
14 |
+
ENV HOME=/home/user \
|
15 |
+
PATH=/home/user/.local/bin:$PATH
|
16 |
+
|
17 |
+
WORKDIR $HOME/app
|
18 |
+
|
19 |
+
ENV UV_COMPILE_BYTECODE=1
|
20 |
|
21 |
+
# Install the project's dependencies using the lockfile and settings
|
22 |
+
RUN --mount=type=cache,target=/root/.cache/uv \
|
23 |
+
--mount=type=bind,source=uv.lock,target=uv.lock \
|
24 |
+
--mount=type=bind,source=pyproject.toml,target=pyproject.toml \
|
25 |
+
uv sync --frozen --no-install-project --no-dev
|
26 |
+
|
27 |
+
ADD . $HOME/app
|
28 |
+
|
29 |
+
# Use uv sync to resolve and install dependencies
|
30 |
+
RUN --mount=type=cache,target=/root/.cache/uv \
|
31 |
+
uv sync --frozen --no-dev
|
32 |
+
|
33 |
+
# Place executables in the environment at the front of the path
|
34 |
+
ENV PATH="$HOME/app/.venv/bin:$PATH"
|
35 |
|
36 |
EXPOSE 8501
|
37 |
|
38 |
HEALTHCHECK CMD curl --fail http://localhost:8501/_stcore/health
|
39 |
|
40 |
+
# Reset the entrypoint, don't invoke `uv`
|
41 |
+
ENTRYPOINT []
|
42 |
+
|
43 |
+
ENV PYTHONPATH="$HOME/app/src:$PYTHONPATH"
|
44 |
+
|
45 |
+
CMD ["uv", "run", "streamlit", "run", "src/app.py", "--server.port=8501", "--server.address=0.0.0.0"]
|
README.md
CHANGED
@@ -1,6 +1,6 @@
|
|
1 |
---
|
2 |
title: Visualize Eval Results
|
3 |
-
emoji:
|
4 |
colorFrom: red
|
5 |
colorTo: red
|
6 |
sdk: docker
|
@@ -8,12 +8,21 @@ app_port: 8501
|
|
8 |
tags:
|
9 |
- streamlit
|
10 |
pinned: false
|
11 |
-
short_description: Visualize ivrit.ai
|
12 |
---
|
13 |
|
14 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
15 |
|
16 |
-
Edit `/src/streamlit_app.py` to customize this app to your heart's desire. :heart:
|
17 |
|
18 |
-
If you have any questions, checkout our [documentation](https://docs.streamlit.io) and [community
|
19 |
-
forums](https://discuss.streamlit.io).
|
|
|
1 |
---
|
2 |
title: Visualize Eval Results
|
3 |
+
emoji: 🔍
|
4 |
colorFrom: red
|
5 |
colorTo: red
|
6 |
sdk: docker
|
|
|
8 |
tags:
|
9 |
- streamlit
|
10 |
pinned: false
|
11 |
+
short_description: Visualize ivrit.ai ASR eval results
|
12 |
---
|
13 |
|
14 |
+
# Hi There 👋
|
15 |
+
|
16 |
+
Load ivrit.ai ASR eval results CSV file to visualize the results.
|
17 |
+
Known Datasets will also allow loading the Audio directly from the HF Hub.
|
18 |
+
Supported known datasets are:
|
19 |
+
|
20 |
+
| Dataset Repo ID + split + reference text feature name | Dataset Config | CSV Output Name |
|
21 |
+
| --------------- | -------------- | ----------- |
|
22 |
+
| ivrit-ai/eval-d1:test:text | None | ivrit_ai_eval_d1 |
|
23 |
+
| upai-inc/saspeech:test:text | None | saspeech |
|
24 |
+
| google/fleurs:test:transcription | he_il | fleurs |
|
25 |
+
| mozilla-foundation/common_voice_17_0:test:sentence | he | common_voice_17 |
|
26 |
+
| imvladikon/hebrew_speech_kan:validation:sentence | None | hebrew_speech_kan |
|
27 |
|
|
|
28 |
|
|
|
|
pyproject.toml
ADDED
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
[project]
|
2 |
+
name = "vis-asr-eval-results"
|
3 |
+
version = "0.1.0"
|
4 |
+
description = "Add your description here"
|
5 |
+
readme = "README.md"
|
6 |
+
requires-python = ">=3.11.9"
|
7 |
+
dependencies = [
|
8 |
+
"hebrew>=0.8.1",
|
9 |
+
"huggingface-hub>=0.30.2",
|
10 |
+
"jiwer>=3.1.0",
|
11 |
+
"pandas>=2.2.3",
|
12 |
+
"soundfile>=0.13.1",
|
13 |
+
"streamlit>=1.45.0",
|
14 |
+
"transformers>=4.51.3",
|
15 |
+
]
|
requirements.txt
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
altair
|
2 |
-
pandas
|
3 |
-
streamlit
|
|
|
|
|
|
|
|
src/app.py
ADDED
@@ -0,0 +1,407 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
import streamlit as st
|
3 |
+
import pandas as pd
|
4 |
+
import jiwer
|
5 |
+
import requests
|
6 |
+
from datetime import datetime
|
7 |
+
from pathlib import Path
|
8 |
+
from st_fixed_container import st_fixed_container
|
9 |
+
from visual_eval.visualization import render_visualize_jiwer_result_html
|
10 |
+
from visual_eval.evaluator import HebrewTextNormalizer
|
11 |
+
|
12 |
+
HF_API_TOKEN = None
|
13 |
+
try:
|
14 |
+
HF_API_TOKEN = st.secrets["HF_API_TOKEN"]
|
15 |
+
except FileNotFoundError:
|
16 |
+
HF_API_TOKEN = os.environ.get("HF_API_TOKEN")
|
17 |
+
has_api_token = HF_API_TOKEN is not None
|
18 |
+
|
19 |
+
known_datasets = [
|
20 |
+
("ivrit-ai/eval-d1:test:text", None, "ivrit_ai_eval_d1"),
|
21 |
+
("upai-inc/saspeech:test:text", None, "saspeech"),
|
22 |
+
("google/fleurs:test:transcription", "he_il", "fleurs"),
|
23 |
+
("mozilla-foundation/common_voice_17_0:test:sentence", "he", "common_voice_17"),
|
24 |
+
("imvladikon/hebrew_speech_kan:validation:sentence", None, "hebrew_speech_kan"),
|
25 |
+
]
|
26 |
+
|
27 |
+
# Initialize session state for audio cache if it doesn't exist
|
28 |
+
if "audio_cache" not in st.session_state:
|
29 |
+
st.session_state.audio_cache = {}
|
30 |
+
|
31 |
+
if "audio_preview_active" not in st.session_state:
|
32 |
+
st.session_state.audio_preview_active = {}
|
33 |
+
|
34 |
+
|
35 |
+
def on_file_upload():
|
36 |
+
st.session_state.audio_cache = {}
|
37 |
+
st.session_state.audio_preview_active = {}
|
38 |
+
st.session_state.selected_entry_idx = 0
|
39 |
+
|
40 |
+
|
41 |
+
def display_rtl(html):
|
42 |
+
"""Render an RTL container with the provided HTML string"""
|
43 |
+
st.markdown(
|
44 |
+
f"""
|
45 |
+
<div dir="rtl" lang="he">
|
46 |
+
{html}
|
47 |
+
</div>
|
48 |
+
""",
|
49 |
+
unsafe_allow_html=True,
|
50 |
+
)
|
51 |
+
|
52 |
+
|
53 |
+
@st.cache_data
|
54 |
+
def calculate_final_metrics(uploaded_file, _df):
|
55 |
+
"""Calculate final metrics for all entries
|
56 |
+
|
57 |
+
Args:
|
58 |
+
uploaded_file: The uploaded file object (For cache hash gen)
|
59 |
+
_df: The dataframe containing the evaluation results (not included in cache hash)
|
60 |
+
|
61 |
+
Returns:
|
62 |
+
A dictionary containing the final metrics
|
63 |
+
"""
|
64 |
+
_df = _df.sort_values(by=["id"])
|
65 |
+
_df["reference_text"] = _df["reference_text"].fillna("")
|
66 |
+
_df["predicted_text"] = _df["predicted_text"].fillna("")
|
67 |
+
|
68 |
+
# convert to list of dicts
|
69 |
+
entries_data = _df.to_dict(orient="records")
|
70 |
+
|
71 |
+
htn = HebrewTextNormalizer()
|
72 |
+
|
73 |
+
# Calculate final metrics
|
74 |
+
results = jiwer.process_words(
|
75 |
+
[htn(entry["reference_text"]) for entry in entries_data],
|
76 |
+
[htn(entry["predicted_text"]) for entry in entries_data],
|
77 |
+
)
|
78 |
+
|
79 |
+
return results
|
80 |
+
|
81 |
+
|
82 |
+
def get_known_dataset_by_output_name(output_name):
|
83 |
+
for dataset in known_datasets:
|
84 |
+
if dataset[2] == output_name:
|
85 |
+
return dataset
|
86 |
+
return None
|
87 |
+
|
88 |
+
|
89 |
+
def get_dataset_entries_audio_urls(dataset, offset=0, max_entries=100):
|
90 |
+
if dataset is None or not has_api_token:
|
91 |
+
return None
|
92 |
+
|
93 |
+
dataset_repo_id, dataset_config, _ = dataset
|
94 |
+
if not dataset_config:
|
95 |
+
dataset_config = "default"
|
96 |
+
if ":" in dataset_repo_id:
|
97 |
+
dataset_repo_id, split, _ = dataset_repo_id.split(":")
|
98 |
+
else:
|
99 |
+
split = "test"
|
100 |
+
|
101 |
+
headers = {"Authorization": f"Bearer {HF_API_TOKEN}"}
|
102 |
+
api_query_params = {
|
103 |
+
"dataset": dataset_repo_id,
|
104 |
+
"config": dataset_config,
|
105 |
+
"split": split,
|
106 |
+
"offset": offset,
|
107 |
+
"length": max_entries,
|
108 |
+
}
|
109 |
+
|
110 |
+
query_params_str = "&".join([f"{k}={v}" for k, v in api_query_params.items()])
|
111 |
+
API_URL = f"https://datasets-server.huggingface.co/rows?{query_params_str}"
|
112 |
+
|
113 |
+
def query():
|
114 |
+
response = requests.get(API_URL, headers=headers)
|
115 |
+
return response.json()
|
116 |
+
|
117 |
+
data = query()
|
118 |
+
|
119 |
+
def get_audio_url(row):
|
120 |
+
audio_feature_list = row["row"]["audio"]
|
121 |
+
first_audio = audio_feature_list[0]
|
122 |
+
return first_audio["src"]
|
123 |
+
|
124 |
+
if "rows" in data and len(data["rows"]) > 0:
|
125 |
+
return [get_audio_url(row) for row in data["rows"]]
|
126 |
+
else:
|
127 |
+
return None
|
128 |
+
|
129 |
+
|
130 |
+
def get_audio_url_for_entry(
|
131 |
+
dataset, entry_idx, cache_neighbors=True, neighbor_range=20
|
132 |
+
):
|
133 |
+
"""
|
134 |
+
Get audio URL for a specific entry and optionally cache neighbors
|
135 |
+
|
136 |
+
Args:
|
137 |
+
dataset: Dataset tuple (repo_id, config, output_name)
|
138 |
+
entry_idx: Index of the entry to get audio URL for
|
139 |
+
cache_neighbors: Whether to cache audio URLs for neighboring entries
|
140 |
+
neighbor_range: Range of neighboring entries to cache
|
141 |
+
|
142 |
+
Returns:
|
143 |
+
Audio URL for the specified entry
|
144 |
+
"""
|
145 |
+
# Calculate the range of entries to load
|
146 |
+
if cache_neighbors:
|
147 |
+
start_idx = max(0, entry_idx - neighbor_range)
|
148 |
+
max_entries = neighbor_range * 2 + 1
|
149 |
+
else:
|
150 |
+
start_idx = entry_idx
|
151 |
+
max_entries = 1
|
152 |
+
|
153 |
+
# Get audio URLs for the range of entries
|
154 |
+
audio_urls = get_dataset_entries_audio_urls(dataset, start_idx, max_entries)
|
155 |
+
|
156 |
+
if not audio_urls:
|
157 |
+
return None
|
158 |
+
|
159 |
+
# Cache the audio URLs
|
160 |
+
for i, url in enumerate(audio_urls):
|
161 |
+
idx = start_idx + i
|
162 |
+
# Extract expiration time from URL if available
|
163 |
+
expires = None
|
164 |
+
if "expires=" in url:
|
165 |
+
try:
|
166 |
+
expires_param = url.split("expires=")[1].split("&")[0]
|
167 |
+
expires = datetime.fromtimestamp(int(expires_param))
|
168 |
+
except (ValueError, IndexError):
|
169 |
+
expires = None
|
170 |
+
|
171 |
+
st.session_state.audio_cache[idx] = {"url": url, "expires": expires}
|
172 |
+
|
173 |
+
# Return the URL for the requested entry
|
174 |
+
relative_idx = entry_idx - start_idx
|
175 |
+
if 0 <= relative_idx < len(audio_urls):
|
176 |
+
return audio_urls[relative_idx]
|
177 |
+
return None
|
178 |
+
|
179 |
+
|
180 |
+
def get_cached_audio_url(entry_idx):
|
181 |
+
"""
|
182 |
+
Get audio URL from cache if available and not expired
|
183 |
+
|
184 |
+
Args:
|
185 |
+
entry_idx: Index of the entry to get audio URL for
|
186 |
+
|
187 |
+
Returns:
|
188 |
+
Audio URL if available in cache and not expired, None otherwise
|
189 |
+
"""
|
190 |
+
if entry_idx not in st.session_state.audio_cache:
|
191 |
+
return None
|
192 |
+
|
193 |
+
cache_entry = st.session_state.audio_cache[entry_idx]
|
194 |
+
|
195 |
+
# Check if the URL is expired
|
196 |
+
if cache_entry["expires"] and datetime.now() > cache_entry["expires"]:
|
197 |
+
return None
|
198 |
+
|
199 |
+
return cache_entry["url"]
|
200 |
+
|
201 |
+
|
202 |
+
def main():
|
203 |
+
st.set_page_config(
|
204 |
+
page_title="ASR Evaluation Visualizer", page_icon="🎤", layout="wide"
|
205 |
+
)
|
206 |
+
|
207 |
+
if not has_api_token:
|
208 |
+
st.warning("No Hugging Face API token found. Audio previews will not work.")
|
209 |
+
|
210 |
+
st.title("ASR Evaluation Visualizer")
|
211 |
+
|
212 |
+
# File uploader
|
213 |
+
uploaded_file = st.file_uploader(
|
214 |
+
"Upload evaluation results CSV", type=["csv"], on_change=on_file_upload
|
215 |
+
)
|
216 |
+
|
217 |
+
if uploaded_file is not None:
|
218 |
+
# Load the data
|
219 |
+
try:
|
220 |
+
eval_results = pd.read_csv(uploaded_file)
|
221 |
+
st.success("File uploaded successfully!")
|
222 |
+
|
223 |
+
with st.sidebar:
|
224 |
+
# Toggle for calculating total metrics
|
225 |
+
show_total_metrics = st.toggle("Show total metrics", value=False)
|
226 |
+
|
227 |
+
if show_total_metrics:
|
228 |
+
total_metrics = calculate_final_metrics(uploaded_file, eval_results)
|
229 |
+
|
230 |
+
# Display total metrics in a nice format
|
231 |
+
with st.container():
|
232 |
+
st.metric("WER", f"{total_metrics.wer * 100:.4f}%")
|
233 |
+
st.table(
|
234 |
+
{
|
235 |
+
"Hits": total_metrics.hits,
|
236 |
+
"Subs": total_metrics.substitutions,
|
237 |
+
"Dels": total_metrics.deletions,
|
238 |
+
"Insrt": total_metrics.insertions,
|
239 |
+
}
|
240 |
+
)
|
241 |
+
|
242 |
+
# Create sidebar for entry selection
|
243 |
+
st.sidebar.header("Select Entry")
|
244 |
+
|
245 |
+
# Add Next/Prev buttons at the top of the sidebar
|
246 |
+
col1, col2 = st.sidebar.columns(2)
|
247 |
+
|
248 |
+
# Define navigation functions
|
249 |
+
def go_prev():
|
250 |
+
if st.session_state.selected_entry_idx > 0:
|
251 |
+
st.session_state.selected_entry_idx -= 1
|
252 |
+
|
253 |
+
def go_next():
|
254 |
+
if st.session_state.selected_entry_idx < len(eval_results) - 1:
|
255 |
+
st.session_state.selected_entry_idx += 1
|
256 |
+
|
257 |
+
# Add navigation buttons
|
258 |
+
col1.button("← Prev", on_click=go_prev, use_container_width=True)
|
259 |
+
col2.button("Next →", on_click=go_next, use_container_width=True)
|
260 |
+
|
261 |
+
# Create a data table with entries and their WER
|
262 |
+
entries_data = []
|
263 |
+
for i in range(len(eval_results)):
|
264 |
+
wer_value = eval_results.iloc[i].get("wer", 0)
|
265 |
+
# Format WER as percentage
|
266 |
+
wer_formatted = (
|
267 |
+
f"{wer_value*100:.2f}%"
|
268 |
+
if isinstance(wer_value, (int, float))
|
269 |
+
else wer_value
|
270 |
+
)
|
271 |
+
entries_data.append({"Entry": f"Entry #{i+1}", "WER": wer_formatted})
|
272 |
+
|
273 |
+
# Create a selection mechanism using radio buttons that look like a table
|
274 |
+
st.sidebar.write("Select an entry:")
|
275 |
+
|
276 |
+
# Use a container for better styling
|
277 |
+
entry_container = st.sidebar.container()
|
278 |
+
|
279 |
+
# Create a radio button for each entry, styled to look like a table row
|
280 |
+
entry_container.radio(
|
281 |
+
"Select an entry",
|
282 |
+
options=list(range(len(eval_results))),
|
283 |
+
format_func=lambda i: f"Entry #{i+1} ({entries_data[i]['WER']})",
|
284 |
+
label_visibility="collapsed",
|
285 |
+
key="selected_entry_idx",
|
286 |
+
)
|
287 |
+
|
288 |
+
# Use the selected entry
|
289 |
+
selected_entry = st.session_state.selected_entry_idx
|
290 |
+
|
291 |
+
# Toggle for normalized vs raw text
|
292 |
+
use_normalized = st.sidebar.toggle("Use normalized text", value=True)
|
293 |
+
|
294 |
+
# Get the text columns based on the toggle
|
295 |
+
if use_normalized:
|
296 |
+
ref_col, hyp_col = "norm_reference_text", "norm_predicted_text"
|
297 |
+
else:
|
298 |
+
ref_col, hyp_col = "reference_text", "predicted_text"
|
299 |
+
|
300 |
+
# Get the reference and hypothesis texts
|
301 |
+
ref, hyp = eval_results.iloc[selected_entry][[ref_col, hyp_col]].values
|
302 |
+
|
303 |
+
st.header("Visualization")
|
304 |
+
|
305 |
+
# Check if the CSV file is from a known dataset
|
306 |
+
dataset_name = None
|
307 |
+
|
308 |
+
# If no dataset column, try to infer from filename
|
309 |
+
if uploaded_file is not None:
|
310 |
+
filename_stem = Path(uploaded_file.name).stem
|
311 |
+
dataset_name = filename_stem
|
312 |
+
|
313 |
+
if not dataset_name and "dataset" in eval_results.columns:
|
314 |
+
dataset_name = eval_results.iloc[selected_entry]["dataset"]
|
315 |
+
|
316 |
+
# Get the known dataset if available
|
317 |
+
known_dataset = get_known_dataset_by_output_name(dataset_name)
|
318 |
+
|
319 |
+
# Display audio preview button if from a known dataset
|
320 |
+
if known_dataset:
|
321 |
+
# Check if we have the audio URL in cache
|
322 |
+
audio_url = get_cached_audio_url(selected_entry)
|
323 |
+
|
324 |
+
audio_preview_active = st.session_state.audio_preview_active.get(
|
325 |
+
selected_entry, False
|
326 |
+
)
|
327 |
+
|
328 |
+
preview_audio = False
|
329 |
+
if not audio_preview_active:
|
330 |
+
# Create a button to preview audio
|
331 |
+
preview_audio = st.button("Preview Audio", key="preview_audio")
|
332 |
+
|
333 |
+
if preview_audio or audio_url:
|
334 |
+
st.session_state.audio_preview_active[selected_entry] = True
|
335 |
+
with st_fixed_container(
|
336 |
+
mode="sticky", position="top", border=True, margin=0
|
337 |
+
):
|
338 |
+
# If button clicked or we already have the URL, get/use the audio URL
|
339 |
+
if not audio_url:
|
340 |
+
with st.spinner("Loading audio..."):
|
341 |
+
audio_url = get_audio_url_for_entry(
|
342 |
+
known_dataset, selected_entry
|
343 |
+
)
|
344 |
+
|
345 |
+
# Display the audio player in the sticky container at the top
|
346 |
+
if audio_url:
|
347 |
+
st.audio(audio_url)
|
348 |
+
else:
|
349 |
+
st.error("Failed to load audio for this entry.")
|
350 |
+
|
351 |
+
# Display the visualization
|
352 |
+
html = render_visualize_jiwer_result_html(ref, hyp)
|
353 |
+
display_rtl(html)
|
354 |
+
|
355 |
+
# Display metadata
|
356 |
+
st.header("Metadata")
|
357 |
+
metadata_cols = [
|
358 |
+
"metadata_uuid",
|
359 |
+
"model",
|
360 |
+
"dataset",
|
361 |
+
"dataset_split",
|
362 |
+
"engine",
|
363 |
+
]
|
364 |
+
metadata = eval_results.iloc[selected_entry][metadata_cols]
|
365 |
+
|
366 |
+
# Create a DataFrame for better display
|
367 |
+
metadata_df = pd.DataFrame(
|
368 |
+
{"Field": metadata_cols, "Value": metadata.values}
|
369 |
+
)
|
370 |
+
st.table(metadata_df)
|
371 |
+
|
372 |
+
# If we have audio URL, display it in the sticky container
|
373 |
+
if "audio_url" in locals() and audio_url:
|
374 |
+
pass # CSS is now applied globally
|
375 |
+
|
376 |
+
except Exception as e:
|
377 |
+
st.error(f"Error processing file: {str(e)}")
|
378 |
+
else:
|
379 |
+
st.info(
|
380 |
+
"Please upload an evaluation results CSV file to visualize the results."
|
381 |
+
)
|
382 |
+
st.markdown(
|
383 |
+
"""
|
384 |
+
### Expected CSV Format
|
385 |
+
The CSV should have the following columns:
|
386 |
+
- id
|
387 |
+
- reference_text
|
388 |
+
- predicted_text
|
389 |
+
- norm_reference_text
|
390 |
+
- norm_predicted_text
|
391 |
+
- wer
|
392 |
+
- wil
|
393 |
+
- substitutions
|
394 |
+
- deletions
|
395 |
+
- insertions
|
396 |
+
- hits
|
397 |
+
- metadata_uuid
|
398 |
+
- model
|
399 |
+
- dataset
|
400 |
+
- dataset_split
|
401 |
+
- engine
|
402 |
+
"""
|
403 |
+
)
|
404 |
+
|
405 |
+
|
406 |
+
if __name__ == "__main__":
|
407 |
+
main()
|
src/sample_inputs/eval_results.csv
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
id,reference_text,predicted_text,norm_reference_text,norm_predicted_text,wer,wil,substitutions,deletions,insertions,hits,metadata_uuid,model,dataset,dataset_split,engine
|
src/sample_inputs/ivrit_ai_eval_d1.csv
ADDED
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
id,reference_text,predicted_text,norm_reference_text,norm_predicted_text,wer,wil,substitutions,deletions,insertions,hits,metadata_uuid,model,dataset,dataset_split,engine
|
2 |
+
1,"אני אוהב לתכנת בפייתון","אני אוהב לתכנת בפיתון","אני אוהב לתכנת בפייתון","אני אוהב לתכנת בפיתון",0.2,0.2,1,0,0,4,12345,whisper-large-v3,ivrit-ai/eval-d1,test,whisper
|
3 |
+
2,"שלום עולם","שלום עולם","שלום עולם","שלום עולם",0.0,0.0,0,0,0,2,67890,whisper-large-v3,ivrit-ai/eval-d1,test,whisper
|
4 |
+
3,"ברוכים הבאים לישראל","ברוכים הבאים לישראל","ברוכים הבאים לישראל","ברוכים הבאים לישראל",0.0,0.0,0,0,0,3,13579,whisper-large-v3,ivrit-ai/eval-d1,test,whisper
|
5 |
+
4,"תל אביב היא עיר יפה","תל אביב היא עיר יפה מאוד","תל אביב היא עיר יפה","תל אביב היא עיר יפה מאוד",0.2,0.2,0,0,1,5,24680,whisper-large-v3,ivrit-ai/eval-d1,test,whisper
|
6 |
+
5,"אני גר בירושלים","אני גר בירושלים","אני גר בירושלים","אני גר בירושלים",0.0,0.0,0,0,0,3,97531,whisper-large-v3,ivrit-ai/eval-d1,test,whisper
|
src/st_fixed_container.py
ADDED
@@ -0,0 +1,212 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from typing import Literal
|
2 |
+
|
3 |
+
import streamlit as st
|
4 |
+
from streamlit.components.v1 import html
|
5 |
+
|
6 |
+
"""
|
7 |
+
st_fixed_container consist of two parts - fixed container and opaque container.
|
8 |
+
Fixed container is a container that is fixed to the top or bottom of the screen.
|
9 |
+
|
10 |
+
When transparent is set to True, the container is typical `st.container`, which is transparent by default.
|
11 |
+
When transparent is set to False, the container is custom opaque_container, that updates its background color to match the background color of the app.
|
12 |
+
|
13 |
+
Opaque container is a helper class, but can be used to create more custom views. See main for examples.
|
14 |
+
|
15 |
+
"""
|
16 |
+
OPAQUE_CONTAINER_CSS = """
|
17 |
+
|
18 |
+
:root {{
|
19 |
+
--background-color: #ffffff; /* Default background color */
|
20 |
+
}}
|
21 |
+
|
22 |
+
|
23 |
+
div[data-testid="stVerticalBlockBorderWrapper"]:has(div.opaque-container-{id}):not(:has(div.not-opaque-container)) div[data-testid="stVerticalBlock"]:has(div.opaque-container-{id}):not(:has(div.not-opaque-container)) > div[data-testid="stVerticalBlockBorderWrapper"] {{
|
24 |
+
background-color: var(--background-color);
|
25 |
+
width: 100%;
|
26 |
+
}}
|
27 |
+
|
28 |
+
|
29 |
+
|
30 |
+
div[data-testid="stVerticalBlockBorderWrapper"]:has(div.opaque-container-{id}):not(:has(div.not-opaque-container)) div[data-testid="stVerticalBlock"]:has(div.opaque-container-{id}):not(:has(div.not-opaque-container)) > div[data-testid="element-container"] {{
|
31 |
+
display: none;
|
32 |
+
}}
|
33 |
+
|
34 |
+
|
35 |
+
div[data-testid="stVerticalBlockBorderWrapper"]:has(div.not-opaque-container):not(:has(div[class^='opaque-container-'])) {{
|
36 |
+
display: none;
|
37 |
+
}}
|
38 |
+
""".strip()
|
39 |
+
|
40 |
+
OPAQUE_CONTAINER_JS = """
|
41 |
+
const root = parent.document.querySelector('.stApp');
|
42 |
+
let lastBackgroundColor = null;
|
43 |
+
|
44 |
+
|
45 |
+
function updateContainerBackground(currentBackground) {
|
46 |
+
parent.document.documentElement.style.setProperty('--background-color', currentBackground);
|
47 |
+
;
|
48 |
+
}
|
49 |
+
|
50 |
+
function checkForBackgroundColorChange() {
|
51 |
+
const style = window.getComputedStyle(root);
|
52 |
+
const currentBackgroundColor = style.backgroundColor;
|
53 |
+
if (currentBackgroundColor !== lastBackgroundColor) {
|
54 |
+
lastBackgroundColor = currentBackgroundColor; // Update the last known value
|
55 |
+
updateContainerBackground(lastBackgroundColor);
|
56 |
+
}
|
57 |
+
}
|
58 |
+
|
59 |
+
const observerCallback = (mutationsList, observer) => {
|
60 |
+
for(let mutation of mutationsList) {
|
61 |
+
if (mutation.type === 'attributes' && (mutation.attributeName === 'class' || mutation.attributeName === 'style')) {
|
62 |
+
checkForBackgroundColorChange();
|
63 |
+
}
|
64 |
+
}
|
65 |
+
};
|
66 |
+
|
67 |
+
const main = () => {
|
68 |
+
checkForBackgroundColorChange();
|
69 |
+
|
70 |
+
const observer = new MutationObserver(observerCallback);
|
71 |
+
observer.observe(root, { attributes: true, childList: false, subtree: false });
|
72 |
+
}
|
73 |
+
|
74 |
+
// main();
|
75 |
+
document.addEventListener("DOMContentLoaded", main);
|
76 |
+
""".strip()
|
77 |
+
|
78 |
+
|
79 |
+
def st_opaque_container(
|
80 |
+
*,
|
81 |
+
height: int | None = None,
|
82 |
+
border: bool | None = None,
|
83 |
+
key: str | None = None,
|
84 |
+
):
|
85 |
+
global opaque_counter
|
86 |
+
|
87 |
+
opaque_container = st.container()
|
88 |
+
non_opaque_container = st.container()
|
89 |
+
css = OPAQUE_CONTAINER_CSS.format(id=key)
|
90 |
+
with opaque_container:
|
91 |
+
html(f"<script>{OPAQUE_CONTAINER_JS}</script>", scrolling=False, height=0)
|
92 |
+
st.markdown(f"<style>{css}</style>", unsafe_allow_html=True)
|
93 |
+
st.markdown(
|
94 |
+
f"<div class='opaque-container-{key}'></div>",
|
95 |
+
unsafe_allow_html=True,
|
96 |
+
)
|
97 |
+
with non_opaque_container:
|
98 |
+
st.markdown(
|
99 |
+
f"<div class='not-opaque-container'></div>",
|
100 |
+
unsafe_allow_html=True,
|
101 |
+
)
|
102 |
+
|
103 |
+
return opaque_container.container(height=height, border=border)
|
104 |
+
|
105 |
+
|
106 |
+
FIXED_CONTAINER_CSS = """
|
107 |
+
|
108 |
+
div[data-testid="stVerticalBlockBorderWrapper"]:has(div.fixed-container-{id}):not(:has(div.not-fixed-container)){{
|
109 |
+
background-color: transparent;
|
110 |
+
position: {mode};
|
111 |
+
width: inherit;
|
112 |
+
background-color: inherit;
|
113 |
+
{position}: {margin};
|
114 |
+
z-index: 999;
|
115 |
+
|
116 |
+
}}
|
117 |
+
|
118 |
+
div[data-testid="stVerticalBlockBorderWrapper"]:has(div.fixed-container-{id}):not(:has(div.not-fixed-container)) div[data-testid="stVerticalBlock"]:has(div.fixed-container-{id}):not(:has(div.not-fixed-container)) > div[data-testid="element-container"] {{
|
119 |
+
display: none;
|
120 |
+
}}
|
121 |
+
|
122 |
+
|
123 |
+
div[data-testid="stVerticalBlockBorderWrapper"]:has(div.not-fixed-container):not(:has(div[class^='fixed-container-'])) {{
|
124 |
+
display: none;
|
125 |
+
}}
|
126 |
+
""".strip()
|
127 |
+
|
128 |
+
MARGINS = {
|
129 |
+
"top": "2.875rem",
|
130 |
+
"bottom": "0",
|
131 |
+
}
|
132 |
+
|
133 |
+
|
134 |
+
def st_fixed_container(
|
135 |
+
*,
|
136 |
+
height: int | None = None,
|
137 |
+
border: bool | None = None,
|
138 |
+
mode: Literal["fixed", "sticky"] = "fixed",
|
139 |
+
position: Literal["top", "bottom"] = "top",
|
140 |
+
margin: str | None = None,
|
141 |
+
transparent: bool = False,
|
142 |
+
key: str | None = None,
|
143 |
+
):
|
144 |
+
if margin is None:
|
145 |
+
margin = MARGINS[position]
|
146 |
+
global fixed_counter
|
147 |
+
fixed_container = st.container()
|
148 |
+
non_fixed_container = st.container()
|
149 |
+
css = FIXED_CONTAINER_CSS.format(
|
150 |
+
mode=mode,
|
151 |
+
position=position,
|
152 |
+
margin=margin,
|
153 |
+
id=key,
|
154 |
+
)
|
155 |
+
|
156 |
+
def render_content():
|
157 |
+
with fixed_container:
|
158 |
+
if transparent:
|
159 |
+
return st.container(height=height, border=border)
|
160 |
+
|
161 |
+
return st_opaque_container(
|
162 |
+
height=height, border=border, key=f"opaque_{key}"
|
163 |
+
)
|
164 |
+
|
165 |
+
def render_non_content():
|
166 |
+
with fixed_container:
|
167 |
+
st.markdown(f"<style>{css}</style>", unsafe_allow_html=True)
|
168 |
+
st.markdown(
|
169 |
+
f"<div class='fixed-container-{key}'></div>",
|
170 |
+
unsafe_allow_html=True,
|
171 |
+
)
|
172 |
+
with non_fixed_container:
|
173 |
+
st.markdown(
|
174 |
+
f"<div class='not-fixed-container'></div>",
|
175 |
+
unsafe_allow_html=True,
|
176 |
+
)
|
177 |
+
|
178 |
+
result = None
|
179 |
+
|
180 |
+
if position == "top":
|
181 |
+
result = render_content()
|
182 |
+
render_non_content()
|
183 |
+
else:
|
184 |
+
render_non_content()
|
185 |
+
result = render_content()
|
186 |
+
|
187 |
+
return result
|
188 |
+
|
189 |
+
|
190 |
+
if __name__ == "__main__":
|
191 |
+
for i in range(30):
|
192 |
+
st.write(f"Line {i}")
|
193 |
+
|
194 |
+
# with st_fixed_container(mode="sticky", position="bottom", border=True):
|
195 |
+
# with st_fixed_container(mode="sticky", position="top", border=True):
|
196 |
+
# with st_fixed_container(mode="fixed", position="bottom", border=True):
|
197 |
+
with st_fixed_container(mode="fixed", position="top", border=True):
|
198 |
+
st.write("This is a fixed container.")
|
199 |
+
st.write("This is a fixed container.")
|
200 |
+
st.write("This is a fixed container.")
|
201 |
+
|
202 |
+
# The following code creates a small control panel on the right side of the screen with two buttons inside it:
|
203 |
+
with st_fixed_container(mode="fixed", position="bottom", transparent=True):
|
204 |
+
_, right = st.columns([0.7, 0.3])
|
205 |
+
with right:
|
206 |
+
with st_opaque_container(border=True):
|
207 |
+
st.button("Feedback", use_container_width=True)
|
208 |
+
st.button("Clean up", use_container_width=True)
|
209 |
+
|
210 |
+
st.container(border=True).write("This is a regular container.")
|
211 |
+
for i in range(30):
|
212 |
+
st.write(f"Line {i}")
|
src/streamlit_app.py
DELETED
@@ -1,40 +0,0 @@
|
|
1 |
-
import altair as alt
|
2 |
-
import numpy as np
|
3 |
-
import pandas as pd
|
4 |
-
import streamlit as st
|
5 |
-
|
6 |
-
"""
|
7 |
-
# Welcome to Streamlit!
|
8 |
-
|
9 |
-
Edit `/streamlit_app.py` to customize this app to your heart's desire :heart:.
|
10 |
-
If you have any questions, checkout our [documentation](https://docs.streamlit.io) and [community
|
11 |
-
forums](https://discuss.streamlit.io).
|
12 |
-
|
13 |
-
In the meantime, below is an example of what you can do with just a few lines of code:
|
14 |
-
"""
|
15 |
-
|
16 |
-
num_points = st.slider("Number of points in spiral", 1, 10000, 1100)
|
17 |
-
num_turns = st.slider("Number of turns in spiral", 1, 300, 31)
|
18 |
-
|
19 |
-
indices = np.linspace(0, 1, num_points)
|
20 |
-
theta = 2 * np.pi * num_turns * indices
|
21 |
-
radius = indices
|
22 |
-
|
23 |
-
x = radius * np.cos(theta)
|
24 |
-
y = radius * np.sin(theta)
|
25 |
-
|
26 |
-
df = pd.DataFrame({
|
27 |
-
"x": x,
|
28 |
-
"y": y,
|
29 |
-
"idx": indices,
|
30 |
-
"rand": np.random.randn(num_points),
|
31 |
-
})
|
32 |
-
|
33 |
-
st.altair_chart(alt.Chart(df, height=700, width=700)
|
34 |
-
.mark_point(filled=True)
|
35 |
-
.encode(
|
36 |
-
x=alt.X("x", axis=None),
|
37 |
-
y=alt.Y("y", axis=None),
|
38 |
-
color=alt.Color("idx", legend=None, scale=alt.Scale()),
|
39 |
-
size=alt.Size("rand", legend=None, scale=alt.Scale(range=[1, 150])),
|
40 |
-
))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
src/visual_eval/__init__.py
ADDED
File without changes
|
src/visual_eval/evaluator.py
ADDED
@@ -0,0 +1,56 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
"""
|
2 |
+
Evaluator module.
|
3 |
+
Provides functions to evaluate a given model on a dataset sample using the Faster Whisper model,
|
4 |
+
and generate HTML visualization blocks of the word alignment.
|
5 |
+
"""
|
6 |
+
|
7 |
+
import concurrent.futures
|
8 |
+
import gc
|
9 |
+
import io
|
10 |
+
import queue
|
11 |
+
import threading
|
12 |
+
from typing import Dict, Generator, List
|
13 |
+
|
14 |
+
import soundfile as sf
|
15 |
+
from hebrew import Hebrew
|
16 |
+
from tqdm import tqdm
|
17 |
+
from transformers.models.whisper.english_normalizer import BasicTextNormalizer
|
18 |
+
|
19 |
+
from visual_eval.visualization import render_visualize_jiwer_result_html
|
20 |
+
|
21 |
+
|
22 |
+
class HebrewTextNormalizer(BasicTextNormalizer):
    """Text normalizer for Hebrew ASR evaluation.

    Extends :class:`BasicTextNormalizer` by stripping, before the base
    normalization runs: niqqud (Hebrew vowel diacritics), invisible
    Unicode directionality/zero-width control characters, and quote
    characters.
    """

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)

        # Invisible / bidi-control characters that frequently pollute
        # Hebrew transcripts and must not count as word differences.
        invisible_chars = (
            "\u061c"  # Arabic letter mark
            "\u200b\u200c\u200d"  # Zero-width space, non-joiner, joiner
            "\u200e\u200f"  # LTR and RTL marks
            "\u202a\u202b\u202c\u202d\u202e"  # LTR/RTL embedding, pop, override
            "\u2066\u2067\u2068\u2069"  # Isolate controls
            "\ufeff"  # Zero-width no-break space
        )
        # Translation table mapping each superfluous char to None (deletion).
        self.superfluous_hebrew_unicode_symbols_translator = str.maketrans(
            dict.fromkeys(map(ord, invisible_chars))
        )

        # Table deleting double and single quotes.
        self.quotes_translator = str.maketrans(dict.fromkeys(map(ord, "\"'")))

    def __remove_niqqud(self, text: str) -> str:
        # Strip Hebrew vowel diacritics via the `hebrew` package.
        return Hebrew(text).no_niqqud().string

    def __remove_superfluous_hebrew_unicode_symbols(self, text: str) -> str:
        return text.translate(self.superfluous_hebrew_unicode_symbols_translator)

    def __remove_quotes(self, text: str) -> str:
        return text.translate(self.quotes_translator)

    def __call__(self, text):
        # Hebrew-specific cleanup first, then the generic base normalization.
        text = self.__remove_niqqud(text)
        text = self.__remove_superfluous_hebrew_unicode_symbols(text)
        text = self.__remove_quotes(text)
        return super().__call__(text)
|
src/visual_eval/visualization.py
ADDED
@@ -0,0 +1,279 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
"""
|
2 |
+
Visualization module.
|
3 |
+
Provides functions to render HTML visualizations of word alignment between reference and hypothesis texts,
|
4 |
+
and to generate the complete results HTML page with an embedded audio element and progress status.
|
5 |
+
"""
|
6 |
+
|
7 |
+
from itertools import zip_longest
|
8 |
+
from jiwer import process_words
|
9 |
+
import hashlib
|
10 |
+
|
11 |
+
def render_visualize_jiwer_result_html(ref: str, hyp: str, title: str = "", model_id: str | None = None) -> str:
    """
    Generate an HTML visualization of the alignment between reference and hypothesis texts.

    Args:
        ref: The reference text.
        hyp: The hypothesis (transcribed) text.
        title: A title for the evaluation block (e.g., model name).
        model_id: A unique identifier for the model (used in word IDs).

    Returns:
        An HTML string visualizing word-level alignments and error metrics.
    """
    # Use the title as model_id if none provided
    if model_id is None:
        # md5 of the title gives a short, stable id for DOM data attributes.
        model_id = hashlib.md5(title.encode()).hexdigest()[:8]

    # Process word alignment via jiwer
    word_output = process_words(ref, hyp)
    # jiwer returns one alignment per (ref, hyp) pair; we pass a single pair.
    alignment_chunks = word_output.alignments[0]

    # Each entry is (ref_cell_html, hyp_cell_html, ref_position).
    columns = []
    ref_position = 0  # This tracks the position in the reference text

    for chunk in alignment_chunks:
        if chunk.type == "equal":
            # Matching words: plain cells, same data-ref-pos for hover linking.
            words = word_output.references[0][chunk.ref_start_idx : chunk.ref_end_idx]
            for word in words:
                ref_cell = f'<span class="word-item ref-word" data-ref-pos="{ref_position}" data-ref-word="{word}">{word}</span>'
                hyp_cell = f'<span class="word-item hyp-word" data-ref-pos="{ref_position}" data-ref-word="{word}">{word}</span>'
                columns.append((ref_cell, hyp_cell, ref_position))
                ref_position += 1

        elif chunk.type == "delete":
            # Deletion: reference word present, hypothesis cell is a pink gap.
            words = word_output.references[0][chunk.ref_start_idx : chunk.ref_end_idx]
            for word in words:
                ref_cell = f'<span class="word-item ref-word" data-ref-pos="{ref_position}" data-ref-word="{word}">{word}</span>'
                hyp_cell = '<span style="background-color: #ffb3d7; padding: 0 4px;">&nbsp;</span>'
                columns.append((ref_cell, hyp_cell, ref_position))
                ref_position += 1

        elif chunk.type == "insert":
            # Insertion: extra hypothesis words shown on green, with no
            # reference counterpart.
            words = word_output.hypotheses[0][chunk.hyp_start_idx : chunk.hyp_end_idx]
            # For inserted words, they are linked to the previous reference position
            # If we're at the beginning, use position 0
            # NOTE(review): the conditional is redundant — max(0, ...) already
            # clamps to 0 — but it is harmless.
            last_ref_pos = max(0, ref_position - 1) if ref_position > 0 else 0
            for word in words:
                ref_cell = '<span>&nbsp;</span>'
                hyp_cell = f'<span class="word-item hyp-word" data-ref-pos="{last_ref_pos}" data-inserted="true" style="background-color: #99f7c8; padding: 0 4px;">{word}</span>'
                columns.append((ref_cell, hyp_cell, last_ref_pos))
            # Note: ref_position is NOT incremented for inserts

        elif chunk.type == "substitute":
            ref_words = word_output.references[0][chunk.ref_start_idx : chunk.ref_end_idx]
            hyp_words = word_output.hypotheses[0][chunk.hyp_start_idx : chunk.hyp_end_idx]

            # zip_longest handles unequal chunk lengths: leftover ref words
            # become deletions, leftover hyp words become insertions.
            for ref_word, hyp_word in zip_longest(ref_words, hyp_words, fillvalue=""):
                if ref_word:  # Only increment position for actual reference words
                    ref_cell = f'<span class="word-item ref-word" data-ref-pos="{ref_position}" data-ref-word="{ref_word}" style="background-color: #dddddd;">{ref_word}</span>'
                    if hyp_word:
                        # Substitution shown on orange.
                        hyp_cell = f'<span class="word-item hyp-word" data-ref-pos="{ref_position}" data-subst="true" style="background-color: #ffc04d; padding: 0 4px;">{hyp_word}</span>'
                    else:
                        # No paired hypothesis word — render as a deletion gap.
                        hyp_cell = '<span style="background-color: #ffb3d7; padding: 0 4px;">&nbsp;</span>'
                    columns.append((ref_cell, hyp_cell, ref_position))
                    ref_position += 1
                elif hyp_word:  # Extra hypothesis words with no reference pair
                    # Link to previous reference position
                    last_ref_pos = max(0, ref_position - 1)
                    ref_cell = '<span>&nbsp;</span>'
                    hyp_cell = f'<span class="word-item hyp-word" data-ref-pos="{last_ref_pos}" data-inserted="true" style="background-color: #99f7c8; padding: 0 4px;">{hyp_word}</span>'
                    columns.append((ref_cell, hyp_cell, last_ref_pos))

    # Create HTML visualization
    html_blocks = []
    metrics_results_str = f"WER: {word_output.wer * 100:0.04f}%, WIL: {word_output.wil * 100:0.04f}%"
    summary_operations_str = f"Subs: {word_output.substitutions}, Dels: {word_output.deletions}, Insrt: {word_output.insertions}"

    # Header row: metrics on the left, title centered, operation counts right.
    html_blocks.append(
        f"<div dir='ltr' class='model-result' data-model-id='{model_id}' style='font-size: 1.25em; margin-bottom: 10px; display: flex; justify-content: space-between; gap: 1.5em;'>"
        f"<div style='flex: 0 0 content;'>{metrics_results_str}</div>"
        f"<div>{title}</div>"
        f"<div style='flex: 0 0 content;'>{summary_operations_str}</div></div>"
    )

    # Flex container: each word pair is a column (ref above, hyp below).
    flex_container = f'<div class="word-alignment-container" data-model-id="{model_id}" style="display: flex; flex-wrap: wrap; margin-bottom: 10px;">'
    for ref_cell, hyp_cell, ref_pos in columns:
        cell_html = (
            f'<div class="word-pair" data-ref-pos="{ref_pos}" style="display: flex; flex-direction: column; align-items: center; border-bottom: 1px solid grey; '
            'padding-left: 1em; font-family: monospace;">'
            f'<div style="text-align: center;">{ref_cell}</div>'
            f'<div style="text-align: center;">{hyp_cell}</div>'
            '</div>'
        )
        flex_container += cell_html
    flex_container += '</div>'
    html_blocks.append(flex_container)

    html_string = f'<div class="model-block" data-model-id="{model_id}" style="background: white; color: black; margin-bottom: 20px;">' + "\n".join(html_blocks) + '</div>'

    return html_string
|
111 |
+
|
112 |
+
def generate_results_html(dataset_description: str, html_blocks: list, audio_file: str, timestamp: str, progress: tuple | None = None) -> str:
    """
    Generate the complete HTML results page including an audio player, all evaluation blocks, and progress status.

    Args:
        dataset_description: A string describing the dataset.
        html_blocks: A list of HTML strings (one per model evaluation).
        audio_file: The filename of the saved audio sample.
        timestamp: The timestamp string used in titles.
        progress: A tuple (done, total) indicating the number of models evaluated so far.

    Returns:
        A complete HTML document as a string.
    """
    progress_html = ""
    auto_scroll_to_bottom_on_load = ""
    if progress:
        done, total = progress
        progress_html = f"<div style='margin-bottom:20px;'><strong>Progress:</strong> {done} of {total} models evaluated.</div>"
        if done < total:
            # Evaluation still running: scroll to the newest results on load.
            auto_scroll_to_bottom_on_load = """
            <script type="text/javascript">
                document.getElementById('results-container').scrollTop = document.getElementById('results-container').scrollHeight;
            </script>
            """

    # Manual refresh button so the user can poll for new model results.
    refresh_page_control = """
    <button onclick="location.reload();">Refresh Page</button>
    """
    # Inline audio player for the evaluated sample.
    audio_element = f"""
    <div style="margin-bottom: 20px;">
        <audio controls>
            <source src="{audio_file}" type="audio/mp3">
            Your browser does not support the audio element.
        </audio>
    </div>
    """

    # Add JavaScript for reference-based word highlighting with sticky functionality
    # (plain string, not an f-string, so JS braces need no escaping).
    highlighting_js = """
    <script type="text/javascript">
    document.addEventListener('DOMContentLoaded', function() {
        // Track the currently selected reference position
        let selectedRefPos = null;

        // Helper function to apply highlighting
        function highlightPosition(refPos, isSticky = false) {
            // Apply highlighting style
            const highlightStyle = 'underline';

            // Highlight all elements with the matching reference position
            document.querySelectorAll(`.word-item[data-ref-pos="${refPos}"]`).forEach(el => {
                el.style.textDecoration = highlightStyle;
                el.style.textDecorationThickness = '2px';
                el.style.textDecorationColor = isSticky ? 'red' : 'blue';
            });
        }

        // Helper function to remove highlighting
        function removeHighlighting(refPos) {
            // Don't remove highlighting if this is the selected position
            if (refPos === selectedRefPos) return;

            document.querySelectorAll(`.word-item[data-ref-pos="${refPos}"]`).forEach(el => {
                el.style.textDecoration = 'none';
            });
        }

        // Helper function to clear all sticky highlighting
        function clearStickyHighlighting() {
            if (selectedRefPos !== null) {
                document.querySelectorAll(`.word-item[data-ref-pos="${selectedRefPos}"]`).forEach(el => {
                    el.style.textDecoration = 'none';
                });

                selectedRefPos = null;
            }
        }

        // Use event delegation for all word-alignment-containers
        document.querySelectorAll('.word-alignment-container').forEach(container => {
            // Mouseover (replaces mouseenter on individual elements)
            container.addEventListener('mouseover', function(event) {
                const target = event.target.closest('.word-item');
                if (!target) return;

                const refPos = target.dataset.refPos;
                if (!refPos) return;

                highlightPosition(refPos, false);
            });

            // Mouseout (replaces mouseleave on individual elements)
            container.addEventListener('mouseout', function(event) {
                const target = event.target.closest('.word-item');
                if (!target) return;

                const refPos = target.dataset.refPos;
                if (!refPos) return;

                removeHighlighting(refPos);
            });

            // Click for sticky highlighting
            container.addEventListener('click', function(event) {
                const target = event.target.closest('.word-item');
                if (!target) return;

                const refPos = target.dataset.refPos;
                if (!refPos) return;

                // If this position is already selected, clear it
                if (selectedRefPos === refPos) {
                    clearStickyHighlighting();
                } else {
                    // Clear any existing sticky highlighting
                    clearStickyHighlighting();

                    // Set new selected position
                    selectedRefPos = refPos;

                    // Apply sticky highlighting
                    highlightPosition(refPos, true);
                }
            });
        });

        // Add a click handler on the document to clear sticky highlighting when clicking elsewhere
        document.addEventListener('click', function(e) {
            // If the click wasn't on a word item or word pair, clear sticky highlighting
            if (!e.target.closest('.word-item') && !e.target.closest('.word-pair') && selectedRefPos !== null) {
                clearStickyHighlighting();
            }
        });
    });
    </script>
    """

    # Add CSS for hover effects
    highlighting_css = """
    <style>
    .word-item {
        cursor: pointer;
        transition: all 0.2s;
    }
    </style>
    """

    # Assemble the final RTL (Hebrew) page.
    results_html = f"""
    <html dir="rtl" lang="he">
    <head>
        <meta charset="utf-8">
        <title>Evaluation Results - {dataset_description} - {timestamp}</title>
        {highlighting_css}
    </head>
    <body>
        <h3>Evaluation Results - {dataset_description} - {timestamp}</h3>
        {progress_html}{refresh_page_control}
        {audio_element}
        <div id="results-container" style="max-height: 80vh; overflow-y: auto;">
            {''.join(html_blocks)}
        </div>
        {highlighting_js}
        {auto_scroll_to_bottom_on_load}
    </body>
    </html>
    """
    return results_html
|
uv.lock
ADDED
The diff for this file is too large to render.
See raw diff
|
|