Spaces:

Sean546
/

Saskatoon-Hospital-Monitor

Sleeping

App Files Files Community

Sean546 committed on Jul 17

Commit

39f6d09

verified ·

1 Parent(s): b65af9d

Update src/streamlit_app.py

Browse files

Files changed (1) hide show

src/streamlit_app.py +147 -38

src/streamlit_app.py CHANGED Viewed

@@ -1,40 +1,149 @@
-import altair as alt
-import numpy as np
-import pandas as pd
 import streamlit as st
-"""
-# Welcome to Streamlit!
-Edit `/streamlit_app.py` to customize this app to your heart's desire :heart:.
-If you have any questions, checkout our [documentation](https://docs.streamlit.io) and [community
-forums](https://discuss.streamlit.io).
-In the meantime, below is an example of what you can do with just a few lines of code:
-"""
-num_points = st.slider("Number of points in spiral", 1, 10000, 1100)
-num_turns = st.slider("Number of turns in spiral", 1, 300, 31)
-indices = np.linspace(0, 1, num_points)
-theta = 2 * np.pi * num_turns * indices
-radius = indices
-x = radius * np.cos(theta)
-y = radius * np.sin(theta)
-df = pd.DataFrame({
-    "x": x,
-    "y": y,
-    "idx": indices,
-    "rand": np.random.randn(num_points),
-})
-st.altair_chart(alt.Chart(df, height=700, width=700)
-    .mark_point(filled=True)
-    .encode(
-        x=alt.X("x", axis=None),
-        y=alt.Y("y", axis=None),
-        color=alt.Color("idx", legend=None, scale=alt.Scale()),
-        size=alt.Size("rand", legend=None, scale=alt.Scale(range=[1, 150])),
-    ))

 import streamlit as st
+import requests
+import fitz  # PyMuPDF
+import re
+import pandas as pd
+from datetime import datetime
+import os
+import time
+# --- Configuration ---
+PDF_URL = "https://www.ehealthsask.ca/reporting/Documents/SaskatoonHospitalBedCapacity.pdf"
+DATA_FILE = "hospital_data.csv"
+HOSPITALS = ["RUH", "SCH", "SPH", "JPCH"]
+# --- Main Functions ---
+def fetch_and_parse_pdf():
+    """Download the bed-capacity PDF and extract each hospital's waiting count.
+
+    Returns:
+        A list of dicts with keys "timestamp" (ISO-8601 string), "hospital"
+        and "waiting" (int), one per entry of HOSPITALS whose pattern was
+        found in the PDF text; the list is empty when nothing matched.
+        Returns None when the download or the parsing raised; in that case
+        the error is reported through st.error instead of propagating.
+    """
+    try:
+        # Download the PDF content
+        response = requests.get(PDF_URL, timeout=15)
+        response.raise_for_status()  # Raise an exception for bad status codes
+        # Extract text using PyMuPDF. The context manager closes the document
+        # even when text extraction fails part-way through.
+        with fitz.open(stream=response.content, filetype="pdf") as doc:
+            full_text = "".join(page.get_text() for page in doc)
+        # Take the timestamp once so every row from this fetch shares it.
+        fetched_at = datetime.now().isoformat()
+        data = []
+        for hospital in HOSPITALS:
+            # Hospital code, then "Emergency", then the number after "Waiting:".
+            # DOTALL lets ".*?" span line breaks in the extracted text, and
+            # re.escape keeps the hospital code literal inside the pattern.
+            pattern = re.compile(
+                rf"{re.escape(hospital)}\s*Emergency.*?Waiting:\s*(\d+)",
+                re.IGNORECASE | re.DOTALL,
+            )
+            match = pattern.search(full_text)
+            if match:
+                data.append({
+                    "timestamp": fetched_at,
+                    "hospital": hospital,
+                    "waiting": int(match.group(1)),
+                })
+        return data
+    except requests.exceptions.RequestException as e:
+        st.error(f"Error downloading PDF: {e}")
+        return None
+    except Exception as e:
+        # Boundary handler: any parsing failure is shown in the UI, not raised.
+        st.error(f"An error occurred during PDF parsing: {e}")
+        return None
+def save_data(new_data, max_rows=1000):
+    """Append new readings to the CSV history file, keeping only the newest rows.
+
+    Args:
+        new_data: List of row dicts as produced by fetch_and_parse_pdf.
+            Nothing is written when it is None or empty.
+        max_rows: Maximum number of rows kept in the file after appending;
+            older rows are dropped so the file does not grow without bound.
+
+    Errors while reading or writing the file are reported through st.error
+    rather than raised.
+    """
+    if not new_data:
+        return
+    try:
+        new_df = pd.DataFrame(new_data)
+        if os.path.exists(DATA_FILE):
+            df = pd.concat([pd.read_csv(DATA_FILE), new_df], ignore_index=True)
+        else:
+            # First write: use the new rows directly. Concatenating with an
+            # empty placeholder frame is deprecated in pandas and would turn
+            # the numeric column into object dtype.
+            df = new_df
+        # Keep only the most recent rows to prevent the file from getting too large
+        df = df.tail(max_rows)
+        df.to_csv(DATA_FILE, index=False)
+    except Exception as e:
+        st.error(f"Could not save data: {e}")
+def load_latest_data():
+    """Return the newest stored row for each hospital.
+
+    Returns:
+        A DataFrame holding, per hospital, the row with the greatest
+        timestamp found in DATA_FILE. Returns None when the file does not
+        exist or cannot be read; read failures are reported via st.error.
+    """
+    if os.path.exists(DATA_FILE):
+        try:
+            history = pd.read_csv(DATA_FILE)
+            # After ordering by timestamp, the final row of each hospital
+            # group is that hospital's most recent reading.
+            ordered = history.sort_values('timestamp')
+            per_hospital = ordered.groupby('hospital')
+            return per_hospital.tail(1)
+        except Exception as e:
+            st.error(f"Could not load data file: {e}")
+    return None
+# --- Streamlit UI ---
+# Compatibility shims: the st.experimental_* query-param and rerun functions
+# were deprecated and later removed from Streamlit. Prefer the stable API when
+# the installed version has it, and fall back to the old names otherwise.
+def _get_update_flag():
+    """Return the "update" query parameter as a string ("false" when absent)."""
+    if hasattr(st, "query_params"):
+        return st.query_params.get("update", "false")
+    return st.experimental_get_query_params().get("update", ["false"])[0]
+def _clear_query_params():
+    """Remove all query parameters from the page URL."""
+    if hasattr(st, "query_params"):
+        st.query_params.clear()
+    else:
+        st.experimental_set_query_params()
+def _rerun():
+    """Restart the script run from the top."""
+    if hasattr(st, "rerun"):
+        st.rerun()
+    else:
+        st.experimental_rerun()
+# Set page configuration
+st.set_page_config(page_title="Saskatoon ED Monitor", layout="wide")
+# Title
+st.title("Saskatoon Emergency Department Monitor")
+# Check if we need to run the update
+# This part is triggered by the scheduled GitHub Action
+if _get_update_flag() == "true":
+    with st.spinner("Running scheduled data update..."):
+        st.info("Fetching new data from eHealth PDF...")
+        latest_data = fetch_and_parse_pdf()
+        if latest_data:
+            save_data(latest_data)
+            st.success("Data updated successfully!")
+            # Clear the query param by re-running the script without it
+            time.sleep(2) # Give user time to see the message
+            _clear_query_params()
+            _rerun()
+# Display the latest data
+latest_df = load_latest_data()
+if latest_df is not None and not latest_df.empty:
+    last_update_time = pd.to_datetime(latest_df['timestamp'].max())
+    st.markdown(f"**Last successful update:** `{last_update_time.strftime('%Y-%m-%d %I:%M %p')}`")
+    cols = st.columns(len(HOSPITALS))
+    for col, hospital in zip(cols, HOSPITALS):
+        with col:
+            hospital_data = latest_df[latest_df['hospital'] == hospital]
+            if not hospital_data.empty:
+                # load_latest_data keeps one row per hospital, so take the first.
+                waiting_count = hospital_data['waiting'].iloc[0]
+                st.metric(label=f"**{hospital}**", value=f"{waiting_count} Waiting")
+            else:
+                st.metric(label=f"**{hospital}**", value="No Data")
+else:
+    st.warning("No data available yet. The first update may be running. This page will refresh automatically.")
+# Auto-refresh the page every 15 minutes to show fresh data
+# Note: Streamlit doesn't have a perfect background scheduler. This sleep runs
+# on the server and keeps this session's script run blocked until it elapses.
+# The GitHub Action is the reliable way to *fetch* data. This just refreshes the view.
+time.sleep(900) # 15 minutes * 60 seconds
+_rerun()