Sean546 committed on
Commit
39f6d09
·
verified ·
1 Parent(s): b65af9d

Update src/streamlit_app.py

Browse files
Files changed (1) hide show
  1. src/streamlit_app.py +147 -38
src/streamlit_app.py CHANGED
@@ -1,40 +1,149 @@
1
- import altair as alt
2
- import numpy as np
3
- import pandas as pd
4
  import streamlit as st
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5
 
6
- """
7
- # Welcome to Streamlit!
8
-
9
- Edit `/streamlit_app.py` to customize this app to your heart's desire :heart:.
10
- If you have any questions, checkout our [documentation](https://docs.streamlit.io) and [community
11
- forums](https://discuss.streamlit.io).
12
-
13
- In the meantime, below is an example of what you can do with just a few lines of code:
14
- """
15
-
16
- num_points = st.slider("Number of points in spiral", 1, 10000, 1100)
17
- num_turns = st.slider("Number of turns in spiral", 1, 300, 31)
18
-
19
- indices = np.linspace(0, 1, num_points)
20
- theta = 2 * np.pi * num_turns * indices
21
- radius = indices
22
-
23
- x = radius * np.cos(theta)
24
- y = radius * np.sin(theta)
25
-
26
- df = pd.DataFrame({
27
- "x": x,
28
- "y": y,
29
- "idx": indices,
30
- "rand": np.random.randn(num_points),
31
- })
32
-
33
- st.altair_chart(alt.Chart(df, height=700, width=700)
34
- .mark_point(filled=True)
35
- .encode(
36
- x=alt.X("x", axis=None),
37
- y=alt.Y("y", axis=None),
38
- color=alt.Color("idx", legend=None, scale=alt.Scale()),
39
- size=alt.Size("rand", legend=None, scale=alt.Scale(range=[1, 150])),
40
- ))
 
 
 
 
1
  import streamlit as st
2
+ import requests
3
+ import fitz # PyMuPDF
4
+ import re
5
+ import pandas as pd
6
+ from datetime import datetime
7
+ import os
8
+ import time
9
+
10
# --- Configuration ---
# URL of the PDF that fetch_and_parse_pdf() downloads on each update.
PDF_URL = "https://www.ehealthsask.ca/reporting/Documents/SaskatoonHospitalBedCapacity.pdf"
# CSV file that save_data() writes and load_latest_data() reads.
DATA_FILE = "hospital_data.csv"
# Hospital codes searched for in the PDF text; the UI renders one metric
# column per entry, in this order.
HOSPITALS = ["RUH", "SCH", "SPH", "JPCH"]
14
+
15
+ # --- Main Functions ---
16
+
17
def fetch_and_parse_pdf():
    """Download the PDF at ``PDF_URL`` and parse out the waiting counts.

    The text of every page is concatenated, then for each code in
    ``HOSPITALS`` the first integer following
    ``<code> Emergency ... Waiting:`` is captured.

    Returns:
        A list of ``{"timestamp", "hospital", "waiting"}`` dicts, one per
        hospital whose pattern matched (the list may be empty), or ``None``
        when the download or the parsing raised. Failures are reported to
        the page through ``st.error`` rather than propagated.
    """
    try:
        # Download the PDF content; raise_for_status turns 4xx/5xx into errors.
        response = requests.get(PDF_URL, timeout=15)
        response.raise_for_status()

        # Extract text using PyMuPDF.
        doc = fitz.open(stream=response.content, filetype="pdf")
        try:
            full_text = "".join(page.get_text() for page in doc)
        finally:
            # Release the document even if text extraction fails part-way.
            doc.close()

        # One timestamp per fetch, so every row taken from the same PDF
        # carries the same value.
        fetched_at = datetime.now().isoformat()

        data = []
        for hospital in HOSPITALS:
            # re.escape keeps the code literal should it ever contain regex
            # metacharacters.
            # NOTE(review): with DOTALL the lazy ".*?" may run past this
            # hospital's section if it has no "Waiting:" entry and pick up
            # the next hospital's number - confirm against the PDF layout.
            pattern = re.compile(
                rf"{re.escape(hospital)}\s*Emergency.*?Waiting:\s*(\d+)",
                re.IGNORECASE | re.DOTALL,
            )
            match = pattern.search(full_text)
            if match:
                data.append({
                    "timestamp": fetched_at,
                    "hospital": hospital,
                    "waiting": int(match.group(1)),
                })

        return data

    except requests.exceptions.RequestException as e:
        st.error(f"Error downloading PDF: {e}")
        return None
    except Exception as e:
        # Top-level boundary for the UI: surface the problem, don't crash.
        st.error(f"An error occurred during PDF parsing: {e}")
        return None
56
+
57
+
58
def save_data(new_data, max_rows=1000):
    """Append new rows to the CSV at ``DATA_FILE``, keeping only the newest.

    Args:
        new_data: List of row dicts (as produced by ``fetch_and_parse_pdf``).
            A falsy value (``None`` or an empty list) is a no-op.
        max_rows: Maximum number of rows kept in the file after appending;
            older rows are dropped first. Defaults to 1000.

    Returns:
        None. Any failure is reported through ``st.error`` instead of being
        raised.
    """
    if not new_data:
        return

    try:
        new_df = pd.DataFrame(new_data)

        # Read the history directly instead of checking for existence first:
        # a missing file and a zero-byte file both simply mean "no history".
        try:
            history = pd.read_csv(DATA_FILE)
        except (FileNotFoundError, pd.errors.EmptyDataError):
            history = None

        if history is None or history.empty:
            # Skip concatenating with an empty frame, which would turn the
            # columns into object dtype and triggers a pandas FutureWarning.
            combined = new_df
        else:
            combined = pd.concat([history, new_df], ignore_index=True)

        # Cap the file size so it cannot grow without bound.
        combined.tail(max_rows).to_csv(DATA_FILE, index=False)

    except Exception as e:
        st.error(f"Could not save data: {e}")
81
+
82
+
83
def load_latest_data():
    """Load the most recent row for each hospital from ``DATA_FILE``.

    Returns:
        A DataFrame holding, per distinct ``hospital`` value, the row with
        the greatest ``timestamp``; or ``None`` when the file is missing,
        is empty, or cannot be read. Unexpected read errors are reported
        through ``st.error``.
    """
    try:
        df = pd.read_csv(DATA_FILE)
        # Sort by timestamp and keep the last entry of each hospital group.
        return df.sort_values("timestamp").groupby("hospital").tail(1)
    except (FileNotFoundError, pd.errors.EmptyDataError):
        # No file yet, or a zero-byte file: there is simply no data to show.
        return None
    except Exception as e:
        st.error(f"Could not load data file: {e}")
        return None
96
+
97
# --- Streamlit UI ---


def _update_requested():
    """Return True when the page URL carries ``?update=true``."""
    if hasattr(st, "query_params"):
        values = st.query_params.get_all("update")
    else:
        # Older Streamlit releases only offer the experimental accessor.
        values = st.experimental_get_query_params().get("update", [])
    return bool(values) and values[0] == "true"


def _clear_query_params():
    """Remove all query parameters from the page URL."""
    if hasattr(st, "query_params"):
        st.query_params.clear()
    else:
        st.experimental_set_query_params()


def _rerun():
    """Restart the script, using whichever rerun API this Streamlit has."""
    rerun = getattr(st, "rerun", None) or st.experimental_rerun
    rerun()


# Set page configuration
st.set_page_config(page_title="Saskatoon ED Monitor", layout="wide")

# Title
st.title("Saskatoon Emergency Department Monitor")

# Run an update when the page is opened with ?update=true.
# NOTE(review): the original comment says a scheduled GitHub Action triggers
# this; the script only runs once a browser session connects, so confirm the
# Action really loads the page rather than issuing a bare HTTP GET.
if _update_requested():
    with st.spinner("Running scheduled data update..."):
        st.info("Fetching new data from eHealth PDF...")
        latest_data = fetch_and_parse_pdf()
        if latest_data:
            save_data(latest_data)
            st.success("Data updated successfully!")
            # Clear the query param by re-running the script without it.
            # On failure the param is kept, so the error stays visible and
            # the update is retried on the next automatic rerun.
            time.sleep(2)  # Give user time to see the message
            _clear_query_params()
            _rerun()


# Display the latest data
latest_df = load_latest_data()

if latest_df is not None and not latest_df.empty:
    last_update_time = pd.to_datetime(latest_df['timestamp'].max())

    st.markdown(f"**Last successful update:** `{last_update_time.strftime('%Y-%m-%d %I:%M %p')}`")

    # One metric column per configured hospital, in configuration order.
    for col, hospital in zip(st.columns(len(HOSPITALS)), HOSPITALS):
        with col:
            hospital_data = latest_df[latest_df['hospital'] == hospital]
            if not hospital_data.empty:
                waiting_count = hospital_data['waiting'].iloc[0]
                st.metric(label=f"**{hospital}**", value=f"{waiting_count} Waiting")
            else:
                st.metric(label=f"**{hospital}**", value="No Data")
else:
    st.warning("No data available yet. The first update may be running. This page will refresh automatically.")


# Refresh the view every 15 minutes to show fresh data.
# This sleep runs on the server inside this session's script run (it is not a
# client-side timer): the run stays blocked for the whole interval and the
# script is then executed again from the top.
time.sleep(900)  # 15 minutes * 60 seconds
_rerun()
148
+
149