qgallouedec HF Staff commited on
Commit
b797456
·
verified ·
1 Parent(s): 9935ce4

Update start_app.py

Browse files
Files changed (1) hide show
  1. start_app.py +0 -252
start_app.py CHANGED
@@ -1,253 +1 @@
1
- import json
2
- import os
3
- import re
4
- import subprocess
5
- import time
6
- import yaml
7
 
8
- import gradio as gr
9
- import pandas as pd
10
- import requests
11
- from huggingface_hub import HfApi, get_token
12
-
13
-
14
- CMD = ["python" ,"run_job.py"]
15
- ARG_NAMES = ["<src>", "<dst>", "<query>", "[-c config]", "[-s split]", "[-p private]"]
16
- SPACE_ID = os.environ.get("SPACE_ID") or "lhoestq/run-duckdb-jobs"
17
-
18
- CONTENT = """
19
- ## Usage:
20
-
21
- ```bash
22
- curl -L 'https://huggingface.co/api/jobs/<username>' \
23
- -H 'Content-Type: application/json' \
24
- -H 'Authorization: Bearer <hf_token>' \
25
- -d '{{
26
- "spaceId": "{SPACE_ID}",
27
- "command": {CMD},
28
- "arguments": {ARG_NAMES},
29
- "environment": {{"HF_TOKEN": <hf_token>}},
30
- "flavor": "cpu-basic"
31
- }}'
32
- ```
33
-
34
- ## Example:
35
- """
36
-
37
- with open("README.md") as f:
38
- METADATA = yaml.safe_load(f.read().split("---\n")[1])
39
- TITLE = METADATA["title"]
40
- SHORT_DESCRIPTION = METADATA.get("short_description")
41
- EMOJI = METADATA["emoji"]
42
-
43
# Probe the job script's --help output. On success HELP is the decoded help
# text (a non-empty str); on a non-zero exit or any failure it is falsy.
try:
    process = subprocess.run(CMD + ["--help"], stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    HELP = not process.returncode and (process.stdout or process.stderr).decode()
except Exception:
    HELP = False

# DRY_RUN is the literal flag spelling ("--dry-run" or "--dry_run") when the
# script advertises one in its help text, otherwise False.
DRY_RUN = bool(HELP) and bool(m :=re.search("--dry(-|_)run", HELP)) and m.group(0)
50
-
51
- def parse_log(line: str, pbars: dict[str, float] = None):
52
- if line.startswith("data: {"):
53
- data = json.loads(line[len("data: "):])
54
- data, timestamp = data["data"], data["timestamp"]
55
- if pbars is not None and data.startswith("===== Job started at"):
56
- pbars.pop("Starting ⚙️", None)
57
- pbars["Running 🏃"] = 0.0
58
- return f"[{timestamp}] {data}\n\n"
59
- elif pbars is not None and (percent_match := re.search("\\d+(?:\\.\\d+)?%", data)) and any(c in data.split("%")[1][:10] for c in "|█▌"):
60
- pbars.pop("Running 🏃", None)
61
- [pbars.pop(desc) for desc, percent in pbars.items() if percent == 1.]
62
- percent = float(percent_match.group(0)[:-1]) / 100
63
- desc = data[:percent_match.start()].strip() or "Progress"
64
- pbars[desc] = percent
65
- else:
66
- return f"[{timestamp}] {data}\n\n"
67
- return ""
68
-
69
def dry_run(src, config, split, dst, query):
    # Run the job script locally with the dry-run flag and stream its stdout
    # into the output panel (generator-style gradio event handler yielding
    # {component: update} dicts).
    if not all([src, dst, query]):
        raise gr.Error("Please fill source, destination and query.")
    args = ["--src", src] + (["--config", config] if config else []) + (["--split", split] if split else []) + [ "--dst", dst, "--query", query, DRY_RUN]
    cmd = CMD + args
    # Display the equivalent shell command, quoting args that contain spaces.
    # NOTE(review): the replace() escaping ('"' -> '"""') looks wrong for
    # shell quoting — display-only string, but verify intent.
    logs = "Job:\n\n```bash\n" + " ".join('"' + arg.replace('"', '\"""') + '"' if " " in arg else arg for arg in cmd) + "\n```\nOutput:\n\n"
    yield {output_markdown: logs, progress_labels: gr.Label(visible=False), details_accordion: gr.Accordion(open=True)}
    process = subprocess.Popen(cmd, stdout=subprocess.PIPE)
    # stderr is not captured here — presumably it goes to the Space logs; TODO confirm.
    for line in iter(process.stdout.readline, b""):
        logs += line.decode()
        yield {output_markdown: logs}
80
-
81
def run(src, config, split, dst, query, oauth_token: gr.OAuthToken | None, profile: gr.OAuthProfile | None):
    # Submit the job to the HF Jobs API and stream its logs/progress back
    # into the UI (generator-style gradio event handler).
    if not all([src, dst, query]):
        raise gr.Error("Please fill source, destination and query.")
    # Prefer the OAuth login; fall back to a locally stored token (dev mode).
    if oauth_token and profile:
        token = oauth_token.token
        username = profile.username
    elif (token := get_token()):
        username = HfApi().whoami(token=token)["name"]
    else:
        raise gr.Error("Please log in to run the job.")
    args = ["--src", src] + (["--config", config] if config else []) + (["--split", split] if split else []) + [ "--dst", dst, "--query", query]
    cmd = CMD + args
    # Display the equivalent shell command, quoting args that contain spaces.
    logs = "Job:\n\n```bash\n" + " ".join('"' + arg.replace('"', '\"""') + '"' if " " in arg else arg for arg in cmd) + "\n```\nOutput:\n\n"
    pbars = {}  # progress-bar label -> fraction in [0, 1]
    yield {output_markdown: logs, progress_labels: gr.Label(pbars, visible=bool(pbars))}
    # Launch the job on this Space's image with the user's token in the env.
    resp = requests.post(
        f"https://huggingface.co/api/jobs/{username}",
        json={
            "spaceId": SPACE_ID,
            "arguments": args,
            "command": CMD,
            "environment": {"HF_TOKEN": token},
            "flavor": "cpu-basic"
        },
        headers={"Authorization": f"Bearer {token}"}
    )
    if resp.status_code != 200:
        logs += resp.text
        pbars = {"Finished with an error ❌": 1.0}
    else:
        job_id = resp.json()["metadata"]["job_id"]
        pbars = {"Starting ⚙️": 0.0}
        yield {output_markdown: logs, progress_labels: gr.Label(pbars, visible=bool(pbars))}
        # Stream log lines until the server closes the stream; parse_log
        # appends text to logs and updates pbars in place.
        resp = requests.get(
            f"https://huggingface.co/api/jobs/{username}/{job_id}/logs-stream",
            headers={"Authorization": f"Bearer {token}"},
            stream=True
        )
        for line in resp.iter_lines():
            logs += parse_log(line.decode("utf-8"), pbars=pbars)
            yield {output_markdown: logs, progress_labels: gr.Label(pbars, visible=bool(pbars))}
        # The stream may end before the job reaches a terminal state: poll
        # the status endpoint once per second until it leaves RUNNING.
        job_status = {"status": {"stage": "RUNNING"}}
        while True:
            job_status = requests.get(
                f"https://huggingface.co/api/jobs/{username}/{job_id}",
                headers={"Authorization": f"Bearer {token}"}
            ).json()
            if job_status["status"]["stage"] == "RUNNING":
                time.sleep(1)
            else:
                break
        if job_status["status"]["stage"] == "COMPLETED":
            pbars = {"Finished ✅": 1.0}
        else:
            logs += f'{job_status["status"]["message"]} ({job_status["status"]["error"]})'
            pbars = {"Finished with an error ❌": 1.0}
    yield {output_markdown: logs, progress_labels: gr.Label(pbars, visible=bool(pbars))}
138
-
139
-
140
# Polars read functions whose loading codes the datasets-server exposes;
# used to filter /compatible-libraries responses.
READ_FUNCTIONS = ("pl.read_parquet", "pl.read_csv", "pl.read_json")
# How many trending datasets to suggest in the source dropdown.
NUM_TRENDING_DATASETS = 10
142
-
143
with gr.Blocks() as demo:
    # Header row: title + optional description on the left, login on the right.
    with gr.Row():
        with gr.Column(scale=10):
            gr.Markdown(f"# {TITLE} {EMOJI}")
            if SHORT_DESCRIPTION:
                gr.Markdown(SHORT_DESCRIPTION)
        with gr.Column():
            gr.LoginButton()
    gr.Markdown(CONTENT.format(SPACE_ID=SPACE_ID, CMD=json.dumps(CMD), ARG_NAMES=json.dumps(ARG_NAMES)))
    # Source (dataset / subset / split) -> arrow -> destination layout.
    with gr.Row():
        with gr.Column(scale=10):
            with gr.Row():
                loading_codes_json = gr.JSON([], visible=False)  # hidden cache of the source's loading codes
                dataset_dropdown = gr.Dropdown(label="Source Dataset", allow_custom_value=True, scale=10)
                subset_dropdown = gr.Dropdown(info="Subset", allow_custom_value=True, show_label=False, visible=False)
                split_dropdown = gr.Dropdown(info="Split", allow_custom_value=True, show_label=False, visible=False)
        with gr.Column(min_width=60):
            gr.HTML("<div style='font-size: 4em;'>→</div>")
        with gr.Column(scale=10):
            dst_dropdown = gr.Dropdown(label="Destination Dataset", allow_custom_value=True)
    query_textarea = gr.Textbox(label="SQL Query", lines=2, max_lines=300, placeholder="SELECT * FROM src;", value="SELECT * FROM src;")
    with gr.Row():
        run_button = gr.Button("Run", scale=10, variant="primary")
        if DRY_RUN:  # only offered when the job script advertises a dry-run flag
            dry_run_button = gr.Button("Dry-Run")
    progress_labels= gr.Label(visible=False, label="Progress")
    with gr.Accordion("Details", open=False) as details_accordion:
        output_markdown = gr.Markdown(label="Output logs")
    # Both buttons share the same inputs/outputs wiring.
    run_button.click(run, inputs=[dataset_dropdown, subset_dropdown, split_dropdown, dst_dropdown, query_textarea], outputs=[details_accordion, progress_labels, output_markdown])
    if DRY_RUN:
        dry_run_button.click(dry_run, inputs=[dataset_dropdown, subset_dropdown, split_dropdown, dst_dropdown, query_textarea], outputs=[details_accordion, progress_labels, output_markdown])
174
-
175
def show_subset_dropdown(dataset: str):
    """Resolve the subsets (configs) of ``dataset`` via the datasets-server API.

    Returns a tuple ``(dropdown_kwargs, loading_codes)`` where
    ``dropdown_kwargs`` are keyword arguments for ``gr.Dropdown`` and
    ``loading_codes`` is the list of loading codes for the supported
    polars read functions.
    """
    if not dataset or "/" not in dataset.strip().strip("/"):
        # Fixed: previously returned a bare [] which broke the
        # `subsets, loading_codes = ...` unpacking at every call site, and an
        # empty dataset name slipped through to the API request.
        return dict(choices=[], value="", visible=False, key=None), []
    resp = requests.get(f"https://datasets-server.huggingface.co/compatible-libraries?dataset={dataset}", timeout=3).json()
    # Keep only loading codes for the supported polars read functions.
    loading_codes = ([lib["loading_codes"] for lib in resp.get("libraries", []) if lib["function"] in READ_FUNCTIONS] or [[]])[0] or []
    subsets = [loading_code["config_name"] for loading_code in loading_codes]
    subset = (subsets or [""])[0]
    # The key changes with the loading codes so gradio re-renders the dropdown.
    return dict(choices=subsets, value=subset, visible=len(subsets) > 1, key=hash(str(loading_codes))), loading_codes
183
-
184
def show_split_dropdown(subset: str, loading_codes: list[dict]):
    """Return the kwargs for the split ``gr.Dropdown`` of ``subset``.

    The dropdown is visible only when there is more than one split, and its
    ``key`` changes with (loading_codes, subset) so gradio re-renders it.
    """
    # Take the splits of the first loading code matching this subset;
    # default to no splits when the subset is unknown.
    splits: list = []
    for loading_code in loading_codes:
        if loading_code["config_name"] == subset:
            splits = list(loading_code["arguments"]["splits"])
            break
    value = splits[0] if splits else ""
    return dict(
        choices=splits,
        value=value,
        visible=len(splits) > 1,
        key=hash(str(loading_codes) + subset),
    )
188
-
189
@demo.load(outputs=[dataset_dropdown, loading_codes_json, subset_dropdown, split_dropdown])
def _fetch_datasets(request: gr.Request):
    # On page load: seed the dataset dropdown with a known-good default plus
    # the current trending datasets, then pre-populate subset/split for it.
    dataset = "CohereForAI/Global-MMLU"
    datasets = [dataset] + [ds.id for ds in HfApi().list_datasets(limit=NUM_TRENDING_DATASETS, sort="trendingScore", direction=-1) if ds.id != dataset]
    subsets, loading_codes = show_subset_dropdown(dataset)
    splits = show_split_dropdown(subsets["value"], loading_codes)
    return {
        dataset_dropdown: gr.Dropdown(choices=datasets, value=dataset),
        loading_codes_json: loading_codes,
        subset_dropdown: gr.Dropdown(**subsets),
        split_dropdown: gr.Dropdown(**splits),
    }
201
-
202
@dataset_dropdown.select(inputs=[dataset_dropdown], outputs=[subset_dropdown, split_dropdown])
def _show_subset_dropdown(dataset: str):
    # When the user picks a dataset, refresh the subset and split dropdowns.
    # NOTE(review): loading_codes_json is not among the outputs, so later
    # subset selections read stale loading codes — verify this is intended.
    subsets, loading_codes = show_subset_dropdown(dataset)
    splits = show_split_dropdown(subsets["value"], loading_codes)
    return {
        subset_dropdown: gr.Dropdown(**subsets),
        split_dropdown: gr.Dropdown(**splits),
    }
210
-
211
@subset_dropdown.select(inputs=[dataset_dropdown, subset_dropdown, loading_codes_json], outputs=[split_dropdown])
def _show_split_dropdown(dataset: str, subset: str, loading_codes: list[dict]):
    # When the user picks a subset, refresh the split dropdown from the
    # cached loading codes (dataset itself is unused here).
    splits = show_split_dropdown(subset, loading_codes)
    return {
        split_dropdown: gr.Dropdown(**splits),
    }
217
-
218
# Expose the job script's --help output on a /help page when available.
if HELP:
    with demo.route("Help", "/help"):
        gr.Markdown(f"# Help\n\n```\n{HELP}\n```")
221
-
222
- with demo.route("Jobs", "/jobs") as page:
223
- gr.Markdown("# Jobs")
224
- jobs_dataframe = gr.DataFrame(datatype="markdown")
225
-
226
- @page.load(outputs=[jobs_dataframe])
227
- def list_jobs(oauth_token: gr.OAuthToken | None, profile: gr.OAuthProfile | None):
228
- if oauth_token and profile:
229
- token = oauth_token.token
230
- username = profile.username
231
- elif (token := get_token()):
232
- username = HfApi().whoami(token=token)["name"]
233
- else:
234
- return pd.DataFrame({"Log in to see jobs": []})
235
- resp = requests.get(
236
- f"https://huggingface.co/api/jobs/{username}",
237
- headers={"Authorization": f"Bearer {token}"}
238
- )
239
- return pd.DataFrame([
240
- {
241
- "id": job["metadata"]["id"],
242
- "created_at": job["metadata"]["created_at"],
243
- "stage": job["compute"]["status"]["stage"],
244
- "output": f'[logs](https://huggingface.co/api/jobs/{username}/{job["metadata"]["id"]}/logs-stream)',
245
- "command": str(job["compute"]["spec"]["extra"]["command"]),
246
- "args": str(job["compute"]["spec"]["extra"]["args"]),
247
- }
248
- for job in resp.json()
249
- if job["compute"]["spec"]["extra"]["input"]["spaceId"] == SPACE_ID
250
- ])
251
-
252
- if __name__ == "__main__":
253
- demo.launch(server_name="0.0.0.0")
 
 
 
 
 
 
 
1