Spaces:
Runtime error
Runtime error
Update start_app.py
Browse files- start_app.py +0 -252
start_app.py
CHANGED
@@ -1,253 +1 @@
|
|
1 |
-
import json
|
2 |
-
import os
|
3 |
-
import re
|
4 |
-
import subprocess
|
5 |
-
import time
|
6 |
-
import yaml
|
7 |
|
8 |
-
import gradio as gr
|
9 |
-
import pandas as pd
|
10 |
-
import requests
|
11 |
-
from huggingface_hub import HfApi, get_token
|
12 |
-
|
13 |
-
|
14 |
-
# Command used to launch the job script, both for local dry-runs and as the
# remote job entrypoint submitted to the HF Jobs API.
CMD = ["python", "run_job.py"]
# Human-readable argument placeholders shown in the curl usage snippet below.
ARG_NAMES = ["<src>", "<dst>", "<query>", "[-c config]", "[-s split]", "[-p private]"]
# Space that jobs are attributed to; falls back to the public demo Space when
# not running inside a HF Space (SPACE_ID env var unset).
SPACE_ID = os.environ.get("SPACE_ID") or "lhoestq/run-duckdb-jobs"

# Markdown body for the landing page. Rendered with .format(), so the doubled
# braces {{ }} survive as literal JSON braces in the curl example.
CONTENT = """
## Usage:

```bash
curl -L 'https://huggingface.co/api/jobs/<username>' \
-H 'Content-Type: application/json' \
-H 'Authorization: Bearer <hf_token>' \
-d '{{
"spaceId": "{SPACE_ID}",
"command": {CMD},
"arguments": {ARG_NAMES},
"environment": {{"HF_TOKEN": <hf_token>}},
"flavor": "cpu-basic"
}}'
```

## Example:
"""
|
36 |
-
|
37 |
-
# Pull page title/emoji/description from the YAML front matter block
# (between the first pair of "---" lines) of this Space's README.md.
with open("README.md") as f:
    METADATA = yaml.safe_load(f.read().split("---\n")[1])
TITLE = METADATA["title"]
SHORT_DESCRIPTION = METADATA.get("short_description")  # optional key
EMOJI = METADATA["emoji"]

# Probe the job script's --help output. HELP ends up as the decoded help text
# on success, or False if the script fails to run or returns a non-zero code.
try:
    process = subprocess.run(CMD + ["--help"], stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    HELP = not process.returncode and (process.stdout or process.stderr).decode()
except Exception:
    HELP = False

# DRY_RUN is the exact flag spelling ("--dry-run" or "--dry_run") if the help
# text advertises one, else False. The walrus binds the regex match so its
# .group(0) (the matched spelling) can be returned by the short-circuit chain.
DRY_RUN = bool(HELP) and bool(m := re.search("--dry(-|_)run", HELP)) and m.group(0)
50 |
-
|
51 |
-
def parse_log(line: str, pbars: dict[str, float] = None):
|
52 |
-
if line.startswith("data: {"):
|
53 |
-
data = json.loads(line[len("data: "):])
|
54 |
-
data, timestamp = data["data"], data["timestamp"]
|
55 |
-
if pbars is not None and data.startswith("===== Job started at"):
|
56 |
-
pbars.pop("Starting ⚙️", None)
|
57 |
-
pbars["Running 🏃"] = 0.0
|
58 |
-
return f"[{timestamp}] {data}\n\n"
|
59 |
-
elif pbars is not None and (percent_match := re.search("\\d+(?:\\.\\d+)?%", data)) and any(c in data.split("%")[1][:10] for c in "|█▌"):
|
60 |
-
pbars.pop("Running 🏃", None)
|
61 |
-
[pbars.pop(desc) for desc, percent in pbars.items() if percent == 1.]
|
62 |
-
percent = float(percent_match.group(0)[:-1]) / 100
|
63 |
-
desc = data[:percent_match.start()].strip() or "Progress"
|
64 |
-
pbars[desc] = percent
|
65 |
-
else:
|
66 |
-
return f"[{timestamp}] {data}\n\n"
|
67 |
-
return ""
|
68 |
-
|
69 |
-
def dry_run(src, config, split, dst, query):
    """Run the job command locally with the dry-run flag, streaming stdout to the UI.

    Generator: yields gradio update dicts keyed by the module-level components
    (output_markdown, progress_labels, details_accordion).
    """
    if not all([src, dst, query]):
        raise gr.Error("Please fill source, destination and query.")
    # Assemble CLI arguments, inserting the optional config/split flags only
    # when provided, and finishing with the detected dry-run flag spelling.
    args = ["--src", src]
    if config:
        args += ["--config", config]
    if split:
        args += ["--split", split]
    args += ["--dst", dst, "--query", query, DRY_RUN]
    cmd = CMD + args
    # Display-only quoting of arguments that contain spaces.
    shown = [('"' + part.replace('"', '\"""') + '"') if " " in part else part for part in cmd]
    logs = "Job:\n\n```bash\n" + " ".join(shown) + "\n```\nOutput:\n\n"
    # Open the details panel and hide the progress labels before streaming.
    yield {output_markdown: logs, progress_labels: gr.Label(visible=False), details_accordion: gr.Accordion(open=True)}
    process = subprocess.Popen(cmd, stdout=subprocess.PIPE)
    for raw_line in iter(process.stdout.readline, b""):
        logs += raw_line.decode()
        yield {output_markdown: logs}
80 |
-
|
81 |
-
def run(src, config, split, dst, query, oauth_token: gr.OAuthToken | None, profile: gr.OAuthProfile | None):
    """Submit the query as a remote HF job and stream its logs/progress to the UI.

    Generator: yields gradio update dicts keyed by the module-level
    output_markdown / progress_labels components.
    """
    if not all([src, dst, query]):
        raise gr.Error("Please fill source, destination and query.")
    # Resolve credentials: prefer the OAuth login, fall back to a locally
    # stored token (get_token), otherwise require a login.
    if oauth_token and profile:
        token = oauth_token.token
        username = profile.username
    elif (token := get_token()):
        username = HfApi().whoami(token=token)["name"]
    else:
        raise gr.Error("Please log in to run the job.")
    args = ["--src", src] + (["--config", config] if config else []) + (["--split", split] if split else []) + ["--dst", dst, "--query", query]
    cmd = CMD + args
    # Display-only quoting of arguments containing spaces for the log header.
    logs = "Job:\n\n```bash\n" + " ".join('"' + arg.replace('"', '\"""') + '"' if " " in arg else arg for arg in cmd) + "\n```\nOutput:\n\n"
    pbars = {}
    yield {output_markdown: logs, progress_labels: gr.Label(pbars, visible=bool(pbars))}
    # Create the job; the user's token is also forwarded into the job env so
    # run_job.py can push to the destination dataset.
    resp = requests.post(
        f"https://huggingface.co/api/jobs/{username}",
        json={
            "spaceId": SPACE_ID,
            "arguments": args,
            "command": CMD,
            "environment": {"HF_TOKEN": token},
            "flavor": "cpu-basic"
        },
        headers={"Authorization": f"Bearer {token}"}
    )
    if resp.status_code != 200:
        logs += resp.text
        pbars = {"Finished with an error ❌": 1.0}
    else:
        job_id = resp.json()["metadata"]["job_id"]
        pbars = {"Starting ⚙️": 0.0}
        yield {output_markdown: logs, progress_labels: gr.Label(pbars, visible=bool(pbars))}
        # Stream the job logs; parse_log mutates pbars in place with progress.
        resp = requests.get(
            f"https://huggingface.co/api/jobs/{username}/{job_id}/logs-stream",
            headers={"Authorization": f"Bearer {token}"},
            stream=True
        )
        for line in resp.iter_lines():
            logs += parse_log(line.decode("utf-8"), pbars=pbars)
            yield {output_markdown: logs, progress_labels: gr.Label(pbars, visible=bool(pbars))}
        # The log stream has ended; poll job status until it leaves RUNNING.
        job_status = {"status": {"stage": "RUNNING"}}
        while True:
            job_status = requests.get(
                f"https://huggingface.co/api/jobs/{username}/{job_id}",
                headers={"Authorization": f"Bearer {token}"}
            ).json()
            if job_status["status"]["stage"] == "RUNNING":
                time.sleep(1)
            else:
                break
        if job_status["status"]["stage"] == "COMPLETED":
            pbars = {"Finished ✅": 1.0}
        else:
            logs += f'{job_status["status"]["message"]} ({job_status["status"]["error"]})'
            pbars = {"Finished with an error ❌": 1.0}
    # Final UI state for both the success and the error paths.
    yield {output_markdown: logs, progress_labels: gr.Label(pbars, visible=bool(pbars))}
138 |
-
|
139 |
-
|
140 |
-
# Polars read functions whose datasets-server "loading codes" carry the
# config/split metadata used to populate the subset/split dropdowns.
READ_FUNCTIONS = ("pl.read_parquet", "pl.read_csv", "pl.read_json")
# Number of trending datasets prefilled into the source dataset dropdown.
NUM_TRENDING_DATASETS = 10
142 |
-
|
143 |
-
with gr.Blocks() as demo:
    # Header row: title + optional description on the left, login on the right.
    with gr.Row():
        with gr.Column(scale=10):
            gr.Markdown(f"# {TITLE} {EMOJI}")
            if SHORT_DESCRIPTION:
                gr.Markdown(SHORT_DESCRIPTION)
        with gr.Column():
            gr.LoginButton()
    gr.Markdown(CONTENT.format(SPACE_ID=SPACE_ID, CMD=json.dumps(CMD), ARG_NAMES=json.dumps(ARG_NAMES)))
    # Main layout: source dataset/subset/split → destination dataset + query.
    with gr.Row():
        with gr.Column(scale=10):
            with gr.Row():
                # Hidden state holding the loading codes of the selected dataset.
                loading_codes_json = gr.JSON([], visible=False)
                dataset_dropdown = gr.Dropdown(label="Source Dataset", allow_custom_value=True, scale=10)
                # Subset/split start hidden; shown only when there is a real choice.
                subset_dropdown = gr.Dropdown(info="Subset", allow_custom_value=True, show_label=False, visible=False)
                split_dropdown = gr.Dropdown(info="Split", allow_custom_value=True, show_label=False, visible=False)
        with gr.Column(min_width=60):
            gr.HTML("<div style='font-size: 4em;'>→</div>")
        with gr.Column(scale=10):
            dst_dropdown = gr.Dropdown(label="Destination Dataset", allow_custom_value=True)
            query_textarea = gr.Textbox(label="SQL Query", lines=2, max_lines=300, placeholder="SELECT * FROM src;", value="SELECT * FROM src;")
    with gr.Row():
        run_button = gr.Button("Run", scale=10, variant="primary")
        # The Dry-Run button only exists when the job script advertises the flag.
        if DRY_RUN:
            dry_run_button = gr.Button("Dry-Run")
    progress_labels = gr.Label(visible=False, label="Progress")
    with gr.Accordion("Details", open=False) as details_accordion:
        output_markdown = gr.Markdown(label="Output logs")
    run_button.click(run, inputs=[dataset_dropdown, subset_dropdown, split_dropdown, dst_dropdown, query_textarea], outputs=[details_accordion, progress_labels, output_markdown])
    if DRY_RUN:
        dry_run_button.click(dry_run, inputs=[dataset_dropdown, subset_dropdown, split_dropdown, dst_dropdown, query_textarea], outputs=[details_accordion, progress_labels, output_markdown])

    def show_subset_dropdown(dataset: str):
        """Fetch the dataset's loading codes and build subset-dropdown kwargs.

        Returns a (dropdown_kwargs, loading_codes) pair.
        """
        # NOTE(review): this early return yields a bare [] although every
        # caller unpacks the result as a 2-tuple — likely a latent bug
        # (TODO confirm; something like `dict(...), []` seems intended).
        if dataset and "/" not in dataset.strip().strip("/"):
            return []
        resp = requests.get(f"https://datasets-server.huggingface.co/compatible-libraries?dataset={dataset}", timeout=3).json()
        # Keep only loading codes for the supported polars read functions.
        loading_codes = ([lib["loading_codes"] for lib in resp.get("libraries", []) if lib["function"] in READ_FUNCTIONS] or [[]])[0] or []
        subsets = [loading_code["config_name"] for loading_code in loading_codes]
        subset = (subsets or [""])[0]
        # key= forces gradio to re-render when the choices actually change.
        return dict(choices=subsets, value=subset, visible=len(subsets) > 1, key=hash(str(loading_codes))), loading_codes

    def show_split_dropdown(subset: str, loading_codes: list[dict]):
        """Build split-dropdown kwargs for the chosen subset from the loading codes."""
        splits = ([list(loading_code["arguments"]["splits"]) for loading_code in loading_codes if loading_code["config_name"] == subset] or [[]])[0]
        split = (splits or [""])[0]
        return dict(choices=splits, value=split, visible=len(splits) > 1, key=hash(str(loading_codes) + subset))

    @demo.load(outputs=[dataset_dropdown, loading_codes_json, subset_dropdown, split_dropdown])
    def _fetch_datasets(request: gr.Request):
        """On page load: prefill a default dataset plus the current trending ones."""
        dataset = "CohereForAI/Global-MMLU"
        datasets = [dataset] + [ds.id for ds in HfApi().list_datasets(limit=NUM_TRENDING_DATASETS, sort="trendingScore", direction=-1) if ds.id != dataset]
        subsets, loading_codes = show_subset_dropdown(dataset)
        splits = show_split_dropdown(subsets["value"], loading_codes)
        return {
            dataset_dropdown: gr.Dropdown(choices=datasets, value=dataset),
            loading_codes_json: loading_codes,
            subset_dropdown: gr.Dropdown(**subsets),
            split_dropdown: gr.Dropdown(**splits),
        }

    @dataset_dropdown.select(inputs=[dataset_dropdown], outputs=[subset_dropdown, split_dropdown])
    def _show_subset_dropdown(dataset: str):
        """When the dataset changes: refresh both the subset and split dropdowns."""
        subsets, loading_codes = show_subset_dropdown(dataset)
        splits = show_split_dropdown(subsets["value"], loading_codes)
        return {
            subset_dropdown: gr.Dropdown(**subsets),
            split_dropdown: gr.Dropdown(**splits),
        }

    @subset_dropdown.select(inputs=[dataset_dropdown, subset_dropdown, loading_codes_json], outputs=[split_dropdown])
    def _show_split_dropdown(dataset: str, subset: str, loading_codes: list[dict]):
        """When the subset changes: refresh only the split dropdown."""
        splits = show_split_dropdown(subset, loading_codes)
        return {
            split_dropdown: gr.Dropdown(**splits),
        }
217 |
-
|
218 |
-
# Optional "Help" page exposing the job script's --help output verbatim.
if HELP:
    with demo.route("Help", "/help"):
        gr.Markdown(f"# Help\n\n```\n{HELP}\n```")

# "Jobs" page: table of the logged-in user's jobs launched from this Space.
with demo.route("Jobs", "/jobs") as page:
    gr.Markdown("# Jobs")
    jobs_dataframe = gr.DataFrame(datatype="markdown")

    @page.load(outputs=[jobs_dataframe])
    def list_jobs(oauth_token: gr.OAuthToken | None, profile: gr.OAuthProfile | None):
        """Fetch the user's jobs and keep only the ones created by this Space."""
        # Same credential resolution as run(): OAuth first, then local token.
        if oauth_token and profile:
            token = oauth_token.token
            username = profile.username
        elif (token := get_token()):
            username = HfApi().whoami(token=token)["name"]
        else:
            # Placeholder table shown to anonymous visitors.
            return pd.DataFrame({"Log in to see jobs": []})
        resp = requests.get(
            f"https://huggingface.co/api/jobs/{username}",
            headers={"Authorization": f"Bearer {token}"}
        )
        return pd.DataFrame([
            {
                "id": job["metadata"]["id"],
                "created_at": job["metadata"]["created_at"],
                "stage": job["compute"]["status"]["stage"],
                # Rendered as a markdown link by the DataFrame (datatype="markdown").
                "output": f'[logs](https://huggingface.co/api/jobs/{username}/{job["metadata"]["id"]}/logs-stream)',
                "command": str(job["compute"]["spec"]["extra"]["command"]),
                "args": str(job["compute"]["spec"]["extra"]["args"]),
            }
            for job in resp.json()
            if job["compute"]["spec"]["extra"]["input"]["spaceId"] == SPACE_ID
        ])

if __name__ == "__main__":
    # 0.0.0.0 so the server is reachable from outside the Space's container.
    demo.launch(server_name="0.0.0.0")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|