whyu committed on
Commit 8c7c7b7 · 1 Parent(s): 7c3c46a

user enters api key

Files changed (1)
  1. app.py +100 -34
app.py CHANGED
@@ -2,6 +2,8 @@ import gradio as gr
 import openai
 import json
 import os
+import uuid
+import tempfile
 from tqdm import tqdm
 import pandas as pd
 import numpy as np
@@ -9,12 +11,13 @@ from collections import Counter
 import time
 from zipfile import ZipFile
 
-openai.api_key = os.environ.get("AZURE_OPENAI_KEY")
-openai.api_base = os.environ.get("AZURE_OPENAI_ENDPOINT")
-openai.api_type = 'azure'
-openai.api_version = os.environ.get("AZURE_OPENAI_API_VERSION")
-deployment_id = os.environ.get("AZURE_OPENAI_DEP_ID")
-gpt_model = deployment_id
+# For Azure OpenAI
+# openai.api_key = os.environ.get("AZURE_OPENAI_KEY")
+# openai.api_base = os.environ.get("AZURE_OPENAI_ENDPOINT")
+# openai.api_type = 'azure'
+# openai.api_version = os.environ.get("AZURE_OPENAI_API_VERSION")
+# deployment_id = os.environ.get("AZURE_OPENAI_DEP_ID")
+# gpt_model = deployment_id
 
 
 
@@ -33,8 +36,30 @@ Can you explain this meme? | This meme is poking fun at the fact that the names
 """
 
 
+import threading, shutil
+
+def schedule_cleanup(paths, delay=600):
+    def _clean():
+        time.sleep(delay)
+        for p in (paths if isinstance(paths, (list, tuple)) else [paths]):
+            try:
+                if os.path.isdir(p):
+                    shutil.rmtree(p, ignore_errors=True)
+                elif os.path.isfile(p):
+                    os.remove(p)
+            except:
+                pass
+    threading.Thread(target=_clean, daemon=True).start()
+
+
-def grade(file_obj, progress=gr.Progress()):
+def grade(file_obj, key, model, progress=gr.Progress()):
+    # set API key
+    openai.api_key = key
+    gpt_model = model
+
+    workdir = tempfile.mkdtemp(prefix="mmvet_grade_")
+    uid = uuid.uuid4().hex
 
     # load metadata
     # Download mm-vet.zip and `unzip mm-vet.zip` and change the path below
     mmvet_path = "mm-vet"
@@ -104,21 +129,16 @@ def grade(file_obj, progress=gr.Progress()):
 
 
     ###### change your model name ######
-    model = file_obj.name.split("/")[-1][:-5]
+    model_name = os.path.basename(file_obj.name)[:-5]
    # result_path = "results"
     num_run = 1 # we set 5 in the paper
     # model_results_file = os.path.join(result_path, f"{model}.json")
     model_results_file = file_obj.name
 
-    # grade results for each sample to svae
-    grade_file = f'{model}_{gpt_model}-grade-{num_run}runs.json'
-    # grade_file = os.path.join(result_path, grade_file)
-
-    # score results regarding capabilities/capability integration to save
-    cap_score_file = f'{model}_{sub_set_name}{gpt_model}-cap-score-{num_run}runs.csv'
-    # cap_score_file = os.path.join(result_path, cap_score_file)
-    cap_int_score_file = f'{model}_{sub_set_name}{gpt_model}-cap-int-score-{num_run}runs.csv'
-    # cap_int_score_file = os.path.join(result_path, cap_int_score_file)
+    grade_file = os.path.join(workdir, f'{model_name}_{gpt_model}-grade-{num_run}runs_{uid}.json')
+    cap_score_file = os.path.join(workdir, f'{model_name}_{sub_set_name}{gpt_model}-cap-score-{num_run}runs_{uid}.csv')
+    cap_int_score_file = os.path.join(workdir, f'{model_name}_{sub_set_name}{gpt_model}-cap-int-score-{num_run}runs_{uid}.csv')
+    zip_file = os.path.join(workdir, f"results_{uid}.zip")
 
 
 
@@ -170,8 +190,8 @@ def grade(file_obj, progress=gr.Progress()):
             while not grade_sample_run_complete:
                 try:
                     response = openai.ChatCompletion.create(
-                        # model=gpt_model,
-                        engine=gpt_model,
+                        model=gpt_model,
+                        # engine=gpt_model, # For Azure OpenAI
                         max_tokens=3,
                         temperature=temperature,
                         messages=messages)
@@ -191,8 +211,8 @@ def grade(file_obj, progress=gr.Progress()):
                        {"role": "user", "content": question},
                     ]
                     response = openai.ChatCompletion.create(
-                        # model=gpt_model,
-                        engine=gpt_model,
+                        model=gpt_model,
+                        # engine=gpt_model, # For Azure OpenAI
                         max_tokens=3,
                         temperature=temperature,
                         messages=messages)
@@ -205,7 +225,8 @@ def grade(file_obj, progress=gr.Progress()):
                         score = 0.0
                         flag = False
                     grade_sample_run_complete = True
-                except:
+                except Exception as e:
+                    print(e)
                     # gpt4 may have token rate limit
                     num_sleep += 1
                     if num_sleep > 12:
@@ -217,14 +238,18 @@ def grade(file_obj, progress=gr.Progress()):
                    time.sleep(5)
 
 
+            resp_model = str(response.get('model', gpt_model))
+            content_str = str(content)
+
             if len(sample_grade['model']) >= j + 1:
-                sample_grade['model'][j] = response['model']
-                sample_grade['content'][j] = content
+                sample_grade['model'][j] = resp_model
+                sample_grade['content'][j] = content_str
                 sample_grade['score'][j] = score
             else:
-                sample_grade['model'].append(response['model'])
-                sample_grade['content'].append(content)
+                sample_grade['model'].append(resp_model)
+                sample_grade['content'].append(content_str)
                 sample_grade['score'].append(score)
+
         grade_results[id] = sample_grade
 
         with open(grade_file, 'w') as f:
@@ -280,12 +305,13 @@ def grade(file_obj, progress=gr.Progress()):
     df2.to_csv(cap_int_score_file)
 
     files = [cap_score_file, cap_int_score_file, grade_file]
-    zip_file = f"results.zip"
     with ZipFile(zip_file, "w") as zipObj:
-        for idx, file in enumerate(files):
-            zipObj.write(file, file)
-        for file in files:
-            os.remove(file)
+        for fpath in files:
+            arcname = os.path.basename(fpath)
+            zipObj.write(fpath, arcname)
+    for fpath in files:
+        os.remove(fpath)
+    schedule_cleanup([zip_file, workdir], delay=600)
     return zip_file
 
 
@@ -296,6 +322,39 @@ def grade(file_obj, progress=gr.Progress()):
     # outputs="file")
 
 
+
+# --- Validate key and model before running grading ---
+def validate_key_and_model(key: str, model: str):
+    openai.api_key = key.strip()  # strip leading/trailing spaces
+    try:
+        # This call is fast and checks both key validity and model availability
+        openai.Model.retrieve(model)
+        return True, "OK"
+    except openai.error.AuthenticationError:
+        return False, "Invalid OpenAI API key. Please check and try again."
+    except openai.error.InvalidRequestError as e:
+        msg = str(e)
+        if "does not exist" in msg or "You do not have access" in msg or "model_not_found" in msg:
+            return False, f"API key is valid, but you do not have access to model `{model}`."
+        return False, f"Invalid request: {msg}"
+    except openai.error.RateLimitError:
+        return False, "Rate limit or quota exceeded. Please try again later."
+    except openai.error.APIConnectionError:
+        return False, "Failed to connect to OpenAI service. Please check your network."
+    except openai.error.OpenAIError as e:
+        return False, f"OpenAI returned an error: {e}"
+    except Exception as e:
+        return False, f"Unexpected error: {e}"
+
+# --- Wrapper for the grading function ---
+def run_grade(file_obj, key, model, progress=gr.Progress(track_tqdm=True)):
+    ok, msg = validate_key_and_model(key, model)
+    if not ok:
+        # This will be visible to the user in the Gradio UI
+        raise gr.Error(msg)
+    return grade(file_obj, key, model, progress=progress)
+
+
 markdown = """
 <p align="center">
 <img src="https://github-production-user-asset-6210df.s3.amazonaws.com/49296856/258254299-29c00dae-8201-4128-b341-dad4663b544a.jpg" width="400"> <br>
@@ -304,7 +363,7 @@ markdown = """
 
 # [MM-Vet: Evaluating Large Multimodal Models for Integrated Capabilities](https://arxiv.org/abs/2308.02490)
 
-In this demo, we offer MM-Vet LLM-based (GPT-4) evaluator to grade open-ended outputs from your models.
+This demo uses an LLM-based (GPT-4) evaluator to grade open-ended outputs from your models.
 
 Please upload the json file of your model results containing `{v1_0: ..., v1_1: ..., }` like [this json file](https://raw.githubusercontent.com/yuweihao/MM-Vet/main/results/llava_llama2_13b_chat.json).
 
@@ -316,10 +375,17 @@ The grading results will be downloaded as a zip file.
 
 with gr.Blocks() as demo:
     gr.Markdown(markdown)
+    key = gr.Textbox(label="Enter your OpenAI API Key (this space will not save your API Key)", type="password")
+    model = gr.Dropdown(
+        choices=["gpt-4-0613", "gpt-4-turbo"],
+        value="gpt-4-0613",
+        label="Select GPT-4 model version (gpt-4-0613 is the default and gpt-4-turbo is cheaper)"
+    )
     with gr.Row():
         inp = gr.File(file_types=[".json"])
         out = gr.File(file_types=[".zip"])
+    btn = gr.Button("Start grading", variant="primary")
+    btn.click(fn=run_grade, inputs=[inp, key, model], outputs=out)
 
 if __name__ == "__main__":
-    demo.queue().launch()
+    demo.queue(max_size=8).launch()
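
The demo's description above asks for a results file shaped like `{v1_0: ..., v1_1: ...}`. A minimal sketch of producing one (the filename and answers here are placeholders; a real file maps every MM-Vet question id to the model's answer string, as in the linked llava_llama2_13b_chat.json):

```python
import json

# Hypothetical results for illustration only; a real file covers all MM-Vet
# question ids ("v1_0", "v1_1", ...), each mapped to a free-form answer string.
results = {
    "v1_0": "The answer is ...",
    "v1_1": "This image shows ...",
}

# The filename minus ".json" becomes the model name in the graded output
# (the new code derives it via os.path.basename(file_obj.name)[:-5]).
with open("my_model.json", "w") as f:
    json.dump(results, f, indent=2)
```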
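Note that `validate_key_and_model` relies on `openai.Model.retrieve` and the `openai.error` exception classes, which exist only in the pre-1.0 `openai` SDK; both were removed in `openai>=1.0`. A quick standalone check (a sketch; it assumes this `app.py` is importable as `app`, which also builds, but does not launch, the Gradio Blocks):

```python
from app import validate_key_and_model

# Returns (True, "OK") on success, or (False, <user-facing message>) otherwise.
ok, msg = validate_key_and_model("sk-...", "gpt-4-0613")  # placeholder key
print(ok, msg)
```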
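The new `schedule_cleanup` helper is fire-and-forget: a daemon thread sleeps, then removes whatever paths it was given, so graded artifacts disappear roughly ten minutes after they are served. A usage sketch under the same importability assumption (the daemon thread only survives to do the deletion in a long-lived process like the running Space, not in a script that exits immediately):

```python
import os
import tempfile

from app import schedule_cleanup

workdir = tempfile.mkdtemp(prefix="mmvet_grade_")
zip_path = os.path.join(workdir, "results.zip")
open(zip_path, "wb").close()            # stand-in for a real results archive
schedule_cleanup([zip_path, workdir])   # deleted in the background after 600 s
```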