update question download format
Browse files
backend/routes/download.py
CHANGED
|
@@ -15,23 +15,23 @@ router = APIRouter(tags=["download"])
|
|
| 15 |
@router.get("/download-dataset/{session_id}")
|
| 16 |
async def download_dataset(session_id: str):
|
| 17 |
"""
|
| 18 |
-
|
| 19 |
|
| 20 |
Args:
|
| 21 |
-
session_id:
|
| 22 |
|
| 23 |
Returns:
|
| 24 |
-
|
| 25 |
"""
|
| 26 |
try:
|
| 27 |
-
#
|
| 28 |
with tempfile.TemporaryDirectory() as temp_dir:
|
| 29 |
-
#
|
| 30 |
repo_id = f"yourbench/yourbench_{session_id}"
|
| 31 |
|
| 32 |
try:
|
| 33 |
-
#
|
| 34 |
-
logging.info(f"
|
| 35 |
snapshot_path = snapshot_download(
|
| 36 |
repo_id=repo_id,
|
| 37 |
repo_type="dataset",
|
|
@@ -39,22 +39,22 @@ async def download_dataset(session_id: str):
|
|
| 39 |
token=os.environ.get("HF_TOKEN")
|
| 40 |
)
|
| 41 |
|
| 42 |
-
logging.info(f"Dataset
|
| 43 |
|
| 44 |
-
#
|
| 45 |
zip_io = io.BytesIO()
|
| 46 |
with zipfile.ZipFile(zip_io, 'w', zipfile.ZIP_DEFLATED) as zip_file:
|
| 47 |
-
#
|
| 48 |
for root, _, files in os.walk(snapshot_path):
|
| 49 |
for file in files:
|
| 50 |
file_path = os.path.join(root, file)
|
| 51 |
arc_name = os.path.relpath(file_path, snapshot_path)
|
| 52 |
zip_file.write(file_path, arcname=arc_name)
|
| 53 |
|
| 54 |
-
#
|
| 55 |
zip_io.seek(0)
|
| 56 |
|
| 57 |
-
#
|
| 58 |
filename = f"yourbench_{session_id}_dataset.zip"
|
| 59 |
return StreamingResponse(
|
| 60 |
zip_io,
|
|
@@ -63,31 +63,31 @@ async def download_dataset(session_id: str):
|
|
| 63 |
)
|
| 64 |
|
| 65 |
except Exception as e:
|
| 66 |
-
logging.error(f"
|
| 67 |
raise HTTPException(
|
| 68 |
status_code=500,
|
| 69 |
-
detail=f"
|
| 70 |
)
|
| 71 |
except Exception as e:
|
| 72 |
-
logging.error(f"
|
| 73 |
raise HTTPException(
|
| 74 |
status_code=500,
|
| 75 |
-
detail=f"
|
| 76 |
)
|
| 77 |
|
| 78 |
@router.get("/download-questions/{session_id}")
|
| 79 |
async def download_questions(session_id: str):
|
| 80 |
"""
|
| 81 |
-
|
| 82 |
|
| 83 |
Args:
|
| 84 |
-
session_id:
|
| 85 |
|
| 86 |
Returns:
|
| 87 |
-
|
| 88 |
"""
|
| 89 |
try:
|
| 90 |
-
#
|
| 91 |
dataset_repo_id = f"yourbench/yourbench_{session_id}"
|
| 92 |
|
| 93 |
# Initialize questions list
|
|
@@ -126,13 +126,10 @@ async def download_questions(session_id: str):
|
|
| 126 |
|
| 127 |
# If we couldn't load any questions, the dataset might not exist
|
| 128 |
if len(all_questions) == 0:
|
| 129 |
-
raise HTTPException(status_code=404, detail="
|
| 130 |
|
| 131 |
-
# Convert questions to JSON
|
| 132 |
-
questions_json = json.dumps({
|
| 133 |
-
"session_id": session_id,
|
| 134 |
-
"questions": all_questions
|
| 135 |
-
}, ensure_ascii=False, indent=2)
|
| 136 |
|
| 137 |
# Create a BytesIO object with the JSON data
|
| 138 |
json_bytes = io.BytesIO(questions_json.encode('utf-8'))
|
|
@@ -150,8 +147,8 @@ async def download_questions(session_id: str):
|
|
| 150 |
# Re-raise HTTP exceptions
|
| 151 |
raise
|
| 152 |
except Exception as e:
|
| 153 |
-
logging.error(f"
|
| 154 |
raise HTTPException(
|
| 155 |
status_code=500,
|
| 156 |
-
detail=f"
|
| 157 |
)
|
|
|
|
| 15 |
@router.get("/download-dataset/{session_id}")
|
| 16 |
async def download_dataset(session_id: str):
|
| 17 |
"""
|
| 18 |
+
Downloads the HuggingFace dataset associated with a session and returns it to the client
|
| 19 |
|
| 20 |
Args:
|
| 21 |
+
session_id: Session identifier
|
| 22 |
|
| 23 |
Returns:
|
| 24 |
+
ZIP file containing the dataset
|
| 25 |
"""
|
| 26 |
try:
|
| 27 |
+
# Create a temporary directory to store the dataset files
|
| 28 |
with tempfile.TemporaryDirectory() as temp_dir:
|
| 29 |
+
# HuggingFace repo identifier
|
| 30 |
repo_id = f"yourbench/yourbench_{session_id}"
|
| 31 |
|
| 32 |
try:
|
| 33 |
+
# Download the dataset snapshot from HuggingFace
|
| 34 |
+
logging.info(f"Downloading dataset {repo_id}")
|
| 35 |
snapshot_path = snapshot_download(
|
| 36 |
repo_id=repo_id,
|
| 37 |
repo_type="dataset",
|
|
|
|
| 39 |
token=os.environ.get("HF_TOKEN")
|
| 40 |
)
|
| 41 |
|
| 42 |
+
logging.info(f"Dataset downloaded to {snapshot_path}")
|
| 43 |
|
| 44 |
+
# Create a ZIP file in memory
|
| 45 |
zip_io = io.BytesIO()
|
| 46 |
with zipfile.ZipFile(zip_io, 'w', zipfile.ZIP_DEFLATED) as zip_file:
|
| 47 |
+
# Loop through all files in the dataset and add them to the ZIP
|
| 48 |
for root, _, files in os.walk(snapshot_path):
|
| 49 |
for file in files:
|
| 50 |
file_path = os.path.join(root, file)
|
| 51 |
arc_name = os.path.relpath(file_path, snapshot_path)
|
| 52 |
zip_file.write(file_path, arcname=arc_name)
|
| 53 |
|
| 54 |
+
# Reset the cursor to the beginning of the stream
|
| 55 |
zip_io.seek(0)
|
| 56 |
|
| 57 |
+
# Return the ZIP to the client
|
| 58 |
filename = f"yourbench_{session_id}_dataset.zip"
|
| 59 |
return StreamingResponse(
|
| 60 |
zip_io,
|
|
|
|
| 63 |
)
|
| 64 |
|
| 65 |
except Exception as e:
|
| 66 |
+
logging.error(f"Error while downloading the dataset: {str(e)}")
|
| 67 |
raise HTTPException(
|
| 68 |
status_code=500,
|
| 69 |
+
detail=f"Error while downloading the dataset: {str(e)}"
|
| 70 |
)
|
| 71 |
except Exception as e:
|
| 72 |
+
logging.error(f"General error: {str(e)}")
|
| 73 |
raise HTTPException(
|
| 74 |
status_code=500,
|
| 75 |
+
detail=f"Error during download: {str(e)}"
|
| 76 |
)
|
| 77 |
|
| 78 |
@router.get("/download-questions/{session_id}")
|
| 79 |
async def download_questions(session_id: str):
|
| 80 |
"""
|
| 81 |
+
Downloads the questions generated for a session in JSON format
|
| 82 |
|
| 83 |
Args:
|
| 84 |
+
session_id: Session identifier
|
| 85 |
|
| 86 |
Returns:
|
| 87 |
+
JSON file containing only the list of generated questions
|
| 88 |
"""
|
| 89 |
try:
|
| 90 |
+
# HuggingFace repo identifier
|
| 91 |
dataset_repo_id = f"yourbench/yourbench_{session_id}"
|
| 92 |
|
| 93 |
# Initialize questions list
|
|
|
|
| 126 |
|
| 127 |
# If we couldn't load any questions, the dataset might not exist
|
| 128 |
if len(all_questions) == 0:
|
| 129 |
+
raise HTTPException(status_code=404, detail="No questions found for this session")
|
| 130 |
|
| 131 |
+
# Convert only the list of questions to JSON (without session_id and without wrapping object)
|
| 132 |
+
questions_json = json.dumps(all_questions, ensure_ascii=False, indent=2)
|
|
|
|
|
|
|
|
|
|
| 133 |
|
| 134 |
# Create a BytesIO object with the JSON data
|
| 135 |
json_bytes = io.BytesIO(questions_json.encode('utf-8'))
|
|
|
|
| 147 |
# Re-raise HTTP exceptions
|
| 148 |
raise
|
| 149 |
except Exception as e:
|
| 150 |
+
logging.error(f"Error retrieving questions: {str(e)}")
|
| 151 |
raise HTTPException(
|
| 152 |
status_code=500,
|
| 153 |
+
detail=f"Error downloading questions: {str(e)}"
|
| 154 |
)
|
backend/tasks/get_available_model_provider.py
CHANGED
|
@@ -102,7 +102,7 @@ def test_provider(model_name: str, provider: str, verbose: bool = False) -> bool
|
|
| 102 |
if verbose:
|
| 103 |
logger.warning(f"Error creating client for {provider}: {str(auth_error)}")
|
| 104 |
return False
|
| 105 |
-
|
| 106 |
except Exception as e:
|
| 107 |
if verbose:
|
| 108 |
logger.warning(f"Error in test_provider: {str(e)}")
|
|
@@ -172,6 +172,7 @@ def get_available_model_provider(model_name, verbose=False):
|
|
| 172 |
raise ValueError("HF_TOKEN not defined in environment")
|
| 173 |
|
| 174 |
# Get providers for the model and prioritize them
|
|
|
|
| 175 |
try:
|
| 176 |
# Essayer avec le token
|
| 177 |
try:
|
|
@@ -198,18 +199,19 @@ def get_available_model_provider(model_name, verbose=False):
|
|
| 198 |
# Autre erreur, la relancer
|
| 199 |
raise auth_error
|
| 200 |
|
| 201 |
-
if not hasattr(info, "inference_provider_mapping"):
|
| 202 |
if verbose:
|
| 203 |
logger.info(f"No inference providers found for {model_name}")
|
| 204 |
# Essayer avec la liste de providers par défaut
|
| 205 |
return _test_fallback_providers(model_name, verbose)
|
| 206 |
-
|
| 207 |
providers = list(info.inference_provider_mapping.keys())
|
| 208 |
if not providers:
|
| 209 |
if verbose:
|
| 210 |
logger.info(f"Empty list of providers for {model_name}")
|
| 211 |
# Essayer avec la liste de providers par défaut
|
| 212 |
return _test_fallback_providers(model_name, verbose)
|
|
|
|
| 213 |
except Exception as e:
|
| 214 |
if verbose:
|
| 215 |
logger.error(f"Error retrieving model info for {model_name}: {str(e)}")
|
|
@@ -271,7 +273,7 @@ def get_available_model_provider(model_name, verbose=False):
|
|
| 271 |
if verbose:
|
| 272 |
logger.error(f"Error in get_available_model_provider: {str(e)}")
|
| 273 |
return None
|
| 274 |
-
|
| 275 |
def _test_fallback_providers(model_name, verbose=False):
|
| 276 |
"""
|
| 277 |
Fonction de secours qui teste une liste de providers communs sans passer par l'API
|
|
@@ -459,10 +461,10 @@ def test_models(verbose=True):
|
|
| 459 |
"deepseek-ai/DeepSeek-R1-Distill-Llama-70B",
|
| 460 |
"mistralai/Mistral-Small-24B-Instruct-2501",
|
| 461 |
]
|
| 462 |
-
|
| 463 |
if verbose:
|
| 464 |
print("\n===== Testing all available models =====")
|
| 465 |
-
|
| 466 |
for model in models:
|
| 467 |
provider = get_available_model_provider(model, verbose)
|
| 468 |
results["all_models"][model] = provider
|
|
|
|
| 102 |
if verbose:
|
| 103 |
logger.warning(f"Error creating client for {provider}: {str(auth_error)}")
|
| 104 |
return False
|
| 105 |
+
|
| 106 |
except Exception as e:
|
| 107 |
if verbose:
|
| 108 |
logger.warning(f"Error in test_provider: {str(e)}")
|
|
|
|
| 172 |
raise ValueError("HF_TOKEN not defined in environment")
|
| 173 |
|
| 174 |
# Get providers for the model and prioritize them
|
| 175 |
+
info = None
|
| 176 |
try:
|
| 177 |
# Essayer avec le token
|
| 178 |
try:
|
|
|
|
| 199 |
# Autre erreur, la relancer
|
| 200 |
raise auth_error
|
| 201 |
|
| 202 |
+
if not info or not hasattr(info, "inference_provider_mapping"):
|
| 203 |
if verbose:
|
| 204 |
logger.info(f"No inference providers found for {model_name}")
|
| 205 |
# Essayer avec la liste de providers par défaut
|
| 206 |
return _test_fallback_providers(model_name, verbose)
|
| 207 |
+
|
| 208 |
providers = list(info.inference_provider_mapping.keys())
|
| 209 |
if not providers:
|
| 210 |
if verbose:
|
| 211 |
logger.info(f"Empty list of providers for {model_name}")
|
| 212 |
# Essayer avec la liste de providers par défaut
|
| 213 |
return _test_fallback_providers(model_name, verbose)
|
| 214 |
+
|
| 215 |
except Exception as e:
|
| 216 |
if verbose:
|
| 217 |
logger.error(f"Error retrieving model info for {model_name}: {str(e)}")
|
|
|
|
| 273 |
if verbose:
|
| 274 |
logger.error(f"Error in get_available_model_provider: {str(e)}")
|
| 275 |
return None
|
| 276 |
+
|
| 277 |
def _test_fallback_providers(model_name, verbose=False):
|
| 278 |
"""
|
| 279 |
Fonction de secours qui teste une liste de providers communs sans passer par l'API
|
|
|
|
| 461 |
"deepseek-ai/DeepSeek-R1-Distill-Llama-70B",
|
| 462 |
"mistralai/Mistral-Small-24B-Instruct-2501",
|
| 463 |
]
|
| 464 |
+
|
| 465 |
if verbose:
|
| 466 |
print("\n===== Testing all available models =====")
|
| 467 |
+
|
| 468 |
for model in models:
|
| 469 |
provider = get_available_model_provider(model, verbose)
|
| 470 |
results["all_models"][model] = provider
|
frontend/src/components/Benchmark/CreateForm.jsx
CHANGED
|
@@ -83,6 +83,12 @@ function CreateForm({ onStartGeneration }) {
|
|
| 83 |
|
| 84 |
// Liste des documents par défaut
|
| 85 |
const defaultDocuments = [
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 86 |
{
|
| 87 |
id: "the-bitter-lesson",
|
| 88 |
name: "The Bitter Lesson",
|
|
@@ -95,12 +101,6 @@ function CreateForm({ onStartGeneration }) {
|
|
| 95 |
icon: <DescriptionIcon sx={{ fontSize: 40 }} />,
|
| 96 |
description: "Frequently asked questions about hurricanes",
|
| 97 |
},
|
| 98 |
-
{
|
| 99 |
-
id: "pokemon-guide",
|
| 100 |
-
name: "Pokemon Guide",
|
| 101 |
-
icon: <MenuBookIcon sx={{ fontSize: 40 }} />,
|
| 102 |
-
description: "A comprehensive guide for Pokemon enthusiasts",
|
| 103 |
-
},
|
| 104 |
];
|
| 105 |
|
| 106 |
const handleCloseSnackbar = () => {
|
|
@@ -181,8 +181,10 @@ function CreateForm({ onStartGeneration }) {
|
|
| 181 |
align="center"
|
| 182 |
sx={{ mb: 2, color: "text.secondary" }}
|
| 183 |
>
|
| 184 |
-
To create a benchmark, choose a sample document or
|
| 185 |
-
file/URL
|
|
|
|
|
|
|
| 186 |
</Typography>
|
| 187 |
|
| 188 |
<Grid container spacing={2} sx={{ mb: 0 }}>
|
|
|
|
| 83 |
|
| 84 |
// Liste des documents par défaut
|
| 85 |
const defaultDocuments = [
|
| 86 |
+
{
|
| 87 |
+
id: "pokemon-guide",
|
| 88 |
+
name: "Pokemon Guide",
|
| 89 |
+
icon: <MenuBookIcon sx={{ fontSize: 40 }} />,
|
| 90 |
+
description: "A comprehensive guide for Pokemon enthusiasts",
|
| 91 |
+
},
|
| 92 |
{
|
| 93 |
id: "the-bitter-lesson",
|
| 94 |
name: "The Bitter Lesson",
|
|
|
|
| 101 |
icon: <DescriptionIcon sx={{ fontSize: 40 }} />,
|
| 102 |
description: "Frequently asked questions about hurricanes",
|
| 103 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 104 |
];
|
| 105 |
|
| 106 |
const handleCloseSnackbar = () => {
|
|
|
|
| 181 |
align="center"
|
| 182 |
sx={{ mb: 2, color: "text.secondary" }}
|
| 183 |
>
|
| 184 |
+
To create a benchmark, <b>choose</b> a <b>sample document</b> or{" "}
|
| 185 |
+
<b>upload</b> your <b>own file/URL</b>.
|
| 186 |
+
<br />
|
| 187 |
+
(ideally a knowledge base, a FAQ, a news article, etc.)
|
| 188 |
</Typography>
|
| 189 |
|
| 190 |
<Grid container spacing={2} sx={{ mb: 0 }}>
|