Spaces:
Paused
Paused
| """ | |
| This module implements a hyperparameter optimization routine for the embedding application. It utilizes TPE optimization from Optuna. | |
| Each run, the optimizer will set the default values inside the hyperparameters. At the end, it will output the best ones it has found. | |
| """ | |
| import re | |
| import json | |
| import optuna | |
| import gradio as gr | |
| import numpy as np | |
| import logging | |
| import hashlib | |
| logging.getLogger('optuna').setLevel(logging.WARNING) | |
| import extensions.superboogav2.parameters as parameters | |
| from pathlib import Path | |
| from .benchmark import benchmark | |
| from .parameters import Parameters | |
| from modules.logging_colors import logger | |
| # Format the parameters into markdown format. | |
def _markdown_hyperparams():
    """Render each hyperparameter and its current default as a markdown bullet list.

    Returns:
        str: newline-joined '* name: **default**' lines with markdown
        control characters escaped in both the name and the value.
    """
    # NOTE: the hyphen is escaped (\-) so the class lists '+', '-', '.'
    # individually; the original '+-.' formed a range that also matched ','.
    escape_pattern = re.compile(r"([_*\[\]()~`>#+\-.!])")

    res = []
    for param_name, param_value in Parameters.getInstance().hyperparameters.items():
        # Escape any markdown syntax in the parameter name.
        param_name = escape_pattern.sub(r"\\\1", param_name)
        default = param_value['default']
        # 'is not None' (not plain truthiness) so real-but-falsy defaults
        # such as 0 or False are still displayed instead of a blank.
        if default is not None:
            param_value_default = escape_pattern.sub(r"\\\1", str(default))
        else:
            param_value_default = ' '
        res.append('* {}: **{}**'.format(param_name, param_value_default))

    return '\n'.join(res)
| # Convert numpy types to python types. | |
| def _convert_np_types(params): | |
| for key in params: | |
| if type(params[key]) == np.bool_: | |
| params[key] = bool(params[key]) | |
| elif type(params[key]) == np.int64: | |
| params[key] = int(params[key]) | |
| elif type(params[key]) == np.float64: | |
| params[key] = float(params[key]) | |
| return params | |
| # Set the default values for the hyperparameters. | |
def _set_hyperparameters(params):
    """Write each value in *params* into the matching hyperparameter's 'default'.

    Keys with no corresponding entry in the Parameters singleton are ignored.
    """
    hyperparams = Parameters.getInstance().hyperparameters
    for name, value in params.items():
        if name in hyperparams:
            hyperparams[name]['default'] = value
| # Check if the parameter is for optimization. | |
| def _is_optimization_param(val): | |
| is_opt = val.get('should_optimize', False) # Either does not exist or is false | |
| return is_opt | |
| # Create a hashable representation of the parameters | |
| def _get_params_hash(params): | |
| params_str = json.dumps(params, sort_keys=True) | |
| return hashlib.sha256(params_str.encode()).hexdigest() | |
def optimize(collector, progress=gr.Progress()):
    """Run TPE hyperparameter optimization over the parameters flagged for it.

    Each Optuna trial samples one categorical value per optimizable
    hyperparameter, installs them as the current defaults, and benchmarks
    the result. Repeated parameter sets are served from a score cache
    instead of being re-benchmarked.

    Args:
        collector: data collector handed through to benchmark().
        progress: Gradio progress callback. NOTE(review): the mutable
            default ``gr.Progress()`` is shared across calls — kept as-is
            for interface compatibility.

    Returns:
        str: markdown summary of the best parameters and score. The best
        parameters are also written to 'best_params.json'.
    """
    # Inform the user that something is happening.
    progress(0, desc='Setting Up...')

    current_step = 0   # completed trials; drives the progress bar
    best_score = 0     # highest raw benchmark score seen so far
    scores_cache = {}  # params-hash -> raw score, avoids re-benchmarking

    def objective_function(trial):
        nonlocal current_step
        nonlocal best_score

        # Sample a value for every hyperparameter flagged for optimization.
        params = {}
        for key, val in Parameters.getInstance().hyperparameters.items():
            if _is_optimization_param(val):
                params[key] = trial.suggest_categorical(key, val['categories'])
        _set_hyperparameters(params)

        params_hash = _get_params_hash(params)

        if params_hash in scores_cache:
            # Reuse the cached raw score for a previously-seen parameter set.
            score = scores_cache[params_hash]
        else:
            # Benchmark the current set of parameters.
            score, max_score = benchmark(Path("extensions/superboogav2/benchmark_texts/questions.json"), collector)
            scores_cache[params_hash] = score

            result = json.dumps(_convert_np_types(params), indent=4)
            result += f'\nScore: {score}/{max_score}'
            logger.debug(result)

        # Bookkeeping runs on cache hits too, so progress and best_score
        # stay accurate even when a trial repeats known parameters.
        current_step += 1
        best_score = max(best_score, score)
        progress(current_step / parameters.get_optimization_steps(), desc=f'Optimizing... {current_step}/{parameters.get_optimization_steps()}')

        # Optuna minimizes by default, so negate the score. BUG FIX: the
        # cache-hit path previously returned the raw positive score, which
        # made Optuna treat cached (often good) parameter sets as the worst
        # candidates and corrupted the study.
        return -score

    # Run the optimization.
    study = optuna.create_study()
    study.optimize(objective_function, n_trials=int(parameters.get_optimization_steps()))

    best_params = study.best_params
    _set_hyperparameters(best_params)

    # Convert results to a markdown string.
    str_result = f"## Best parameters:\n\n{_markdown_hyperparams()}\n\n## Score:\n\n{best_score}"

    # Save to JSON file
    with open('best_params.json', 'w') as fp:
        json.dump(_convert_np_types(best_params), fp, indent=4)

    return str_result