"""Configuration settings for model fine-tuning.""" | |
from pathlib import Path | |
# Directory paths | |
FINE_TUNING_DIR = Path(__file__).parent | |
MODELS_DIR = FINE_TUNING_DIR / "models" | |
DATA_DIR = FINE_TUNING_DIR / "data" | |
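
# Illustrative helper (not part of the original config): callers may want to
# make sure these directories exist before training. Path.mkdir with
# parents=True and exist_ok=True is idempotent, so this is safe to call repeatedly.
def ensure_dirs() -> None:
    """Create models/ and data/ if they are missing (sketch)."""
    for directory in (MODELS_DIR, DATA_DIR):
        directory.mkdir(parents=True, exist_ok=True)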

# Fine-tuning parameters
DEFAULT_FINE_TUNING_CONFIG = {
    "min_training_samples": 1000,
    "training_interval_hours": 24,
    "epochs": 5,  # Reduced from 10 to 5 epochs
    "batch_size": 64,  # Increased from 32 to 64 for faster processing
    "learning_rate": 2e-5,
    "max_seq_length": 512,  # Maximum sequence length for tokenization (note: CrossEncoder uses its own model.max_length)
    "warmup_steps": 500,  # Reduced from 1000 to 500
    "use_mixed_precision": True,  # Enable mixed precision training
    "gradient_accumulation_steps": 2,  # Reduced from 4 to 2 for faster updates
    "dataloader_num_workers": 2,  # Reduced from 4 to 2 to prevent memory issues
    "pin_memory": True,  # Pin memory for faster data transfer to GPU
    "weight_decay": 0.01,  # L2 regularization
    "num_cycles": 2,  # Reduced from 3 to 2
    "early_stopping_patience": 2,  # Reduced from 3 to 2
    "early_stopping_min_delta": 1e-4,  # Minimum improvement for early stopping
    "adam_epsilon": 1e-8,  # Adam optimizer epsilon
    "max_grad_norm": 1.0,  # Maximum gradient norm for clipping
    "adam_betas": (0.9, 0.999),  # Adam optimizer betas
    # Parameters for hard negative mining (names aligned with trainer.py's .get() calls)
    "hard_negatives_top_k": 2,  # Reduced from 3 to 2
    "hard_negatives_weight": 1.2,  # Weight for hard negative examples
    # Note: the following parameters from the original config are not directly used by
    # the provided trainer.py, or are redundant, so they have been removed for clarity:
    # embedding_batch_size, use_ipex, use_amp, scheduler, validation_split,
    # save_best_model, fp16_opt_level, scheduler_num_cycles, scheduler_power,
    # num_folds (trainer uses a hardcoded 5), min_feedback_confidence, ensemble_aggregation,
    # feedback_weight_scale, min_samples_per_class, temperature, use_weighted_sampling,
    # augmentation_strength, confidence_threshold, ensemble_diversity_weight,
    # label_smoothing, dropout_rate, warmup_ratio, max_train_steps, eval_steps,
    # logging_steps, save_steps, save_total_limit (utils.py has MAX_OLD_MODELS).
}
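
# Illustrative sketch (not part of the original config): how a consumer such as
# trainer.py might read these settings via .get() with fallbacks, as the
# hard-negative comment above suggests. The helper name and the defaults shown
# are hypothetical, not the actual trainer.py API.
def get_setting(key: str, default=None):
    """Look up a fine-tuning setting with an optional fallback (sketch)."""
    return DEFAULT_FINE_TUNING_CONFIG.get(key, default)

# Example (keys mirror the entries above; fallbacks are illustrative):
#   top_k = get_setting("hard_negatives_top_k", 2)
#   weight = get_setting("hard_negatives_weight", 1.0)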

# Model versioning
MODEL_STATUS = {
    "BASE": "base",
    "FINE_TUNED": "fine_tuned",
    "TRAINING": "training",
}

# File names
TRAINING_DATA_FILE = "reranker_training_data.jsonl"
MODEL_METADATA_FILE = "model_metadata.json"
USER_FEEDBACK_FILE = "user_feedback.jsonl"
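
# Illustrative sketch (hypothetical helpers, not part of the original module):
# one plausible way to persist a model's status via MODEL_METADATA_FILE and to
# append feedback records to the JSONL file named above.
import json
from datetime import datetime, timezone

def write_model_metadata(model_dir: Path, status: str) -> None:
    """Write a minimal metadata record next to a saved model (sketch)."""
    metadata = {
        "status": status,  # expected to be one of the MODEL_STATUS values
        "updated_at": datetime.now(timezone.utc).isoformat(),
    }
    (model_dir / MODEL_METADATA_FILE).write_text(json.dumps(metadata, indent=2))

def append_feedback(record: dict) -> None:
    """Append one feedback record as a JSON line (sketch)."""
    DATA_DIR.mkdir(parents=True, exist_ok=True)
    with (DATA_DIR / USER_FEEDBACK_FILE).open("a", encoding="utf-8") as fh:
        fh.write(json.dumps(record) + "\n")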