"""Print the per-class row counts of the cleaned qbias dataset.

Running this file as a script reads ``cleaned_qbias_dataset.csv`` from the
current working directory and prints ``value_counts()`` of its ``label``
column.
"""

import pandas as pd

# Disabled one-off balancing step, kept for reference only.  It lives inside a
# string literal, so none of it is executed.
# NOTE(review): only the center class (label 1) is upsampled; the right class
# (label 2) is concatenated unchanged, so the output is balanced only if the
# right class already has as many rows as the left class -- confirm before
# re-enabling.
'''
import pandas as pd
from sklearn.utils import resample

# Load the cleaned dataset
df = pd.read_csv("cleaned_qbias_dataset.csv")

# Separate majority and minority classes
df_left = df[df.label == 0]
df_center = df[df.label == 1]
df_right = df[df.label == 2]

# Determine target size (matching the majority class: 'left')
target_size = len(df_left)

# Upsample center class to match 'left'
df_center_upsampled = resample(
    df_center,
    replace=True,
    n_samples=target_size,
    random_state=42
)

# Combine all classes into one balanced DataFrame
df_balanced = pd.concat([df_left, df_center_upsampled, df_right])

# Shuffle the final dataset
df_balanced = df_balanced.sample(frac=1, random_state=42).reset_index(drop=True)

# Save to new CSV
df_balanced.to_csv("cleaned_qbias_balanced.csv", index=False)

print("Balanced dataset saved as cleaned_qbias_balanced.csv")
print(df_balanced['label'].value_counts())
'''


def label_counts(csv_path="cleaned_qbias_dataset.csv", label_column="label"):
    """Return how many rows of a CSV file carry each label value.

    Args:
        csv_path: Path of the CSV file to read.  Defaults to the dataset
            file name this script has always used.
        label_column: Name of the column whose values are counted.

    Returns:
        The ``pandas.Series`` produced by ``value_counts()`` on that column.

    Raises:
        FileNotFoundError: If ``csv_path`` does not exist.
        KeyError: If the file has no column named ``label_column``.
    """
    frame = pd.read_csv(csv_path)
    return frame[label_column].value_counts()


def main():
    """Print the label distribution of the default dataset file."""
    print(label_counts())


# Guarded so that importing this module does not trigger file I/O.
if __name__ == "__main__":
    main()