import numpy as np
import sklearn.metrics
from sklearn.cluster import KMeans
from factor_analyzer.factor_analyzer import calculate_bartlett_sphericity
from factor_analyzer.factor_analyzer import calculate_kmo

from coding.llh.visualization.draw_heat_map import draw_heat_map
from coding.llh.visualization.draw_scatter import draw_scatter_2D, draw_scatter_2D_1, draw_scatter_3D_1, draw_scatter_3D


# K-means
def k_means(array: np.ndarray):
    info = {}

    draw_scatter_2D_1(array, "2D scatter data before k-means")
    draw_scatter_3D_1(array, "3D scatter data before k-means")

    K = 60

    info["Number of clustering centers"] = K

    k_means_model = KMeans(n_clusters=K, init='k-means++')

    k_means_model.fit(array)

    sum_of_squared_errors = k_means_model.inertia_

    info["SSE"] = sum_of_squared_errors

    draw_scatter_2D(array, k_means_model.labels_, k_means_model.cluster_centers_, "2D scatter data after k-means")
    draw_scatter_3D(array, k_means_model.labels_, k_means_model.cluster_centers_, "3D scatter data after k-means")

    result = k_means_model.fit_predict(array[:200])

    silhouette_score = sklearn.metrics.silhouette_score(array[:200], result)

    info["Silhouette score"] = silhouette_score

    return info


# Bartlett sphericity test
def bartlett_test(df):
    _, p_value = calculate_bartlett_sphericity(df)

    return p_value


# KMO test
def kmo_test(df):
    _, kmo_score = calculate_kmo(df)

    return kmo_score


# Principal component analysis
def pca(df):
    # Only consider the correlation of the independent variables
    info = {}

    # array_x = df.iloc[:, 1:]
    array_x = df.iloc[:, :]
    array_y = df.iloc[:, :1]

    # Bartlett sphericity test
    p_value = bartlett_test(array_x)
    info["p value of bartlett sphericity test"] = p_value
    if p_value < 0.05:
        info["Result of bartlett sphericity test"] = "Accept"
    else:
        info["Result of bartlett sphericity test"] = "Reject"

    # KMO test
    kmo_score = kmo_test(array_x)
    info["Score of KMO test"] = kmo_score
    if kmo_score > 0.5:
        info["Result of KMO test"] = "Accept"
    else:
        info["Result of KMO test"] = "Reject"

    # get the matrix of correlation coefficients
    covX = np.around(np.corrcoef(array_x.T), decimals=3)

    # 计算协方差矩阵的对角线元素的标准差
    std_dev = np.sqrt(np.diag(covX))

    # 计算皮尔逊相关系数矩阵
    pearson_matrix = covX / np.outer(std_dev, std_dev)

    # draw_heat_map(pearson_matrix, "pearson matrix", True, df.columns.values)

    # Solve the eigenvalues and eigenvectors of the coefficient correlation matrix
    eigenvalues, eigenvectors = np.linalg.eig(covX.T)

    eigenvalues = np.around(eigenvalues, decimals=3)

    eigenvalues_dict = dict(zip(eigenvalues.tolist(), list(range(0, len(eigenvalues)))))

    # Sort feature values in descending order
    eigenvalues = sorted(eigenvalues, reverse=True)

    for i, value in enumerate(eigenvalues):
        if i == 0:
            sorted_eigenvectors = eigenvectors[:, eigenvalues_dict[value]].reshape(-1, 1)
        else:
            sorted_eigenvectors = np.concatenate((sorted_eigenvectors, eigenvectors[:, eigenvalues_dict[value]].reshape(-1, 1)), axis=1)

    # draw_line_graph(range(1, len(eigenvalues) + 1), eigenvalues, "Eigenvalue")

    # get the contribution of the eigenvalues
    contribution = eigenvalues / np.sum(eigenvalues)

    # get the cumulative contribution of the eigenvalues
    cumulative_contribution = np.cumsum(contribution)

    # Selection of principal components
    main_factors_index = [i for i in range(len(cumulative_contribution)) if cumulative_contribution[i] < 0.80]

    main_factor_num = len(main_factors_index)

    info["Main factor num"] = main_factor_num

    # Get the projection matrix
    projected_array = array_x.dot(sorted_eigenvectors[:, :main_factor_num])
    projected_array = np.concatenate((array_y.values, projected_array), axis=1)

    return projected_array, info