import os
import re
import uuid
from langchain_google_genai import ChatGoogleGenerativeAI
import pandas as pd
from langchain_core.prompts import ChatPromptTemplate
from langchain_experimental.tools import PythonAstREPLTool
from langchain_experimental.agents import create_pandas_dataframe_agent
from dotenv import load_dotenv
import numpy as np
import matplotlib.pyplot as plt
import matplotlib
import seaborn as sns
import datetime as dt

# Set the backend for matplotlib to 'Agg' to avoid GUI issues
matplotlib.use('Agg')

load_dotenv()

model_name = 'gemini-2.0-flash'  # Specify the model name
google_api_keys = os.getenv("GEMINI_API_KEYS").split(",")

# Create pre-initialized LLM instances
llm_instances = [
    ChatGoogleGenerativeAI(model=model_name, api_key=key)
    for key in google_api_keys
]

current_instance_index = 0  # Track current instance being used


def create_agent(llm, data, tools):
    """Create a pandas DataFrame agent that can call the provided extra tools."""
    return create_pandas_dataframe_agent(
        llm,
        data,
        agent_type="tool-calling",
        verbose=True,
        allow_dangerous_code=True,
        extra_tools=tools,
        return_intermediate_steps=True
    )


def _prompt_generator(question: str, chart_required: bool):
    """Build either the plain-analysis prompt or the chart-generation prompt."""
    chat_prompt = f"""You are a senior data analyst working with CSV data. Adhere strictly to the following guidelines:
    1. **Data Verification:** Always inspect the data with `.sample(5).to_dict()` before performing any analysis.
    2. **Data Integrity:** Ensure proper handling of null values to maintain accuracy and reliability.
    3. **Communication:** Provide concise, professional, and well-structured responses.
    4. **No Internal Details:** Do not include internal processing details or references to how the response was produced (e.g., phrases like "based on the tool call" or "using the function").

    **Query:** {question}
    """

    chart_prompt = f"""You are a senior data analyst working with CSV data. Follow these rules STRICTLY:

    1. Generate ONE unique identifier FIRST using: unique_id = uuid.uuid4().hex
    2. Visualization requirements:
       - Adjust font sizes, rotate labels (45° if needed), truncate for readability
       - Figure size: (12, 6)
       - Descriptive titles (fontsize=14)
       - Colorblind-friendly palettes
       - Do not use any visualization library other than matplotlib or seaborn
    3. File handling rules:
       - Create MAXIMUM 2 charts if absolutely necessary
       - For multiple charts:
         * Arrange in grid format (2x1 vertical layout preferred)
         * Use SAME unique_id with suffixes:
           - f"{{unique_id}}_1.png"
           - f"{{unique_id}}_2.png"
       - Save EXCLUSIVELY to "generated_charts" folder
       - File naming: f"chart_{{unique_id}}.png" (for single chart)
    4. FINAL OUTPUT MUST BE:
       - For single chart: f"generated_charts/chart_{{unique_id}}.png"
       - For multiple charts: f"generated_charts/chart_{{unique_id}}.png" (combined grid image)
       - **ONLY return this full path string, nothing else**

    **Query:** {question}

    IMPORTANT:
    - Generate the unique_id FIRST before any operations
    - Use THE SAME unique_id throughout entire process
    - NEVER generate new UUIDs after initial creation
    - Return EXACT filepath string of the final saved chart
    """

    if chart_required:
        return ChatPromptTemplate.from_template(chart_prompt)
    else:
        return ChatPromptTemplate.from_template(chat_prompt)


def langchain_gemini_csv_handler(csv_url: str, question: str, chart_required: bool):
    """Answer a question about a CSV, rotating through LLM instances on failure."""
    global current_instance_index

    data = pd.read_csv(csv_url)

    # Try all available instances
    while current_instance_index < len(llm_instances):
        try:
            llm = llm_instances[current_instance_index]
            print(f"Using LLM instance index {current_instance_index}")

            # Create tool with validated name
            tool = PythonAstREPLTool(
                locals={
                    "df": data,
                    "pd": pd,
                    "np": np,
                    "plt": plt,
                    "sns": sns,
                    "matplotlib": matplotlib,
                    "uuid": uuid,
                    "dt": dt
                },
            )

            agent = create_agent(llm, data, [tool])
            prompt = _prompt_generator(question, chart_required)
            result = agent.invoke({"input": prompt})

            output = result.get("output")
            if output is None:
                raise ValueError("Received None response from agent")

            return output
        except Exception as e:
            print(f"Error using LLM instance index {current_instance_index}: {e}")
            current_instance_index += 1

    print("All LLM instances have been exhausted.")
    return None


# ---------------------------------------------------------------------------
# Commented-out variant of this module, kept for reference. It adds explicit
# retryable-error detection and structured error responses to the handler.
# ---------------------------------------------------------------------------

# import os
# import re
# import uuid
# from langchain_google_genai import ChatGoogleGenerativeAI
# import pandas as pd
# from langchain_core.prompts import ChatPromptTemplate
# from langchain_experimental.tools import PythonAstREPLTool
# from langchain_experimental.agents import create_pandas_dataframe_agent
# from dotenv import load_dotenv
# import numpy as np
# import matplotlib.pyplot as plt
# import matplotlib
# import seaborn as sns
# import datetime as dt

# # Set the backend for matplotlib to 'Agg' to avoid GUI issues
# matplotlib.use('Agg')

# load_dotenv()

# model_name = 'gemini-2.0-flash'  # Specify the model name
# google_api_keys = os.getenv("GEMINI_API_KEYS").split(",")

# # Create pre-initialized LLM instances
# llm_instances = [
#     ChatGoogleGenerativeAI(model=model_name, api_key=key)
#     for key in google_api_keys
# ]

# current_instance_index = 0  # Track current instance being used

# def is_retryable_error(error: Exception) -> bool:
#     """Check if the error should trigger a retry with next instance"""
#     error_str = str(error).lower()
#     retry_conditions = [
#         # Rate limiting and quota errors
#         '429' in error_str,
#         'quota' in error_str,
#         'rate limit' in error_str,
#         'resource exhausted' in error_str,
#         'exceeded' in error_str,
#         'limit reached' in error_str,
#         # Authentication and permission errors
#         'permission denied' in error_str,
#         'invalid api key' in error_str,
#         'authentication' in error_str,
#         # Server errors
#         '500' in error_str,
#         '503' in error_str,
#         'service unavailable' in error_str,
#         # Connection issues
#         'timeout' in error_str,
#         'connection' in error_str,
#         # Content policy
#         'content policy' in error_str,
#         'safety' in error_str,
#         'blocked' in error_str
#     ]
#     return any(retry_conditions)

# def create_agent(llm, data, tools):
#     """Create agent with tool names"""
#     return create_pandas_dataframe_agent(
#         llm,
#         data,
#         agent_type="tool-calling",
#         verbose=True,
#         allow_dangerous_code=True,
#         extra_tools=tools,
#         return_intermediate_steps=True
#     )

# def _prompt_generator(question: str, chart_required: bool):
#     chat_prompt = f"""You are a senior data analyst working with CSV data. Adhere strictly to the following guidelines:
#     1. **Data Verification:** Always inspect the data with `.sample(5).to_dict()` before performing any analysis.
#     2. **Data Integrity:** Ensure proper handling of null values to maintain accuracy and reliability.
#     3. **Communication:** Provide concise, professional, and well-structured responses.
#     4. Avoid including any internal processing details or references to the methods used to generate your response (ex: based on the tool call, using the function -> These types of phrases.)

#     **Query:** {question}
#     """

#     chart_prompt = f"""You are a senior data analyst working with CSV data. Follow these rules STRICTLY:

#     1. Generate ONE unique identifier FIRST using: unique_id = uuid.uuid4().hex
#     2. Visualization requirements:
#        - Adjust font sizes, rotate labels (45° if needed), truncate for readability
#        - Figure size: (12, 6)
#        - Descriptive titles (fontsize=14)
#        - Colorblind-friendly palettes
#     3. File handling rules:
#        - Create MAXIMUM 2 charts if absolutely necessary
#        - For multiple charts:
#          * Arrange in grid format (2x1 vertical layout preferred)
#          * Use SAME unique_id with suffixes:
#            - f"{{unique_id}}_1.png"
#            - f"{{unique_id}}_2.png"
#        - Save EXCLUSIVELY to "generated_charts" folder
#        - File naming: f"chart_{{unique_id}}.png" (for single chart)
#     4. FINAL OUTPUT MUST BE:
#        - For single chart: f"generated_charts/chart_{{unique_id}}.png"
#        - For multiple charts: f"generated_charts/chart_{{unique_id}}.png" (combined grid image)
#        - **ONLY return this full path string, nothing else**

#     **Query:** {question}

#     IMPORTANT:
#     - Generate the unique_id FIRST before any operations
#     - Use THE SAME unique_id throughout entire process
#     - NEVER generate new UUIDs after initial creation
#     - Return EXACT filepath string of the final saved chart
#     """

#     if chart_required:
#         return ChatPromptTemplate.from_template(chart_prompt)
#     else:
#         return ChatPromptTemplate.from_template(chat_prompt)

# def langchain_gemini_csv_handler(csv_url: str, question: str, chart_required: bool):
#     global current_instance_index

#     data = pd.read_csv(csv_url)

#     # Track first error in case all instances fail
#     first_error = None

#     while current_instance_index < len(llm_instances):
#         try:
#             llm = llm_instances[current_instance_index]
#             print(f"Attempting with LLM instance {current_instance_index + 1}/{len(llm_instances)}")

#             # Create tool with validated name
#             tool = PythonAstREPLTool(
#                 locals={
#                     "df": data,
#                     "pd": pd,
#                     "np": np,
#                     "plt": plt,
#                     "sns": sns,
#                     "matplotlib": matplotlib,
#                     "uuid": uuid,
#                     "dt": dt
#                 },
#             )

#             agent = create_agent(llm, data, [tool])
#             prompt = _prompt_generator(question, chart_required)
#             result = agent.invoke({"input": prompt})

#             output = result.get("output")
#             if output is None:
#                 raise ValueError("Received None response from agent")

#             if isinstance(output, str) and any(err in output.lower() for err in ['quota', 'limit', 'exhausted']):
#                 raise ValueError(f"API limitation detected in response: {output}")

#             return output
#         except Exception as e:
#             error_msg = f"Error with instance {current_instance_index}: {str(e)}"
#             print(error_msg)

#             # Store first error if not set
#             if first_error is None:
#                 first_error = error_msg

#             # Check if we should try next instance
#             if is_retryable_error(e):
#                 current_instance_index += 1
#                 continue
#             else:
#                 # Non-retryable error - return immediately
#                 return {
#                     "error": "Non-retryable error occurred",
#                     "details": str(e),
#                     "instance": current_instance_index
#                 }

#     # All instances exhausted
#     error_response = {
#         "error": "All API instances failed",
#         "details": first_error or "Unknown error",
#         "attempted_instances": current_instance_index
#     }
#     print(error_response)
#     return error_response
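

# Minimal usage sketch for the active handler above. Everything here is
# illustrative, not part of the module: the CSV path "data/sample.csv", the
# questions, and a .env containing a comma-separated GEMINI_API_KEYS entry
# (e.g. GEMINI_API_KEYS=key1,key2) are all assumptions.
if __name__ == "__main__":
    # The chart prompt instructs the agent to save into "generated_charts";
    # creating the folder up front avoids a failed savefig inside the tool.
    os.makedirs("generated_charts", exist_ok=True)

    # Plain-text analysis question (no chart requested)
    answer = langchain_gemini_csv_handler(
        csv_url="data/sample.csv",  # hypothetical CSV path
        question="Which five products have the highest total sales?",
        chart_required=False,
    )
    print(answer)

    # Chart request: per the prompt, the agent should return only the saved file path
    chart_path = langchain_gemini_csv_handler(
        csv_url="data/sample.csv",  # hypothetical CSV path
        question="Plot total sales by month.",
        chart_required=True,
    )
    print(chart_path)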