File size: 4,860 Bytes
c114fc1 042ce46 c114fc1 18bcedb c114fc1 042ce46 c114fc1 042ce46 c114fc1 042ce46 c114fc1 18bcedb c114fc1 18bcedb c114fc1 042ce46 c114fc1 042ce46 c114fc1 042ce46 c114fc1 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 |
import os
import pandas as pd
from pandasai import Agent, SmartDataframe
from typing import Tuple
from PIL import Image
from pandasai.llm import HuggingFaceTextGen
from dotenv import load_dotenv
from langchain_groq.chat_models import ChatGroq
from langchain_google_genai import GoogleGenerativeAI
load_dotenv()

# NOTE(review): the functions below read the Groq key from "GROQ_API"
# (see load_agent / ask_question), while this constant reads "GROQ_API_KEY"
# and is not used elsewhere in this file -- confirm which name is canonical.
# Use .get() so merely importing the module does not raise KeyError when the
# variable is absent (the old hard lookup crashed at import time).
Groq_Token = os.environ.get("GROQ_API_KEY")

# Short model aliases -> provider model identifiers.
models = {"mixtral": "mixtral-8x7b-32768", "llama": "llama2-70b-4096", "gemma": "gemma-7b-it", "gemini-pro": "gemini-pro"}

# Hugging Face read token (only needed by the commented-out HF inference paths).
hf_token = os.getenv("HF_READ")
# Google Gemini API key.
gemini_token = os.getenv("GEMINI_TOKEN")
def preprocess_and_load_df(path: str) -> pd.DataFrame:
    """Read the CSV at *path* and parse its "Timestamp" column to datetimes."""
    frame = pd.read_csv(path)
    frame["Timestamp"] = pd.to_datetime(frame["Timestamp"])
    return frame
def load_agent(df: pd.DataFrame, context: str, inference_server: str, name="mixtral") -> Agent:
    """Build a pandasai ``Agent`` over *df* backed by the requested chat model.

    Parameters
    ----------
    df : pd.DataFrame
        Data the agent will answer questions about.
    context : str
        Seed message added to the agent's conversation history.
    inference_server : str
        Unused; retained for backward compatibility with a retired
        HuggingFace text-generation path.
    name : str
        Key into the module-level ``models`` mapping. "gemini-pro" routes to
        Google Generative AI; anything else goes to Groq.
    """
    if name == "gemini-pro":
        # BUG FIX: previously referenced an undefined variable ``model``;
        # resolve the model id through the module-level ``models`` mapping.
        llm = GoogleGenerativeAI(model=models[name], google_api_key=gemini_token, temperature=0.1)
    else:
        llm = ChatGroq(model=models[name], api_key=os.getenv("GROQ_API"), temperature=0.1)
    # Caching disabled so repeated questions always hit the live model.
    agent = Agent(df, config={"llm": llm, "enable_cache": False, "options": {"wait_for_model": True}})
    agent.add_message(context)
    return agent
def load_smart_df(df: pd.DataFrame, inference_server: str, name="mixtral") -> SmartDataframe:
    """Wrap *df* in a ``SmartDataframe`` driven by a Groq-hosted chat model.

    ``inference_server`` is accepted for interface compatibility but unused
    (it belonged to a retired HuggingFace inference path).
    """
    chat_model = ChatGroq(model=models[name], api_key=os.getenv("GROQ_API"), temperature=0.1)
    smart = SmartDataframe(df, config={"llm": chat_model, "max_retries": 5, "enable_cache": False})
    return smart
def get_from_user(prompt):
    """Package *prompt* as a chat-history entry attributed to the user."""
    return dict(role="user", content=prompt)
def ask_agent(agent: Agent, prompt: str) -> dict:
    """Send *prompt* to *agent* and collect the response plus its provenance.

    Returns a chat-message dict holding the assistant's response together
    with the code the agent generated, the code it actually executed, and
    the final prompt it used. (The old annotation claimed
    ``Tuple[str, str, str]`` even though a dict has always been returned.)
    """
    response = agent.chat(prompt)
    return {
        "role": "assistant",
        "content": response,
        "gen_code": agent.last_code_generated,
        "ex_code": agent.last_code_executed,
        "last_prompt": agent.last_prompt,
    }
def decorate_with_code(response: dict) -> str:
    """Render the generated code and prompt of *response* as HTML <details> blocks.

    NOTE: the trailing Prompt <details> element is deliberately left unclosed;
    callers append ``</details>`` (and more content) themselves.
    """
    lines = [
        "<details>",
        "<summary>Generated Code</summary>",
        "```python",
        response["gen_code"],
        "```",
        "</details>",
        "<details>",
        "<summary>Prompt</summary>",
        response["last_prompt"],
        "",
    ]
    return "\n".join(lines)
def show_response(st, response):
    """Render a chat *response* through streamlit, as an image when possible.

    Tries to open ``response["content"]`` as an image; any failure (including
    a render error) falls back to markdown text. Returns ``{"is_image": bool}``.
    """
    with st.chat_message(response["role"]):
        has_code = "gen_code" in response
        try:
            img = Image.open(response["content"])
            if has_code:
                st.markdown(decorate_with_code(response), unsafe_allow_html=True)
            st.image(img)
            return {"is_image": True}
        except Exception:
            # Content is not a loadable image path: show it as text,
            # optionally prefixed with the generated-code details block.
            if has_code:
                body = decorate_with_code(response) + f"""</details>
{response["content"]}"""
            else:
                body = response["content"]
            st.markdown(body, unsafe_allow_html=True)
            return {"is_image": False}
def ask_question(model_name, question):
    """Answer *question* about Data.csv by asking an LLM to write pandas code.

    Builds a code-completion prompt exposing the dataframe's dtypes, asks the
    selected model for the missing code, executes the combined snippet, and
    returns a chat-message dict describing the exchange.

    Parameters
    ----------
    model_name : str
        Key into the module-level ``models`` mapping; "gemini-pro" routes to
        Google Generative AI, everything else to Groq.
    question : str
        Natural-language question to turn into pandas code.
    """
    if model_name == "gemini-pro":
        # BUG FIX: previously referenced an undefined variable ``model``;
        # resolve the model id through the module-level ``models`` mapping.
        llm = GoogleGenerativeAI(model=models[model_name], google_api_key=os.environ.get("GOOGLE_API_KEY"), temperature=0)
    else:
        llm = ChatGroq(model=models[model_name], api_key=os.getenv("GROQ_API"), temperature=0.1)
    # Load a small sample only to surface column dtypes in the prompt.
    df_check = pd.read_csv("Data.csv")
    df_check["Timestamp"] = pd.to_datetime(df_check["Timestamp"])
    df_check = df_check.head(5)
    new_line = "\n"
    template = f"""```python
import pandas as pd
import matplotlib.pyplot as plt
df = pd.read_csv("Data.csv")
df["Timestamp"] = pd.to_datetime(df["Timestamp"])
# df.dtypes
{new_line.join(map(lambda x: '# '+x, str(df_check.dtypes).split(new_line)))}
# {question.strip()}
# <your code here>
```
"""
    query = f"""I have a pandas dataframe data of PM2.5 and PM10.
* Frequency of data is daily.
* `pollution` generally means `PM2.5`.
* Save result in a variable `answer` and make it global.
* If result is a plot, save it and save path in `answer`. Example: `answer='plot.png'`
* If result is not a plot, save it as a string in `answer`. Example: `answer='The city is Mumbai'`
Complete the following code.
{template}
"""
    if model_name == "gemini-pro":
        # GoogleGenerativeAI.invoke returns a plain string.
        answer = llm.invoke(query)
    else:
        # ChatGroq.invoke returns a message object; take its text content.
        answer = llm.invoke(query).content
    code = f"""
{template.split("```python")[1].split("```")[0]}
{answer.split("```python")[1].split("```")[0]}
"""
    # SECURITY: this executes model-generated code in-process. Only use with
    # trusted models/inputs; consider a sandboxed executor.
    # NOTE(review): the prompt asks the generated code to set a global
    # ``answer``, but exec() here does not rebind the local ``answer``
    # below -- confirm whether the exec result should be surfaced instead.
    exec(code)
    # BUG FIX: ``answer`` is already a str in both branches; the old code
    # returned ``answer.content``, which raised AttributeError at runtime.
    return {"role": "assistant", "content": answer, "gen_code": code, "ex_code": code, "last_prompt": question}