File size: 4,860 Bytes
c114fc1 042ce46 c114fc1 18bcedb c114fc1 042ce46 c114fc1 042ce46 c114fc1 042ce46 c114fc1 18bcedb c114fc1 18bcedb c114fc1 042ce46 c114fc1 042ce46 c114fc1 042ce46 c114fc1 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 |
import os
import pandas as pd
from pandasai import Agent, SmartDataframe
from typing import Tuple
from PIL import Image
from pandasai.llm import HuggingFaceTextGen
from dotenv import load_dotenv
from langchain_groq.chat_models import ChatGroq
from langchain_google_genai import GoogleGenerativeAI
load_dotenv()

# NOTE(review): the functions below read the Groq key from "GROQ_API"
# (see load_agent / ask_question), while this constant reads "GROQ_API_KEY"
# and is not used elsewhere in this file -- confirm which name is canonical.
# Use .get() so merely importing the module does not raise KeyError when the
# variable is absent (the old hard lookup crashed at import time).
Groq_Token = os.environ.get("GROQ_API_KEY")

# Short model aliases -> provider model identifiers.
models = {"mixtral": "mixtral-8x7b-32768", "llama": "llama2-70b-4096", "gemma": "gemma-7b-it", "gemini-pro": "gemini-pro"}

# Hugging Face read token (only needed by the commented-out HF inference paths).
hf_token = os.getenv("HF_READ")
# Google Gemini API key.
gemini_token = os.getenv("GEMINI_TOKEN")
def preprocess_and_load_df(path: str) -> pd.DataFrame:
    """Read the CSV at *path* and parse its "Timestamp" column to datetimes."""
    frame = pd.read_csv(path)
    frame["Timestamp"] = pd.to_datetime(frame["Timestamp"])
    return frame
def load_agent(df: pd.DataFrame, context: str, inference_server: str, name="mixtral") -> Agent:
    """Build a pandasai ``Agent`` over *df* backed by the requested chat model.

    Parameters
    ----------
    df : pd.DataFrame
        Data the agent will answer questions about.
    context : str
        Seed message added to the agent's conversation history.
    inference_server : str
        Unused; retained for backward compatibility with a retired
        HuggingFace text-generation path.
    name : str
        Key into the module-level ``models`` mapping. "gemini-pro" routes to
        Google Generative AI; anything else goes to Groq.
    """
    if name == "gemini-pro":
        # BUG FIX: previously referenced an undefined variable ``model``;
        # resolve the model id through the module-level ``models`` mapping.
        llm = GoogleGenerativeAI(model=models[name], google_api_key=gemini_token, temperature=0.1)
    else:
        llm = ChatGroq(model=models[name], api_key=os.getenv("GROQ_API"), temperature=0.1)
    # Caching disabled so repeated questions always hit the live model.
    agent = Agent(df, config={"llm": llm, "enable_cache": False, "options": {"wait_for_model": True}})
    agent.add_message(context)
    return agent
def load_smart_df(df: pd.DataFrame, inference_server: str, name="mixtral") -> SmartDataframe:
    """Wrap *df* in a ``SmartDataframe`` driven by a Groq-hosted chat model.

    ``inference_server`` is accepted for interface compatibility but unused
    (it belonged to a retired HuggingFace inference path).
    """
    chat_model = ChatGroq(model=models[name], api_key=os.getenv("GROQ_API"), temperature=0.1)
    smart = SmartDataframe(df, config={"llm": chat_model, "max_retries": 5, "enable_cache": False})
    return smart
def get_from_user(prompt):
    """Package *prompt* as a chat-history entry attributed to the user."""
    return dict(role="user", content=prompt)
def ask_agent(agent: Agent, prompt: str) -> dict:
    """Send *prompt* to *agent* and collect the response plus its provenance.

    Returns a chat-message dict holding the assistant's response together
    with the code the agent generated, the code it actually executed, and
    the final prompt it used. (The old annotation claimed
    ``Tuple[str, str, str]`` even though a dict has always been returned.)
    """
    response = agent.chat(prompt)
    return {
        "role": "assistant",
        "content": response,
        "gen_code": agent.last_code_generated,
        "ex_code": agent.last_code_executed,
        "last_prompt": agent.last_prompt,
    }
def decorate_with_code(response: dict) -> str:
    """Render the generated code and prompt of *response* as HTML <details> blocks.

    NOTE: the trailing Prompt <details> element is deliberately left unclosed;
    callers append ``</details>`` (and more content) themselves.
    """
    lines = [
        "<details>",
        "<summary>Generated Code</summary>",
        "```python",
        response["gen_code"],
        "```",
        "</details>",
        "<details>",
        "<summary>Prompt</summary>",
        response["last_prompt"],
        "",
    ]
    return "\n".join(lines)
def show_response(st, response):
    """Render a chat *response* through streamlit, as an image when possible.

    Tries to open ``response["content"]`` as an image; any failure (including
    a render error) falls back to markdown text. Returns ``{"is_image": bool}``.
    """
    with st.chat_message(response["role"]):
        has_code = "gen_code" in response
        try:
            img = Image.open(response["content"])
            if has_code:
                st.markdown(decorate_with_code(response), unsafe_allow_html=True)
            st.image(img)
            return {"is_image": True}
        except Exception:
            # Content is not a loadable image path: show it as text,
            # optionally prefixed with the generated-code details block.
            if has_code:
                body = decorate_with_code(response) + f"""</details>
{response["content"]}"""
            else:
                body = response["content"]
            st.markdown(body, unsafe_allow_html=True)
            return {"is_image": False}
def ask_question(model_name, question):
    """Answer *question* about Data.csv by asking an LLM to write pandas code.

    Builds a code-completion prompt exposing the dataframe's dtypes, asks the
    selected model for the missing code, executes the combined snippet, and
    returns a chat-message dict describing the exchange.

    Parameters
    ----------
    model_name : str
        Key into the module-level ``models`` mapping; "gemini-pro" routes to
        Google Generative AI, everything else to Groq.
    question : str
        Natural-language question to turn into pandas code.
    """
    if model_name == "gemini-pro":
        # BUG FIX: previously referenced an undefined variable ``model``;
        # resolve the model id through the module-level ``models`` mapping.
        llm = GoogleGenerativeAI(model=models[model_name], google_api_key=os.environ.get("GOOGLE_API_KEY"), temperature=0)
    else:
        llm = ChatGroq(model=models[model_name], api_key=os.getenv("GROQ_API"), temperature=0.1)
    # Load a small sample only to surface column dtypes in the prompt.
    df_check = pd.read_csv("Data.csv")
    df_check["Timestamp"] = pd.to_datetime(df_check["Timestamp"])
    df_check = df_check.head(5)
    new_line = "\n"
    template = f"""```python
import pandas as pd
import matplotlib.pyplot as plt
df = pd.read_csv("Data.csv")
df["Timestamp"] = pd.to_datetime(df["Timestamp"])
# df.dtypes
{new_line.join(map(lambda x: '# '+x, str(df_check.dtypes).split(new_line)))}
# {question.strip()}
# <your code here>
```
"""
    query = f"""I have a pandas dataframe data of PM2.5 and PM10.
* Frequency of data is daily.
* `pollution` generally means `PM2.5`.
* Save result in a variable `answer` and make it global.
* If result is a plot, save it and save path in `answer`. Example: `answer='plot.png'`
* If result is not a plot, save it as a string in `answer`. Example: `answer='The city is Mumbai'`
Complete the following code.
{template}
"""
    if model_name == "gemini-pro":
        # GoogleGenerativeAI.invoke returns a plain string.
        answer = llm.invoke(query)
    else:
        # ChatGroq.invoke returns a message object; take its text content.
        answer = llm.invoke(query).content
    code = f"""
{template.split("```python")[1].split("```")[0]}
{answer.split("```python")[1].split("```")[0]}
"""
    # SECURITY: this executes model-generated code in-process. Only use with
    # trusted models/inputs; consider a sandboxed executor.
    # NOTE(review): the prompt asks the generated code to set a global
    # ``answer``, but exec() here does not rebind the local ``answer``
    # below -- confirm whether the exec result should be surfaced instead.
    exec(code)
    # BUG FIX: ``answer`` is already a str in both branches; the old code
    # returned ``answer.content``, which raised AttributeError at runtime.
    return {"role": "assistant", "content": answer, "gen_code": code, "ex_code": code, "last_prompt": question}