Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -679,45 +679,31 @@
|
|
679 |
|
680 |
|
681 |
#_______________________________________________session id -- with redis - chat session memory _________________________________________________________
|
682 |
-
#
|
683 |
-
|
684 |
-
|
685 |
-
|
686 |
-
|
687 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
688 |
from dotenv import load_dotenv
|
689 |
from datetime import datetime
|
690 |
import json
|
691 |
import uuid
|
692 |
import redis
|
693 |
|
694 |
-
# Pandas AI imports
|
695 |
-
import re
|
696 |
-
import urllib.parse
|
697 |
-
import pandas as pd
|
698 |
-
import dask.dataframe as dd
|
699 |
-
from math import ceil
|
700 |
-
import psycopg2
|
701 |
-
from pandasai import SmartDataframe
|
702 |
-
from pandasai.llm.openai import OpenAI as PandasOpenAI
|
703 |
-
|
704 |
-
|
705 |
-
|
706 |
-
|
707 |
-
from fastapi import FastAPI, Request
|
708 |
-
from fastapi.responses import JSONResponse
|
709 |
-
import json
|
710 |
-
|
711 |
-
|
712 |
-
# Import your existing S3 connection details
|
713 |
-
from retrive_secrects import * # CONNECTIONS_HOST, etc.
|
714 |
-
|
715 |
# Suppress warnings
|
716 |
warnings.filterwarnings("ignore", message="Qdrant client version.*is incompatible.*")
|
717 |
|
718 |
load_dotenv()
|
719 |
|
720 |
-
app = FastAPI(title="AI Agent with Redis Session Management
|
721 |
|
722 |
# Environment variables
|
723 |
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
|
@@ -731,26 +717,6 @@ REDIS_HOST = os.getenv("REDIS_HOST", "127.0.0.1")
|
|
731 |
REDIS_PORT = int(os.getenv("REDIS_PORT", 6379))
|
732 |
REDIS_PASSWORD = os.getenv("REDIS_PASSWORD")
|
733 |
|
734 |
-
# S3 Constants (from your original code)
|
735 |
-
S3_Bucket_Name = 'ingenspark-user-files'
|
736 |
-
S3_Raw_Files_Folder = 'User-Uploaded-Raw-Files'
|
737 |
-
S3_Modified_Files_Folder = 'Modified-Files/'
|
738 |
-
S3_Output_Files_Folder = 'Output-Files/'
|
739 |
-
S3_Published_Results_Folder = 'Published-Results/'
|
740 |
-
S3_Ingen_Customer_Output = 'Ingen-Customer/'
|
741 |
-
Dominant_Segmentation_Output = 'Dominant-Segmentation/'
|
742 |
-
Trend_Segmentation_Output = 'Trend-Segmentation/'
|
743 |
-
Decile_Quartile_segmentation_Output = 'Decile-Quartile-Segmentation/'
|
744 |
-
Combined_Segmentation_Output = 'Combine-Segmentation/'
|
745 |
-
Custom_Segmentation_Output = 'Custom-Segmentation/'
|
746 |
-
Customer_360_Output = 'Customer-360/'
|
747 |
-
Merge_file_folder = S3_Modified_Files_Folder + 'IngenData-Merged-Tables/'
|
748 |
-
S3_Dev_Doc_Images_Folder = 'Developers-Documentation-Images/'
|
749 |
-
S3_Temporary_Files_Folder = S3_Raw_Files_Folder
|
750 |
-
S3_App_Specific_Data = 'Application-Specific-Data/'
|
751 |
-
S3_Transformation_Tables_Folder = 'Modified-Files/Modified-Tables/Transformation-Tables/'
|
752 |
-
cloud_front_url = "https://files.dev.ingenspark.com/"
|
753 |
-
|
754 |
# Initialize Redis client
|
755 |
def get_redis_client():
|
756 |
"""Initialize Redis client with fallback to local Redis"""
|
@@ -797,124 +763,6 @@ embedding_model = OpenAIEmbeddings(
|
|
797 |
qdrant_client = QdrantClient(host=QDRANT_HOST, port=QDRANT_PORT)
|
798 |
llm = ChatOpenAI(model="gpt-4o", temperature=0, openai_api_key=OPENAI_API_KEY)
|
799 |
|
800 |
-
# === PANDAS AI FUNCTIONS ===
|
801 |
-
|
802 |
-
def read_parquet_file_from_s3(ufuid=None, columns_list=None, records_count=None, file_location=''):
|
803 |
-
"""
|
804 |
-
Reads a Parquet file from S3 using Dask and returns it as a Pandas DataFrame.
|
805 |
-
|
806 |
-
Parameters:
|
807 |
-
ufuid (int): Optional user_file_upload_id to fetch S3 path from DB.
|
808 |
-
columns_list (list/str): Columns to read.
|
809 |
-
records_count (int): Not used currently.
|
810 |
-
file_location (str): Direct file path in S3.
|
811 |
-
|
812 |
-
Returns:
|
813 |
-
pandas.DataFrame
|
814 |
-
"""
|
815 |
-
try:
|
816 |
-
# Connect to PostgreSQL
|
817 |
-
conn = psycopg2.connect(
|
818 |
-
host=CONNECTIONS_HOST,
|
819 |
-
database=CONNECTIONS_DB,
|
820 |
-
user=CONNECTIONS_USER,
|
821 |
-
password=CONNECTIONS_PASS
|
822 |
-
)
|
823 |
-
cursor = conn.cursor()
|
824 |
-
|
825 |
-
if ufuid is not None:
|
826 |
-
query = """SELECT file_name, table_names FROM public.user_file_upload WHERE user_file_upload_id = %s"""
|
827 |
-
cursor.execute(query, (ufuid,))
|
828 |
-
file = cursor.fetchone()
|
829 |
-
if not file:
|
830 |
-
raise ValueError(f"No file found for ufuid: {ufuid}")
|
831 |
-
file_name, s3_file_path = file
|
832 |
-
else:
|
833 |
-
# Normalize input path
|
834 |
-
file_location = re.sub(r'\.parquet(?!$)', '', file_location)
|
835 |
-
s3_file_path = file_location if file_location.endswith('.parquet') else file_location + '.parquet'
|
836 |
-
|
837 |
-
# Extract relative S3 path
|
838 |
-
s3_file_path = urllib.parse.unquote(s3_file_path.split(f"{S3_Bucket_Name}/")[-1])
|
839 |
-
if not s3_file_path.endswith('.parquet'):
|
840 |
-
s3_file_path += '.parquet'
|
841 |
-
|
842 |
-
# Parse columns if given as comma-separated string
|
843 |
-
if columns_list and not isinstance(columns_list, list):
|
844 |
-
columns_list = [col.strip(' "\'') for col in columns_list.split(',')]
|
845 |
-
|
846 |
-
print(f"\n{'!' * 100}\nReading from: s3://{S3_Bucket_Name}/{s3_file_path}\n")
|
847 |
-
|
848 |
-
# Read using Dask
|
849 |
-
ddf = dd.read_parquet(
|
850 |
-
f"s3://{S3_Bucket_Name}/{s3_file_path}",
|
851 |
-
engine="pyarrow",
|
852 |
-
columns=columns_list,
|
853 |
-
assume_missing=True
|
854 |
-
)
|
855 |
-
|
856 |
-
ddf = ddf.repartition(npartitions=8) # Optimize for processing
|
857 |
-
print("Reading Parquet file from S3 completed successfully.")
|
858 |
-
|
859 |
-
# Close database connection
|
860 |
-
cursor.close()
|
861 |
-
conn.close()
|
862 |
-
|
863 |
-
return ddf.compute()
|
864 |
-
|
865 |
-
except Exception as e:
|
866 |
-
print(f"❌ Error reading Parquet file: {e}")
|
867 |
-
return pd.DataFrame() # Return empty DataFrame on error
|
868 |
-
|
869 |
-
def pandas_agent(filepath: str, query: str) -> str:
|
870 |
-
"""
|
871 |
-
PandasAI agent that reads data from S3 and answers queries about the data.
|
872 |
-
|
873 |
-
Parameters:
|
874 |
-
filepath (str): S3 file path or ufuid
|
875 |
-
query (str): Natural language query about the data
|
876 |
-
|
877 |
-
Returns:
|
878 |
-
str: Answer from PandasAI
|
879 |
-
"""
|
880 |
-
try:
|
881 |
-
# Check if filepath is a number (ufuid) or a file path
|
882 |
-
if filepath.isdigit():
|
883 |
-
# It's a ufuid
|
884 |
-
data = read_parquet_file_from_s3(ufuid=int(filepath))
|
885 |
-
else:
|
886 |
-
# It's a file path
|
887 |
-
data = read_parquet_file_from_s3(file_location=filepath)
|
888 |
-
|
889 |
-
if data.empty:
|
890 |
-
return "❌ No data found or failed to load the file. Please check the file path or ufuid."
|
891 |
-
|
892 |
-
# Initialize PandasAI LLM
|
893 |
-
if not OPENAI_API_KEY:
|
894 |
-
return "❌ OPENAI_API_KEY is not set in environment variables."
|
895 |
-
|
896 |
-
pandas_llm = PandasOpenAI(api_token=OPENAI_API_KEY)
|
897 |
-
|
898 |
-
# Create SmartDataframe
|
899 |
-
sdf = SmartDataframe(data, config={"llm": pandas_llm})
|
900 |
-
|
901 |
-
# Ask the question
|
902 |
-
print(f"🔍 Processing query: {query}")
|
903 |
-
result = sdf.chat(query)
|
904 |
-
|
905 |
-
# Handle different types of results
|
906 |
-
if isinstance(result, str):
|
907 |
-
return f"📊 Analysis Result:\n{result}"
|
908 |
-
elif isinstance(result, (pd.DataFrame, pd.Series)):
|
909 |
-
return f"📊 Analysis Result:\n{result.to_string()}"
|
910 |
-
else:
|
911 |
-
return f"📊 Analysis Result:\n{str(result)}"
|
912 |
-
|
913 |
-
except Exception as e:
|
914 |
-
error_msg = f"❌ Error in pandas_agent: {str(e)}"
|
915 |
-
print(error_msg)
|
916 |
-
return error_msg
|
917 |
-
|
918 |
# === INPUT SCHEMAS ===
|
919 |
|
920 |
class Query(BaseModel):
|
@@ -932,10 +780,6 @@ class BotQuery(BaseModel):
|
|
932 |
session_id: Optional[str] = None
|
933 |
message: str
|
934 |
|
935 |
-
class PandasAgentQuery(BaseModel):
|
936 |
-
filepath: str = Field(..., description="S3 file path or ufuid")
|
937 |
-
query: str = Field(..., description="Natural language query about the data")
|
938 |
-
|
939 |
class SessionResponse(BaseModel):
|
940 |
session_id: str
|
941 |
userLoginId: int
|
@@ -958,36 +802,6 @@ class ChatHistoryResponse(BaseModel):
|
|
958 |
|
959 |
# === SESSION MANAGEMENT FUNCTIONS ===
|
960 |
|
961 |
-
|
962 |
-
|
963 |
-
|
964 |
-
@app.middleware("http")
|
965 |
-
async def add_success_flag(request: Request, call_next):
|
966 |
-
response = await call_next(request)
|
967 |
-
|
968 |
-
# Only modify JSON responses
|
969 |
-
if "application/json" in response.headers.get("content-type", ""):
|
970 |
-
try:
|
971 |
-
body = b"".join([chunk async for chunk in response.body_iterator])
|
972 |
-
data = json.loads(body.decode())
|
973 |
-
|
974 |
-
# Add success flag
|
975 |
-
data["success"] = 200 <= response.status_code < 300
|
976 |
-
|
977 |
-
# Build new JSONResponse (auto handles Content-Length)
|
978 |
-
response = JSONResponse(
|
979 |
-
content=data,
|
980 |
-
status_code=response.status_code,
|
981 |
-
headers={k: v for k, v in response.headers.items() if k.lower() != "content-length"},
|
982 |
-
)
|
983 |
-
except Exception:
|
984 |
-
# fallback if response is not JSON parseable
|
985 |
-
pass
|
986 |
-
return response
|
987 |
-
|
988 |
-
|
989 |
-
|
990 |
-
|
991 |
def create_session(userLoginId: int, orgId: int, auth_token: str) -> dict:
|
992 |
"""Create a new chat session"""
|
993 |
session_id = str(uuid.uuid4())
|
@@ -1320,31 +1134,6 @@ def get_user_projects(userLoginId: str) -> str:
|
|
1320 |
except Exception as e:
|
1321 |
return f"❌ Error fetching projects: {str(e)}"
|
1322 |
|
1323 |
-
def pandas_data_analysis(query_with_filepath: str) -> str:
|
1324 |
-
"""
|
1325 |
-
Tool for data analysis using PandasAI.
|
1326 |
-
Input format: 'filepath|query' where filepath is S3 path or ufuid, and query is the analysis question.
|
1327 |
-
"""
|
1328 |
-
try:
|
1329 |
-
# Parse the input to extract filepath and query
|
1330 |
-
parts = query_with_filepath.split('|', 1)
|
1331 |
-
if len(parts) != 2:
|
1332 |
-
return "❌ Invalid input format. Please use: 'filepath|query' format."
|
1333 |
-
|
1334 |
-
filepath, query = parts
|
1335 |
-
filepath = filepath.strip()
|
1336 |
-
query = query.strip()
|
1337 |
-
|
1338 |
-
if not filepath or not query:
|
1339 |
-
return "❌ Both filepath and query are required."
|
1340 |
-
|
1341 |
-
# Use the pandas_agent function
|
1342 |
-
result = pandas_agent(filepath, query)
|
1343 |
-
return result
|
1344 |
-
|
1345 |
-
except Exception as e:
|
1346 |
-
return f"❌ Error in pandas data analysis: {str(e)}"
|
1347 |
-
|
1348 |
# === CREATE TOOLS ===
|
1349 |
|
1350 |
document_search_tool = Tool(
|
@@ -1364,22 +1153,6 @@ project_list_tool = Tool(
|
|
1364 |
func=get_user_projects
|
1365 |
)
|
1366 |
|
1367 |
-
pandas_analysis_tool = Tool(
|
1368 |
-
name="pandas_data_analysis",
|
1369 |
-
description="""Use this tool for data analysis on CSV/Parquet files using PandasAI.
|
1370 |
-
Perfect for when users ask questions about data analysis, statistics, insights, or want to query their datasets.
|
1371 |
-
Input format: 'filepath|query' where:
|
1372 |
-
- filepath: S3 file path (e.g., 'User-Uploaded-Raw-Files/Data2004csv1754926601269756') or ufuid (e.g., '123')
|
1373 |
-
- query: Natural language question about the data (e.g., 'What are the top 5 values?', 'Show me summary statistics')
|
1374 |
-
|
1375 |
-
Examples:
|
1376 |
-
- 'User-Uploaded-Raw-Files/mydata.csv|What is this file about?'
|
1377 |
-
- '123|Show me the first 5 rows'
|
1378 |
-
- 'Modified-Files/processed_data|What are the most common values in column X?'
|
1379 |
-
""",
|
1380 |
-
func=pandas_data_analysis
|
1381 |
-
)
|
1382 |
-
|
1383 |
# === AGENT SETUP ===
|
1384 |
|
1385 |
def create_agent_with_session_memory(session_id: str):
|
@@ -1392,13 +1165,11 @@ def create_agent_with_session_memory(session_id: str):
|
|
1392 |
("system", """You are a helpful AI assistant with access to multiple tools and conversation memory:
|
1393 |
|
1394 |
1. **Document Search**: Search through uploaded documents and knowledge base
|
1395 |
-
2. **Project Management**: Get list of user projects and project information
|
1396 |
-
3. **Data Analysis**: Analyze CSV/Parquet files using PandasAI for insights, statistics, and queries
|
1397 |
|
1398 |
Your capabilities:
|
1399 |
- Answer messages about documents using the document search tool
|
1400 |
- Help users find their projects and project information
|
1401 |
-
- Perform data analysis on uploaded datasets using natural language queries
|
1402 |
- Remember previous conversations in this session
|
1403 |
- Provide general assistance and information
|
1404 |
- Use appropriate tools based on user queries
|
@@ -1406,8 +1177,6 @@ Your capabilities:
|
|
1406 |
Guidelines:
|
1407 |
- Use the document search tool when users ask about specific content, documentation, or information that might be in uploaded files
|
1408 |
- Use the project tool when users ask about projects, want to see their projects, or need project-related information
|
1409 |
-
- Use the pandas analysis tool when users ask about data analysis, statistics, insights, or want to query datasets
|
1410 |
-
- For pandas analysis, you need both a filepath (S3 path or ufuid) and a query - ask for missing information if needed
|
1411 |
- Reference previous conversation context when relevant
|
1412 |
- Be clear about which tool you're using and what information you're providing
|
1413 |
- If you're unsure which tool to use, you can ask for clarification
|
@@ -1433,7 +1202,7 @@ Remember: Always use the most appropriate tool based on the user's message and c
|
|
1433 |
memory.chat_memory.add_ai_message(msg["message"])
|
1434 |
|
1435 |
# Create tools list
|
1436 |
-
tools = [document_search_tool, project_list_tool
|
1437 |
|
1438 |
# Create the agent
|
1439 |
agent = create_openai_tools_agent(llm, tools, agent_prompt)
|
@@ -1632,31 +1401,6 @@ def list_projects(request: ProjectRequest):
|
|
1632 |
"tool_used": "project_list"
|
1633 |
}
|
1634 |
|
1635 |
-
@app.post("/chat-with-pandas-agent")
|
1636 |
-
def chat_with_pandas_agent(request: PandasAgentQuery):
|
1637 |
-
"""Direct pandas AI agent endpoint for data analysis"""
|
1638 |
-
try:
|
1639 |
-
result = pandas_agent(request.filepath, request.query)
|
1640 |
-
|
1641 |
-
return {
|
1642 |
-
"filepath": request.filepath,
|
1643 |
-
"query": request.query,
|
1644 |
-
"answer": result,
|
1645 |
-
"tool_used": "pandas_agent",
|
1646 |
-
"timestamp": datetime.now().isoformat()
|
1647 |
-
}
|
1648 |
-
|
1649 |
-
except Exception as e:
|
1650 |
-
error_msg = f"An error occurred: {str(e)}"
|
1651 |
-
return {
|
1652 |
-
"filepath": request.filepath,
|
1653 |
-
"query": request.query,
|
1654 |
-
"answer": error_msg,
|
1655 |
-
"tool_used": "pandas_agent",
|
1656 |
-
"error": True,
|
1657 |
-
"timestamp": datetime.now().isoformat()
|
1658 |
-
}
|
1659 |
-
|
1660 |
@app.put("/sessions/{session_id}/title")
|
1661 |
def refresh_session_title(session_id: str):
|
1662 |
"""Manually refresh/regenerate session title"""
|
@@ -1712,11 +1456,10 @@ def health():
|
|
1712 |
|
1713 |
return {
|
1714 |
"status": "ok",
|
1715 |
-
"tools": ["document_search", "project_list"
|
1716 |
"agent": "active",
|
1717 |
"session_management": "enabled",
|
1718 |
"redis_status": redis_status,
|
1719 |
-
"pandas_ai": "enabled",
|
1720 |
"total_sessions": len(list(redis_client.scan_iter(match="session:*")))
|
1721 |
}
|
1722 |
|
@@ -1728,4 +1471,1054 @@ if __name__ == "__main__":
|
|
1728 |
print("\n🛑 Server stopped gracefully")
|
1729 |
except Exception as e:
|
1730 |
print(f"❌ Server error: {e}")
|
1731 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
679 |
|
680 |
|
681 |
#_______________________________________________session id -- with redis - chat session memory _________________________________________________________
|
682 |
+
#
|
683 |
+
from fastapi import FastAPI, HTTPException, Query as QueryParam
|
684 |
+
from pydantic import BaseModel, Field
|
685 |
+
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
|
686 |
+
from qdrant_client import QdrantClient
|
687 |
+
from langchain.agents import Tool, AgentExecutor, create_openai_tools_agent
|
688 |
+
from langchain.prompts import ChatPromptTemplate, MessagesPlaceholder
|
689 |
+
from langchain.memory import ConversationBufferMemory
|
690 |
+
from typing import Optional, List, Dict, Any
|
691 |
+
import os
|
692 |
+
import warnings
|
693 |
+
import base64
|
694 |
+
import requests
|
695 |
from dotenv import load_dotenv
|
696 |
from datetime import datetime
|
697 |
import json
|
698 |
import uuid
|
699 |
import redis
|
700 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
701 |
# Suppress warnings
|
702 |
warnings.filterwarnings("ignore", message="Qdrant client version.*is incompatible.*")
|
703 |
|
704 |
load_dotenv()
|
705 |
|
706 |
+
app = FastAPI(title="AI Agent with Redis Session Management")
|
707 |
|
708 |
# Environment variables
|
709 |
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
|
|
|
717 |
REDIS_PORT = int(os.getenv("REDIS_PORT", 6379))
|
718 |
REDIS_PASSWORD = os.getenv("REDIS_PASSWORD")
|
719 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
720 |
# Initialize Redis client
|
721 |
def get_redis_client():
|
722 |
"""Initialize Redis client with fallback to local Redis"""
|
|
|
763 |
qdrant_client = QdrantClient(host=QDRANT_HOST, port=QDRANT_PORT)
|
764 |
llm = ChatOpenAI(model="gpt-4o", temperature=0, openai_api_key=OPENAI_API_KEY)
|
765 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
766 |
# === INPUT SCHEMAS ===
|
767 |
|
768 |
class Query(BaseModel):
|
|
|
780 |
session_id: Optional[str] = None
|
781 |
message: str
|
782 |
|
|
|
|
|
|
|
|
|
783 |
class SessionResponse(BaseModel):
|
784 |
session_id: str
|
785 |
userLoginId: int
|
|
|
802 |
|
803 |
# === SESSION MANAGEMENT FUNCTIONS ===
|
804 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
805 |
def create_session(userLoginId: int, orgId: int, auth_token: str) -> dict:
|
806 |
"""Create a new chat session"""
|
807 |
session_id = str(uuid.uuid4())
|
|
|
1134 |
except Exception as e:
|
1135 |
return f"❌ Error fetching projects: {str(e)}"
|
1136 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1137 |
# === CREATE TOOLS ===
|
1138 |
|
1139 |
document_search_tool = Tool(
|
|
|
1153 |
func=get_user_projects
|
1154 |
)
|
1155 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1156 |
# === AGENT SETUP ===
|
1157 |
|
1158 |
def create_agent_with_session_memory(session_id: str):
|
|
|
1165 |
("system", """You are a helpful AI assistant with access to multiple tools and conversation memory:
|
1166 |
|
1167 |
1. **Document Search**: Search through uploaded documents and knowledge base
|
1168 |
+
2. **Project Management**: Get list of user projects and project information
|
|
|
1169 |
|
1170 |
Your capabilities:
|
1171 |
- Answer messages about documents using the document search tool
|
1172 |
- Help users find their projects and project information
|
|
|
1173 |
- Remember previous conversations in this session
|
1174 |
- Provide general assistance and information
|
1175 |
- Use appropriate tools based on user queries
|
|
|
1177 |
Guidelines:
|
1178 |
- Use the document search tool when users ask about specific content, documentation, or information that might be in uploaded files
|
1179 |
- Use the project tool when users ask about projects, want to see their projects, or need project-related information
|
|
|
|
|
1180 |
- Reference previous conversation context when relevant
|
1181 |
- Be clear about which tool you're using and what information you're providing
|
1182 |
- If you're unsure which tool to use, you can ask for clarification
|
|
|
1202 |
memory.chat_memory.add_ai_message(msg["message"])
|
1203 |
|
1204 |
# Create tools list
|
1205 |
+
tools = [document_search_tool, project_list_tool]
|
1206 |
|
1207 |
# Create the agent
|
1208 |
agent = create_openai_tools_agent(llm, tools, agent_prompt)
|
|
|
1401 |
"tool_used": "project_list"
|
1402 |
}
|
1403 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1404 |
@app.put("/sessions/{session_id}/title")
|
1405 |
def refresh_session_title(session_id: str):
|
1406 |
"""Manually refresh/regenerate session title"""
|
|
|
1456 |
|
1457 |
return {
|
1458 |
"status": "ok",
|
1459 |
+
"tools": ["document_search", "project_list"],
|
1460 |
"agent": "active",
|
1461 |
"session_management": "enabled",
|
1462 |
"redis_status": redis_status,
|
|
|
1463 |
"total_sessions": len(list(redis_client.scan_iter(match="session:*")))
|
1464 |
}
|
1465 |
|
|
|
1471 |
print("\n🛑 Server stopped gracefully")
|
1472 |
except Exception as e:
|
1473 |
print(f"❌ Server error: {e}")
|
1474 |
+
|
1475 |
+
|
1476 |
+
|
1477 |
+
|
1478 |
+
#______________________________________________sucess true false _pandasai tool_________________________________________________________________
|
1479 |
+
|
1480 |
+
|
1481 |
+
# from dotenv import load_dotenv
|
1482 |
+
# from datetime import datetime
|
1483 |
+
# import json
|
1484 |
+
# import uuid
|
1485 |
+
# import redis
|
1486 |
+
|
1487 |
+
# # Pandas AI imports
|
1488 |
+
# import re
|
1489 |
+
# import urllib.parse
|
1490 |
+
# import pandas as pd
|
1491 |
+
# import dask.dataframe as dd
|
1492 |
+
# from math import ceil
|
1493 |
+
# import psycopg2
|
1494 |
+
# from pandasai import SmartDataframe
|
1495 |
+
# from pandasai.llm.openai import OpenAI as PandasOpenAI
|
1496 |
+
|
1497 |
+
|
1498 |
+
|
1499 |
+
|
1500 |
+
# from fastapi import FastAPI, Request
|
1501 |
+
# from fastapi.responses import JSONResponse
|
1502 |
+
# import json
|
1503 |
+
|
1504 |
+
|
1505 |
+
# # Import your existing S3 connection details
|
1506 |
+
# from retrive_secrects import * # CONNECTIONS_HOST, etc.
|
1507 |
+
|
1508 |
+
# # Suppress warnings
|
1509 |
+
# warnings.filterwarnings("ignore", message="Qdrant client version.*is incompatible.*")
|
1510 |
+
|
1511 |
+
# load_dotenv()
|
1512 |
+
|
1513 |
+
# app = FastAPI(title="AI Agent with Redis Session Management and Pandas AI")
|
1514 |
+
|
1515 |
+
# # Environment variables
|
1516 |
+
# OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
|
1517 |
+
# QDRANT_COLLECTION_NAME = os.getenv("QDRANT_COLLECTION_NAME", "vatsav_test_1")
|
1518 |
+
# QDRANT_HOST = os.getenv("QDRANT_HOST", "127.0.0.1")
|
1519 |
+
# QDRANT_PORT = int(os.getenv("QDRANT_PORT", 6333))
|
1520 |
+
|
1521 |
+
# # Redis Configuration
|
1522 |
+
# REDIS_URL = os.getenv("REDIS_URL")
|
1523 |
+
# REDIS_HOST = os.getenv("REDIS_HOST", "127.0.0.1")
|
1524 |
+
# REDIS_PORT = int(os.getenv("REDIS_PORT", 6379))
|
1525 |
+
# REDIS_PASSWORD = os.getenv("REDIS_PASSWORD")
|
1526 |
+
|
1527 |
+
# # S3 Constants (from your original code)
|
1528 |
+
# S3_Bucket_Name = 'ingenspark-user-files'
|
1529 |
+
# S3_Raw_Files_Folder = 'User-Uploaded-Raw-Files'
|
1530 |
+
# S3_Modified_Files_Folder = 'Modified-Files/'
|
1531 |
+
# S3_Output_Files_Folder = 'Output-Files/'
|
1532 |
+
# S3_Published_Results_Folder = 'Published-Results/'
|
1533 |
+
# S3_Ingen_Customer_Output = 'Ingen-Customer/'
|
1534 |
+
# Dominant_Segmentation_Output = 'Dominant-Segmentation/'
|
1535 |
+
# Trend_Segmentation_Output = 'Trend-Segmentation/'
|
1536 |
+
# Decile_Quartile_segmentation_Output = 'Decile-Quartile-Segmentation/'
|
1537 |
+
# Combined_Segmentation_Output = 'Combine-Segmentation/'
|
1538 |
+
# Custom_Segmentation_Output = 'Custom-Segmentation/'
|
1539 |
+
# Customer_360_Output = 'Customer-360/'
|
1540 |
+
# Merge_file_folder = S3_Modified_Files_Folder + 'IngenData-Merged-Tables/'
|
1541 |
+
# S3_Dev_Doc_Images_Folder = 'Developers-Documentation-Images/'
|
1542 |
+
# S3_Temporary_Files_Folder = S3_Raw_Files_Folder
|
1543 |
+
# S3_App_Specific_Data = 'Application-Specific-Data/'
|
1544 |
+
# S3_Transformation_Tables_Folder = 'Modified-Files/Modified-Tables/Transformation-Tables/'
|
1545 |
+
# cloud_front_url = "https://files.dev.ingenspark.com/"
|
1546 |
+
|
1547 |
+
# # Initialize Redis client
|
1548 |
+
# def get_redis_client():
|
1549 |
+
# """Initialize Redis client with fallback to local Redis"""
|
1550 |
+
# try:
|
1551 |
+
# if REDIS_URL:
|
1552 |
+
# # Use deployed Redis URL
|
1553 |
+
# redis_client = redis.from_url(
|
1554 |
+
# REDIS_URL,
|
1555 |
+
# decode_responses=True,
|
1556 |
+
# socket_connect_timeout=5,
|
1557 |
+
# socket_timeout=5
|
1558 |
+
# )
|
1559 |
+
# # Test connection
|
1560 |
+
# redis_client.ping()
|
1561 |
+
# print(f"✅ Connected to deployed Redis: {REDIS_URL}")
|
1562 |
+
# return redis_client
|
1563 |
+
# else:
|
1564 |
+
# # Use local Redis
|
1565 |
+
# redis_client = redis.StrictRedis(
|
1566 |
+
# host=REDIS_HOST,
|
1567 |
+
# port=REDIS_PORT,
|
1568 |
+
# password=REDIS_PASSWORD,
|
1569 |
+
# decode_responses=True,
|
1570 |
+
# socket_connect_timeout=5,
|
1571 |
+
# socket_timeout=5
|
1572 |
+
# )
|
1573 |
+
# # Test connection
|
1574 |
+
# redis_client.ping()
|
1575 |
+
# print(f"✅ Connected to local Redis: {REDIS_HOST}:{REDIS_PORT}")
|
1576 |
+
# return redis_client
|
1577 |
+
# except Exception as e:
|
1578 |
+
# print(f"❌ Redis connection failed: {e}")
|
1579 |
+
# raise HTTPException(status_code=500, detail=f"Redis connection failed: {str(e)}")
|
1580 |
+
|
1581 |
+
# # Initialize Redis client
|
1582 |
+
# redis_client = get_redis_client()
|
1583 |
+
|
1584 |
+
# # Initialize models
|
1585 |
+
# embedding_model = OpenAIEmbeddings(
|
1586 |
+
# model="text-embedding-3-large",
|
1587 |
+
# openai_api_key=OPENAI_API_KEY,
|
1588 |
+
# )
|
1589 |
+
|
1590 |
+
# qdrant_client = QdrantClient(host=QDRANT_HOST, port=QDRANT_PORT)
|
1591 |
+
# llm = ChatOpenAI(model="gpt-4o", temperature=0, openai_api_key=OPENAI_API_KEY)
|
1592 |
+
|
1593 |
+
# # === PANDAS AI FUNCTIONS ===
|
1594 |
+
|
1595 |
+
# def read_parquet_file_from_s3(ufuid=None, columns_list=None, records_count=None, file_location=''):
|
1596 |
+
# """
|
1597 |
+
# Reads a Parquet file from S3 using Dask and returns it as a Pandas DataFrame.
|
1598 |
+
|
1599 |
+
# Parameters:
|
1600 |
+
# ufuid (int): Optional user_file_upload_id to fetch S3 path from DB.
|
1601 |
+
# columns_list (list/str): Columns to read.
|
1602 |
+
# records_count (int): Not used currently.
|
1603 |
+
# file_location (str): Direct file path in S3.
|
1604 |
+
|
1605 |
+
# Returns:
|
1606 |
+
# pandas.DataFrame
|
1607 |
+
# """
|
1608 |
+
# try:
|
1609 |
+
# # Connect to PostgreSQL
|
1610 |
+
# conn = psycopg2.connect(
|
1611 |
+
# host=CONNECTIONS_HOST,
|
1612 |
+
# database=CONNECTIONS_DB,
|
1613 |
+
# user=CONNECTIONS_USER,
|
1614 |
+
# password=CONNECTIONS_PASS
|
1615 |
+
# )
|
1616 |
+
# cursor = conn.cursor()
|
1617 |
+
|
1618 |
+
# if ufuid is not None:
|
1619 |
+
# query = """SELECT file_name, table_names FROM public.user_file_upload WHERE user_file_upload_id = %s"""
|
1620 |
+
# cursor.execute(query, (ufuid,))
|
1621 |
+
# file = cursor.fetchone()
|
1622 |
+
# if not file:
|
1623 |
+
# raise ValueError(f"No file found for ufuid: {ufuid}")
|
1624 |
+
# file_name, s3_file_path = file
|
1625 |
+
# else:
|
1626 |
+
# # Normalize input path
|
1627 |
+
# file_location = re.sub(r'\.parquet(?!$)', '', file_location)
|
1628 |
+
# s3_file_path = file_location if file_location.endswith('.parquet') else file_location + '.parquet'
|
1629 |
+
|
1630 |
+
# # Extract relative S3 path
|
1631 |
+
# s3_file_path = urllib.parse.unquote(s3_file_path.split(f"{S3_Bucket_Name}/")[-1])
|
1632 |
+
# if not s3_file_path.endswith('.parquet'):
|
1633 |
+
# s3_file_path += '.parquet'
|
1634 |
+
|
1635 |
+
# # Parse columns if given as comma-separated string
|
1636 |
+
# if columns_list and not isinstance(columns_list, list):
|
1637 |
+
# columns_list = [col.strip(' "\'') for col in columns_list.split(',')]
|
1638 |
+
|
1639 |
+
# print(f"\n{'!' * 100}\nReading from: s3://{S3_Bucket_Name}/{s3_file_path}\n")
|
1640 |
+
|
1641 |
+
# # Read using Dask
|
1642 |
+
# ddf = dd.read_parquet(
|
1643 |
+
# f"s3://{S3_Bucket_Name}/{s3_file_path}",
|
1644 |
+
# engine="pyarrow",
|
1645 |
+
# columns=columns_list,
|
1646 |
+
# assume_missing=True
|
1647 |
+
# )
|
1648 |
+
|
1649 |
+
# ddf = ddf.repartition(npartitions=8) # Optimize for processing
|
1650 |
+
# print("Reading Parquet file from S3 completed successfully.")
|
1651 |
+
|
1652 |
+
# # Close database connection
|
1653 |
+
# cursor.close()
|
1654 |
+
# conn.close()
|
1655 |
+
|
1656 |
+
# return ddf.compute()
|
1657 |
+
|
1658 |
+
# except Exception as e:
|
1659 |
+
# print(f"❌ Error reading Parquet file: {e}")
|
1660 |
+
# return pd.DataFrame() # Return empty DataFrame on error
|
1661 |
+
|
1662 |
+
# def pandas_agent(filepath: str, query: str) -> str:
|
1663 |
+
# """
|
1664 |
+
# PandasAI agent that reads data from S3 and answers queries about the data.
|
1665 |
+
|
1666 |
+
# Parameters:
|
1667 |
+
# filepath (str): S3 file path or ufuid
|
1668 |
+
# query (str): Natural language query about the data
|
1669 |
+
|
1670 |
+
# Returns:
|
1671 |
+
# str: Answer from PandasAI
|
1672 |
+
# """
|
1673 |
+
# try:
|
1674 |
+
# # Check if filepath is a number (ufuid) or a file path
|
1675 |
+
# if filepath.isdigit():
|
1676 |
+
# # It's a ufuid
|
1677 |
+
# data = read_parquet_file_from_s3(ufuid=int(filepath))
|
1678 |
+
# else:
|
1679 |
+
# # It's a file path
|
1680 |
+
# data = read_parquet_file_from_s3(file_location=filepath)
|
1681 |
+
|
1682 |
+
# if data.empty:
|
1683 |
+
# return "❌ No data found or failed to load the file. Please check the file path or ufuid."
|
1684 |
+
|
1685 |
+
# # Initialize PandasAI LLM
|
1686 |
+
# if not OPENAI_API_KEY:
|
1687 |
+
# return "❌ OPENAI_API_KEY is not set in environment variables."
|
1688 |
+
|
1689 |
+
# pandas_llm = PandasOpenAI(api_token=OPENAI_API_KEY)
|
1690 |
+
|
1691 |
+
# # Create SmartDataframe
|
1692 |
+
# sdf = SmartDataframe(data, config={"llm": pandas_llm})
|
1693 |
+
|
1694 |
+
# # Ask the question
|
1695 |
+
# print(f"🔍 Processing query: {query}")
|
1696 |
+
# result = sdf.chat(query)
|
1697 |
+
|
1698 |
+
# # Handle different types of results
|
1699 |
+
# if isinstance(result, str):
|
1700 |
+
# return f"📊 Analysis Result:\n{result}"
|
1701 |
+
# elif isinstance(result, (pd.DataFrame, pd.Series)):
|
1702 |
+
# return f"📊 Analysis Result:\n{result.to_string()}"
|
1703 |
+
# else:
|
1704 |
+
# return f"📊 Analysis Result:\n{str(result)}"
|
1705 |
+
|
1706 |
+
# except Exception as e:
|
1707 |
+
# error_msg = f"❌ Error in pandas_agent: {str(e)}"
|
1708 |
+
# print(error_msg)
|
1709 |
+
# return error_msg
|
1710 |
+
|
1711 |
+
# # === INPUT SCHEMAS ===
|
1712 |
+
|
1713 |
+
# class Query(BaseModel):
|
1714 |
+
# message: str
|
1715 |
+
|
1716 |
+
# class ProjectRequest(BaseModel):
|
1717 |
+
# userLoginId: int
|
1718 |
+
# orgId: int
|
1719 |
+
# auth_token: str
|
1720 |
+
|
1721 |
+
# class BotQuery(BaseModel):
|
1722 |
+
# userLoginId: int
|
1723 |
+
# orgId: int
|
1724 |
+
# auth_token: str
|
1725 |
+
# session_id: Optional[str] = None
|
1726 |
+
# message: str
|
1727 |
+
|
1728 |
+
# class PandasAgentQuery(BaseModel):
|
1729 |
+
# filepath: str = Field(..., description="S3 file path or ufuid")
|
1730 |
+
# query: str = Field(..., description="Natural language query about the data")
|
1731 |
+
|
1732 |
+
# class SessionResponse(BaseModel):
|
1733 |
+
# session_id: str
|
1734 |
+
# userLoginId: int
|
1735 |
+
# orgId: int
|
1736 |
+
# created_at: str
|
1737 |
+
# status: str
|
1738 |
+
# title: Optional[str] = "New Chat"
|
1739 |
+
|
1740 |
+
# class MessageResponse(BaseModel):
|
1741 |
+
# message_id: str
|
1742 |
+
# session_id: str
|
1743 |
+
# role: str # "user" or "assistant"
|
1744 |
+
# message: str
|
1745 |
+
# timestamp: str
|
1746 |
+
|
1747 |
+
# class ChatHistoryResponse(BaseModel):
|
1748 |
+
# session_id: str
|
1749 |
+
# messages: List[MessageResponse]
|
1750 |
+
# total_messages: int
|
1751 |
+
|
1752 |
+
# # === SESSION MANAGEMENT FUNCTIONS ===
|
1753 |
+
|
1754 |
+
|
1755 |
+
|
1756 |
+
|
1757 |
+
# @app.middleware("http")
|
1758 |
+
# async def add_success_flag(request: Request, call_next):
|
1759 |
+
# response = await call_next(request)
|
1760 |
+
|
1761 |
+
# # Only modify JSON responses
|
1762 |
+
# if "application/json" in response.headers.get("content-type", ""):
|
1763 |
+
# try:
|
1764 |
+
# body = b"".join([chunk async for chunk in response.body_iterator])
|
1765 |
+
# data = json.loads(body.decode())
|
1766 |
+
|
1767 |
+
# # Add success flag
|
1768 |
+
# data["success"] = 200 <= response.status_code < 300
|
1769 |
+
|
1770 |
+
# # Build new JSONResponse (auto handles Content-Length)
|
1771 |
+
# response = JSONResponse(
|
1772 |
+
# content=data,
|
1773 |
+
# status_code=response.status_code,
|
1774 |
+
# headers={k: v for k, v in response.headers.items() if k.lower() != "content-length"},
|
1775 |
+
# )
|
1776 |
+
# except Exception:
|
1777 |
+
# # fallback if response is not JSON parseable
|
1778 |
+
# pass
|
1779 |
+
# return response
|
1780 |
+
|
1781 |
+
|
1782 |
+
|
1783 |
+
|
1784 |
+
# def create_session(userLoginId: int, orgId: int, auth_token: str) -> dict:
|
1785 |
+
# """Create a new chat session"""
|
1786 |
+
# session_id = str(uuid.uuid4())
|
1787 |
+
# session_data = {
|
1788 |
+
# "session_id": session_id,
|
1789 |
+
# "userLoginId": userLoginId,
|
1790 |
+
# "orgId": orgId,
|
1791 |
+
# "auth_token": auth_token,
|
1792 |
+
# "created_at": datetime.now().isoformat(),
|
1793 |
+
# "status": "active",
|
1794 |
+
# "title": "New Chat" # Default title, will be updated after first message
|
1795 |
+
# }
|
1796 |
+
|
1797 |
+
# # Store session in Redis with 24 hour TTL
|
1798 |
+
# redis_client.setex(
|
1799 |
+
# f"session:{session_id}",
|
1800 |
+
# 86400, # 24 hours
|
1801 |
+
# json.dumps(session_data)
|
1802 |
+
# )
|
1803 |
+
|
1804 |
+
# # Initialize empty chat history
|
1805 |
+
# redis_client.setex(
|
1806 |
+
# f"chat:{session_id}",
|
1807 |
+
# 86400, # 24 hours
|
1808 |
+
# json.dumps([])
|
1809 |
+
# )
|
1810 |
+
|
1811 |
+
# # Initialize conversation memory
|
1812 |
+
# redis_client.setex(
|
1813 |
+
# f"memory:{session_id}",
|
1814 |
+
# 86400, # 24 hours
|
1815 |
+
# json.dumps([])
|
1816 |
+
# )
|
1817 |
+
|
1818 |
+
# return session_data
|
1819 |
+
|
1820 |
+
# def get_session(session_id: str) -> dict:
|
1821 |
+
# """Get session data from Redis"""
|
1822 |
+
# session_data = redis_client.get(f"session:{session_id}")
|
1823 |
+
# if not session_data:
|
1824 |
+
# raise HTTPException(status_code=404, detail="Session not found or expired")
|
1825 |
+
# return json.loads(session_data)
|
1826 |
+
|
1827 |
+
# def add_message_to_session(session_id: str, role: str, message: str) -> str:
|
1828 |
+
# """Add message to session chat history"""
|
1829 |
+
# message_id = str(uuid.uuid4())
|
1830 |
+
# message_data = {
|
1831 |
+
# "message_id": message_id,
|
1832 |
+
# "session_id": session_id,
|
1833 |
+
# "role": role,
|
1834 |
+
# "message": message,
|
1835 |
+
# "timestamp": datetime.now().isoformat()
|
1836 |
+
# }
|
1837 |
+
|
1838 |
+
# # Get current chat history
|
1839 |
+
# chat_history = redis_client.get(f"chat:{session_id}")
|
1840 |
+
# if chat_history:
|
1841 |
+
# messages = json.loads(chat_history)
|
1842 |
+
# else:
|
1843 |
+
# messages = []
|
1844 |
+
|
1845 |
+
# # Add new message
|
1846 |
+
# messages.append(message_data)
|
1847 |
+
|
1848 |
+
# # Update chat history in Redis with extended TTL
|
1849 |
+
# redis_client.setex(
|
1850 |
+
# f"chat:{session_id}",
|
1851 |
+
# 86400, # 24 hours
|
1852 |
+
# json.dumps(messages)
|
1853 |
+
# )
|
1854 |
+
|
1855 |
+
# return message_id
|
1856 |
+
|
1857 |
+
# def get_session_memory(session_id: str) -> List[Dict]:
|
1858 |
+
# """Get conversation memory for session"""
|
1859 |
+
# memory_data = redis_client.get(f"memory:{session_id}")
|
1860 |
+
# if memory_data:
|
1861 |
+
# return json.loads(memory_data)
|
1862 |
+
# return []
|
1863 |
+
|
1864 |
+
# def update_session_memory(session_id: str, messages: List[Dict]):
|
1865 |
+
# """Update conversation memory for session"""
|
1866 |
+
# redis_client.setex(
|
1867 |
+
# f"memory:{session_id}",
|
1868 |
+
# 86400, # 24 hours
|
1869 |
+
# json.dumps(messages)
|
1870 |
+
# )
|
1871 |
+
|
1872 |
+
# def update_session_title(session_id: str):
|
1873 |
+
# """Update session title after first message"""
|
1874 |
+
# try:
|
1875 |
+
# # Get session data
|
1876 |
+
# session_data = redis_client.get(f"session:{session_id}")
|
1877 |
+
# if not session_data:
|
1878 |
+
# return
|
1879 |
+
|
1880 |
+
# session = json.loads(session_data)
|
1881 |
+
|
1882 |
+
# # Only update if current title is "New Chat"
|
1883 |
+
# if session.get("title", "New Chat") == "New Chat":
|
1884 |
+
# new_title = generate_session_title(session_id)
|
1885 |
+
# session["title"] = new_title
|
1886 |
+
|
1887 |
+
# # Update session in Redis
|
1888 |
+
# redis_client.setex(
|
1889 |
+
# f"session:{session_id}",
|
1890 |
+
# 86400, # 24 hours
|
1891 |
+
# json.dumps(session)
|
1892 |
+
# )
|
1893 |
+
|
1894 |
+
# except Exception as e:
|
1895 |
+
# print(f"Error updating session title: {e}")
|
1896 |
+
# pass # Don't fail the request if title update fails
|
1897 |
+
|
1898 |
+
# def generate_session_title(session_id: str) -> str:
|
1899 |
+
# """Generate a title for the session based on chat history"""
|
1900 |
+
# try:
|
1901 |
+
# # Get chat history
|
1902 |
+
# chat_data = redis_client.get(f"chat:{session_id}")
|
1903 |
+
# if not chat_data:
|
1904 |
+
# return "New Chat"
|
1905 |
+
|
1906 |
+
# messages = json.loads(chat_data)
|
1907 |
+
# if not messages:
|
1908 |
+
# return "New Chat"
|
1909 |
+
|
1910 |
+
# # Get first user message for title generation
|
1911 |
+
# first_user_message = None
|
1912 |
+
# for msg in messages:
|
1913 |
+
# if msg["role"] == "user":
|
1914 |
+
# first_user_message = msg["message"]
|
1915 |
+
# break
|
1916 |
+
|
1917 |
+
# if not first_user_message:
|
1918 |
+
# return "New Chat"
|
1919 |
+
|
1920 |
+
# # Generate title using LLM
|
1921 |
+
# title_prompt = f"""Generate a short, descriptive title (maximum 6 words) for a chat conversation that starts with this message:
|
1922 |
+
|
1923 |
+
# "{first_user_message[:200]}"
|
1924 |
+
|
1925 |
+
# Return only the title, no quotes or additional text. The title should capture the main topic or intent of the conversation."""
|
1926 |
+
|
1927 |
+
# try:
|
1928 |
+
# response = llm.invoke(title_prompt)
|
1929 |
+
# title = response.content.strip()
|
1930 |
+
|
1931 |
+
# # Clean and limit title
|
1932 |
+
# title = title.replace('"', '').replace("'", "")
|
1933 |
+
# if len(title) > 50:
|
1934 |
+
# title = title[:47] + "..."
|
1935 |
+
|
1936 |
+
# return title if title else "New Chat"
|
1937 |
+
|
1938 |
+
# except Exception as e:
|
1939 |
+
# print(f"Error generating title: {e}")
|
1940 |
+
# # Fallback: use first few words of the message
|
1941 |
+
# words = first_user_message.split()[:4]
|
1942 |
+
# return " ".join(words) + ("..." if len(words) >= 4 else "")
|
1943 |
+
|
1944 |
+
# except Exception as e:
|
1945 |
+
# print(f"Error in generate_session_title: {e}")
|
1946 |
+
# return "New Chat"
|
1947 |
+
|
1948 |
+
# def get_user_sessions(userLoginId: int) -> List[dict]:
|
1949 |
+
# """Get all sessions for a user with generated titles"""
|
1950 |
+
# sessions = []
|
1951 |
+
# # Scan for all session keys
|
1952 |
+
# for key in redis_client.scan_iter(match="session:*"):
|
1953 |
+
# session_data = redis_client.get(key)
|
1954 |
+
# if session_data:
|
1955 |
+
# session = json.loads(session_data)
|
1956 |
+
# if session["userLoginId"] == userLoginId:
|
1957 |
+
# # Generate title based on chat history
|
1958 |
+
# session["title"] = generate_session_title(session["session_id"])
|
1959 |
+
# sessions.append(session)
|
1960 |
+
|
1961 |
+
# # Sort sessions by created_at (most recent first)
|
1962 |
+
# sessions.sort(key=lambda x: x["created_at"], reverse=True)
|
1963 |
+
# return sessions
|
1964 |
+
|
1965 |
+
# def delete_session(session_id: str):
|
1966 |
+
# """Delete session and associated data"""
|
1967 |
+
# # Delete session data
|
1968 |
+
# redis_client.delete(f"session:{session_id}")
|
1969 |
+
# # Delete chat history
|
1970 |
+
# redis_client.delete(f"chat:{session_id}")
|
1971 |
+
# # Delete memory
|
1972 |
+
# redis_client.delete(f"memory:{session_id}")
|
1973 |
+
|
1974 |
+
# # === UTILITY FUNCTIONS ===
|
1975 |
+
|
1976 |
+
# def get_encoded_auth_token(user: int, token: str) -> str:
|
1977 |
+
# auth_string = f"{user}:{token}"
|
1978 |
+
# return base64.b64encode(auth_string.encode("utf-8")).decode("utf-8")
|
1979 |
+
|
1980 |
+
# def fetch_user_projects(userLoginId: int, orgId: int, auth_token: str):
|
1981 |
+
# url = "https://japidemo.dev.ingenspark.com/fetchUserProjects"
|
1982 |
+
# payload = {
|
1983 |
+
# "userLoginId": userLoginId,
|
1984 |
+
# "orgId": orgId
|
1985 |
+
# }
|
1986 |
+
|
1987 |
+
# headers = {
|
1988 |
+
# 'accept': 'application/json, text/plain, */*',
|
1989 |
+
# 'authorization': f'Basic {auth_token}',
|
1990 |
+
# 'content-type': 'application/json; charset=UTF-8'
|
1991 |
+
# }
|
1992 |
+
|
1993 |
+
# try:
|
1994 |
+
# response = requests.post(url, headers=headers, json=payload)
|
1995 |
+
# response.raise_for_status()
|
1996 |
+
# return response.json()
|
1997 |
+
# except requests.exceptions.RequestException as e:
|
1998 |
+
# raise HTTPException(status_code=response.status_code if 'response' in locals() else 500,
|
1999 |
+
# detail=str(e))
|
2000 |
+
|
2001 |
+
# def format_project_response(data: dict) -> str:
|
2002 |
+
# my_projects = data.get("data", {}).get("Myprojects", [])
|
2003 |
+
# other_projects = data.get("data", {}).get("Otherprojects", [])
|
2004 |
+
|
2005 |
+
# all_projects = []
|
2006 |
+
|
2007 |
+
# for project in my_projects:
|
2008 |
+
# all_projects.append({
|
2009 |
+
# "type": "Your Project",
|
2010 |
+
# "projectNm": project["projectNm"],
|
2011 |
+
# "projectId": project["projectId"],
|
2012 |
+
# "created_dttm": project["created_dttm"].split('.')[0],
|
2013 |
+
# "description": project["description"],
|
2014 |
+
# "categoryName": project["categoryName"]
|
2015 |
+
# })
|
2016 |
+
|
2017 |
+
# for project in other_projects:
|
2018 |
+
# all_projects.append({
|
2019 |
+
# "type": "Other Project",
|
2020 |
+
# "projectNm": project["projectNm"],
|
2021 |
+
# "projectId": project["projectId"],
|
2022 |
+
# "created_dttm": project["created_dttm"].split('.')[0],
|
2023 |
+
# "description": project["description"],
|
2024 |
+
# "categoryName": project["categoryName"]
|
2025 |
+
# })
|
2026 |
+
|
2027 |
+
# if not all_projects:
|
2028 |
+
# return "❌ No projects found."
|
2029 |
+
|
2030 |
+
# # Build the formatted string
|
2031 |
+
# result = [f"✅ You have access to {len(all_projects)} project(s):\n"]
|
2032 |
+
# for i, project in enumerate(all_projects, 1):
|
2033 |
+
# result.append(f"{i}. Project Name: {project['projectNm']} ({project['type']})")
|
2034 |
+
# result.append(f" Project ID: {project['projectId']}")
|
2035 |
+
# result.append(f" Created On: {project['created_dttm']}")
|
2036 |
+
# result.append(f" Description: {project['description']}")
|
2037 |
+
# result.append(f" Category: {project['categoryName']}\n")
|
2038 |
+
# return "\n".join(result)
|
2039 |
+
|
2040 |
+
# # === TOOL FUNCTIONS ===
|
2041 |
+
|
2042 |
+
# def search_documents(query: str) -> str:
|
2043 |
+
# """Search through ingested documents and get relevant information."""
|
2044 |
+
# try:
|
2045 |
+
# # Generate embedding for the query
|
2046 |
+
# query_vector = embedding_model.embed_query(query)
|
2047 |
+
|
2048 |
+
# # Search in Qdrant
|
2049 |
+
# search_result = qdrant_client.search(
|
2050 |
+
# collection_name=QDRANT_COLLECTION_NAME,
|
2051 |
+
# query_vector=query_vector,
|
2052 |
+
# limit=5,
|
2053 |
+
# )
|
2054 |
+
|
2055 |
+
# if not search_result:
|
2056 |
+
# return "No relevant information found in the knowledge base."
|
2057 |
+
|
2058 |
+
# # Convert results to text content
|
2059 |
+
# context_texts = []
|
2060 |
+
# sources = []
|
2061 |
+
|
2062 |
+
# for hit in search_result:
|
2063 |
+
# context_texts.append(hit.payload["text"])
|
2064 |
+
# sources.append(hit.payload.get("source", "unknown"))
|
2065 |
+
|
2066 |
+
# # Create a simple prompt for answering based on context
|
2067 |
+
# context = "\n\n".join(context_texts)
|
2068 |
+
# unique_sources = list(set(sources))
|
2069 |
+
|
2070 |
+
# # Use the LLM directly to answer the message based on context
|
2071 |
+
# prompt = f"""Based on the following context, answer the message: {query}
|
2072 |
+
|
2073 |
+
# Context:
|
2074 |
+
# {context}
|
2075 |
+
|
2076 |
+
# Please provide a comprehensive answer based on the context above. If the context doesn't contain enough information to answer the message, say so clearly."""
|
2077 |
+
|
2078 |
+
# response = llm.invoke(prompt)
|
2079 |
+
|
2080 |
+
# return f"{response.content}\n\nSources: {', '.join(unique_sources)}"
|
2081 |
+
|
2082 |
+
# except Exception as e:
|
2083 |
+
# return f"Error searching documents: {str(e)}"
|
2084 |
+
|
2085 |
+
# # Global variables to store auth context (for tool functions)
|
2086 |
+
# _current_user_id = None
|
2087 |
+
# _current_org_id = None
|
2088 |
+
# _current_auth_token = None
|
2089 |
+
|
2090 |
+
# def get_user_projects(userLoginId: str) -> str:
|
2091 |
+
# """Get list of projects for a user."""
|
2092 |
+
# try:
|
2093 |
+
# # Use global auth context if available
|
2094 |
+
# if _current_auth_token and _current_user_id:
|
2095 |
+
# user_id = _current_user_id
|
2096 |
+
# org_id = _current_org_id or 1
|
2097 |
+
# auth_token = _current_auth_token
|
2098 |
+
# else:
|
2099 |
+
# return "❌ Authentication token required. Please provide auth_token in your request."
|
2100 |
+
|
2101 |
+
# # Encode auth token using the actual user ID and provided token
|
2102 |
+
# encoded_token = get_encoded_auth_token(user_id, auth_token)
|
2103 |
+
|
2104 |
+
# # Fetch projects
|
2105 |
+
# data = fetch_user_projects(user_id, org_id, encoded_token)
|
2106 |
+
|
2107 |
+
# # Format and return the project list
|
2108 |
+
# formatted = format_project_response(data)
|
2109 |
+
# return formatted
|
2110 |
+
|
2111 |
+
# except ValueError:
|
2112 |
+
# return "❌ Invalid userLoginId format. Please provide a valid number."
|
2113 |
+
# except Exception as e:
|
2114 |
+
# return f"❌ Error fetching projects: {str(e)}"
|
2115 |
+
|
2116 |
+
# def pandas_data_analysis(query_with_filepath: str) -> str:
|
2117 |
+
# """
|
2118 |
+
# Tool for data analysis using PandasAI.
|
2119 |
+
# Input format: 'filepath|query' where filepath is S3 path or ufuid, and query is the analysis question.
|
2120 |
+
# """
|
2121 |
+
# try:
|
2122 |
+
# # Parse the input to extract filepath and query
|
2123 |
+
# parts = query_with_filepath.split('|', 1)
|
2124 |
+
# if len(parts) != 2:
|
2125 |
+
# return "❌ Invalid input format. Please use: 'filepath|query' format."
|
2126 |
+
|
2127 |
+
# filepath, query = parts
|
2128 |
+
# filepath = filepath.strip()
|
2129 |
+
# query = query.strip()
|
2130 |
+
|
2131 |
+
# if not filepath or not query:
|
2132 |
+
# return "❌ Both filepath and query are required."
|
2133 |
+
|
2134 |
+
# # Use the pandas_agent function
|
2135 |
+
# result = pandas_agent(filepath, query)
|
2136 |
+
# return result
|
2137 |
+
|
2138 |
+
# except Exception as e:
|
2139 |
+
# return f"❌ Error in pandas data analysis: {str(e)}"
|
2140 |
+
|
2141 |
+
# # === CREATE TOOLS ===
|
2142 |
+
|
2143 |
+
# document_search_tool = Tool(
|
2144 |
+
# name="document_search",
|
2145 |
+
# description="""Use this tool to search through ingested documents and get relevant information from the knowledge base.
|
2146 |
+
# Perfect for answering messages about uploaded documents, manuals, or any content that was previously stored.
|
2147 |
+
# Input should be a search query or message about the documents.""",
|
2148 |
+
# func=search_documents
|
2149 |
+
# )
|
2150 |
+
|
2151 |
+
# project_list_tool = Tool(
|
2152 |
+
# name="get_user_projects",
|
2153 |
+
# description="""Use this tool to get the list of projects for a user.
|
2154 |
+
# Perfect for when users ask about their projects, want to see available projects, or need project information.
|
2155 |
+
# Input should be the userLoginId (e.g., '25').
|
2156 |
+
# Note: This tool requires authentication context to be set.""",
|
2157 |
+
# func=get_user_projects
|
2158 |
+
# )
|
2159 |
+
|
2160 |
+
# pandas_analysis_tool = Tool(
|
2161 |
+
# name="pandas_data_analysis",
|
2162 |
+
# description="""Use this tool for data analysis on CSV/Parquet files using PandasAI.
|
2163 |
+
# Perfect for when users ask questions about data analysis, statistics, insights, or want to query their datasets.
|
2164 |
+
# Input format: 'filepath|query' where:
|
2165 |
+
# - filepath: S3 file path (e.g., 'User-Uploaded-Raw-Files/Data2004csv1754926601269756') or ufuid (e.g., '123')
|
2166 |
+
# - query: Natural language question about the data (e.g., 'What are the top 5 values?', 'Show me summary statistics')
|
2167 |
+
|
2168 |
+
# Examples:
|
2169 |
+
# - 'User-Uploaded-Raw-Files/mydata.csv|What is this file about?'
|
2170 |
+
# - '123|Show me the first 5 rows'
|
2171 |
+
# - 'Modified-Files/processed_data|What are the most common values in column X?'
|
2172 |
+
# """,
|
2173 |
+
# func=pandas_data_analysis
|
2174 |
+
# )
|
2175 |
+
|
2176 |
+
# # === AGENT SETUP ===
|
2177 |
+
|
2178 |
+
# def create_agent_with_session_memory(session_id: str):
|
2179 |
+
# """Create agent with session memory from Redis"""
|
2180 |
+
|
2181 |
+
# # Get memory from Redis
|
2182 |
+
# memory_messages = get_session_memory(session_id)
|
2183 |
+
|
2184 |
+
# agent_prompt = ChatPromptTemplate.from_messages([
|
2185 |
+
# ("system", """You are a helpful AI assistant with access to multiple tools and conversation memory:
|
2186 |
+
|
2187 |
+
# 1. **Document Search**: Search through uploaded documents and knowledge base
|
2188 |
+
# 2. **Project Management**: Get list of user projects and project information
|
2189 |
+
# 3. **Data Analysis**: Analyze CSV/Parquet files using PandasAI for insights, statistics, and queries
|
2190 |
+
|
2191 |
+
# Your capabilities:
|
2192 |
+
# - Answer messages about documents using the document search tool
|
2193 |
+
# - Help users find their projects and project information
|
2194 |
+
# - Perform data analysis on uploaded datasets using natural language queries
|
2195 |
+
# - Remember previous conversations in this session
|
2196 |
+
# - Provide general assistance and information
|
2197 |
+
# - Use appropriate tools based on user queries
|
2198 |
+
|
2199 |
+
# Guidelines:
|
2200 |
+
# - Use the document search tool when users ask about specific content, documentation, or information that might be in uploaded files
|
2201 |
+
# - Use the project tool when users ask about projects, want to see their projects, or need project-related information
|
2202 |
+
# - Use the pandas analysis tool when users ask about data analysis, statistics, insights, or want to query datasets
|
2203 |
+
# - For pandas analysis, you need both a filepath (S3 path or ufuid) and a query - ask for missing information if needed
|
2204 |
+
# - Reference previous conversation context when relevant
|
2205 |
+
# - Be clear about which tool you're using and what information you're providing
|
2206 |
+
# - If you're unsure which tool to use, you can ask for clarification
|
2207 |
+
# - Provide helpful, accurate, and well-formatted responses
|
2208 |
+
|
2209 |
+
# Remember: Always use the most appropriate tool based on the user's message and conversation context to provide the best possible answer."""),
|
2210 |
+
# MessagesPlaceholder(variable_name="chat_history"),
|
2211 |
+
# ("user", "{input}"),
|
2212 |
+
# MessagesPlaceholder(variable_name="agent_scratchpad"),
|
2213 |
+
# ])
|
2214 |
+
|
2215 |
+
# # Create memory object
|
2216 |
+
# memory = ConversationBufferMemory(
|
2217 |
+
# memory_key="chat_history",
|
2218 |
+
# return_messages=True
|
2219 |
+
# )
|
2220 |
+
|
2221 |
+
# # Load existing messages into memory
|
2222 |
+
# for msg in memory_messages:
|
2223 |
+
# if msg["role"] == "user":
|
2224 |
+
# memory.chat_memory.add_user_message(msg["message"])
|
2225 |
+
# else:
|
2226 |
+
# memory.chat_memory.add_ai_message(msg["message"])
|
2227 |
+
|
2228 |
+
# # Create tools list
|
2229 |
+
# tools = [document_search_tool, project_list_tool, pandas_analysis_tool]
|
2230 |
+
|
2231 |
+
# # Create the agent
|
2232 |
+
# agent = create_openai_tools_agent(llm, tools, agent_prompt)
|
2233 |
+
|
2234 |
+
# # Create the agent executor with memory
|
2235 |
+
# agent_executor = AgentExecutor(
|
2236 |
+
# agent=agent,
|
2237 |
+
# tools=tools,
|
2238 |
+
# verbose=True,
|
2239 |
+
# memory=memory
|
2240 |
+
# )
|
2241 |
+
|
2242 |
+
# return agent_executor, memory
|
2243 |
+
|
2244 |
+
# # === API ENDPOINTS ===
|
2245 |
+
|
2246 |
+
# @app.post("/sessions", response_model=SessionResponse)
|
2247 |
+
# def create_new_session(userLoginId: int, orgId: int, auth_token: str):
|
2248 |
+
# """Create a new chat session"""
|
2249 |
+
# try:
|
2250 |
+
# session_data = create_session(userLoginId, orgId, auth_token)
|
2251 |
+
# return SessionResponse(**session_data)
|
2252 |
+
# except Exception as e:
|
2253 |
+
# raise HTTPException(status_code=500, detail=f"Error creating session: {str(e)}")
|
2254 |
+
|
2255 |
+
# @app.get("/sessions")
|
2256 |
+
# def list_user_sessions(userLoginId: int):
|
2257 |
+
# """List all sessions for a user"""
|
2258 |
+
# try:
|
2259 |
+
# sessions = get_user_sessions(userLoginId)
|
2260 |
+
# return {
|
2261 |
+
# "userLoginId": userLoginId,
|
2262 |
+
# "total_sessions": len(sessions),
|
2263 |
+
# "sessions": sessions
|
2264 |
+
# }
|
2265 |
+
# except Exception as e:
|
2266 |
+
# raise HTTPException(status_code=500, detail=f"Error fetching sessions: {str(e)}")
|
2267 |
+
|
2268 |
+
# @app.delete("/sessions/{session_id}")
|
2269 |
+
# def delete_user_session(session_id: str):
|
2270 |
+
# """Delete/close a session"""
|
2271 |
+
# try:
|
2272 |
+
# # Verify session exists
|
2273 |
+
# get_session(session_id)
|
2274 |
+
|
2275 |
+
# # Delete session
|
2276 |
+
# delete_session(session_id)
|
2277 |
+
|
2278 |
+
# return {
|
2279 |
+
# "message": f"Session {session_id} deleted successfully",
|
2280 |
+
# "session_id": session_id
|
2281 |
+
# }
|
2282 |
+
# except Exception as e:
|
2283 |
+
# raise HTTPException(status_code=500, detail=f"Error deleting session: {str(e)}")
|
2284 |
+
|
2285 |
+
# @app.post("/bot")
|
2286 |
+
# def chat_with_bot(query: BotQuery):
|
2287 |
+
# """Main bot endpoint with session management"""
|
2288 |
+
# try:
|
2289 |
+
# # Set global auth context for tools
|
2290 |
+
# global _current_user_id, _current_org_id, _current_auth_token
|
2291 |
+
# _current_user_id = query.userLoginId
|
2292 |
+
# _current_org_id = query.orgId
|
2293 |
+
# _current_auth_token = query.auth_token
|
2294 |
+
|
2295 |
+
# session_id = query.session_id
|
2296 |
+
|
2297 |
+
# # Create new session if not provided
|
2298 |
+
# if not session_id:
|
2299 |
+
# session_data = create_session(query.userLoginId, query.orgId, query.auth_token)
|
2300 |
+
# session_id = session_data["session_id"]
|
2301 |
+
# else:
|
2302 |
+
# # Verify existing session
|
2303 |
+
# get_session(session_id)
|
2304 |
+
|
2305 |
+
# # Add user message to session
|
2306 |
+
# user_message_id = add_message_to_session(session_id, "user", query.message)
|
2307 |
+
|
2308 |
+
# # Create agent with session memory
|
2309 |
+
# agent_executor, memory = create_agent_with_session_memory(session_id)
|
2310 |
+
|
2311 |
+
# # Use the agent to process the query
|
2312 |
+
# result = agent_executor.invoke({"input": query.message})
|
2313 |
+
|
2314 |
+
# # Add AI response to session
|
2315 |
+
# ai_message_id = add_message_to_session(session_id, "assistant", result["output"])
|
2316 |
+
|
2317 |
+
# # Update session memory in Redis
|
2318 |
+
# updated_messages = []
|
2319 |
+
# for message in memory.chat_memory.messages:
|
2320 |
+
# if hasattr(message, 'content'):
|
2321 |
+
# role = "user" if message.__class__.__name__ == "HumanMessage" else "assistant"
|
2322 |
+
# updated_messages.append({
|
2323 |
+
# "role": role,
|
2324 |
+
# "message": message.content,
|
2325 |
+
# "timestamp": datetime.now().isoformat()
|
2326 |
+
# })
|
2327 |
+
|
2328 |
+
# update_session_memory(session_id, updated_messages)
|
2329 |
+
|
2330 |
+
# # Update session title after first user message
|
2331 |
+
# update_session_title(session_id)
|
2332 |
+
|
2333 |
+
# # Clear auth context after use
|
2334 |
+
# _current_user_id = None
|
2335 |
+
# _current_org_id = None
|
2336 |
+
# _current_auth_token = None
|
2337 |
+
|
2338 |
+
# return {
|
2339 |
+
# "session_id": session_id,
|
2340 |
+
# "user_message_id": user_message_id,
|
2341 |
+
# "ai_message_id": ai_message_id,
|
2342 |
+
# "message": query.message,
|
2343 |
+
# "answer": result["output"],
|
2344 |
+
# "userLoginId": query.userLoginId,
|
2345 |
+
# "agent_used": True
|
2346 |
+
# }
|
2347 |
+
|
2348 |
+
# except Exception as e:
|
2349 |
+
# # Clear auth context on error
|
2350 |
+
# _current_user_id = None
|
2351 |
+
# _current_org_id = None
|
2352 |
+
# _current_auth_token = None
|
2353 |
+
|
2354 |
+
# raise HTTPException(status_code=500, detail=f"Error processing chat: {str(e)}")
|
2355 |
+
|
2356 |
+
# @app.get("/sessions/{session_id}/history", response_model=ChatHistoryResponse)
|
2357 |
+
# def get_session_history(session_id: str, n: int = QueryParam(50, description="Number of recent messages to return")):
|
2358 |
+
# """Get chat history for a session"""
|
2359 |
+
# try:
|
2360 |
+
# # Verify session exists
|
2361 |
+
# get_session(session_id)
|
2362 |
+
|
2363 |
+
# # Get chat history
|
2364 |
+
# chat_data = redis_client.get(f"chat:{session_id}")
|
2365 |
+
# if not chat_data:
|
2366 |
+
# return ChatHistoryResponse(
|
2367 |
+
# session_id=session_id,
|
2368 |
+
# messages=[],
|
2369 |
+
# total_messages=0
|
2370 |
+
# )
|
2371 |
+
|
2372 |
+
# messages = json.loads(chat_data)
|
2373 |
+
|
2374 |
+
# # Get the last n messages (or all if less than n)
|
2375 |
+
# recent_messages = messages[-n:] if len(messages) > n else messages
|
2376 |
+
|
2377 |
+
# # Convert to MessageResponse objects
|
2378 |
+
# message_responses = [MessageResponse(**msg) for msg in recent_messages]
|
2379 |
+
|
2380 |
+
# return ChatHistoryResponse(
|
2381 |
+
# session_id=session_id,
|
2382 |
+
# messages=message_responses,
|
2383 |
+
# total_messages=len(messages)
|
2384 |
+
# )
|
2385 |
+
|
2386 |
+
# except Exception as e:
|
2387 |
+
# raise HTTPException(status_code=500, detail=f"Error fetching chat history: {str(e)}")
|
2388 |
+
|
2389 |
+
# @app.post("/chat-documents")
|
2390 |
+
# def chat_documents_only(query: Query):
|
2391 |
+
# """Direct document search without agent"""
|
2392 |
+
# try:
|
2393 |
+
# result = search_documents(query.message)
|
2394 |
+
# return {
|
2395 |
+
# "message": query.message,
|
2396 |
+
# "answer": result,
|
2397 |
+
# "tool_used": "document_search"
|
2398 |
+
# }
|
2399 |
+
# except Exception as e:
|
2400 |
+
# return {
|
2401 |
+
# "message": query.message,
|
2402 |
+
# "answer": f"An error occurred: {str(e)}",
|
2403 |
+
# "tool_used": "document_search"
|
2404 |
+
# }
|
2405 |
+
|
2406 |
+
# @app.post("/list-projects")
|
2407 |
+
# def list_projects(request: ProjectRequest):
|
2408 |
+
# """Direct project listing without agent"""
|
2409 |
+
# try:
|
2410 |
+
# # Use the provided auth token and userLoginId
|
2411 |
+
# encoded_token = get_encoded_auth_token(request.userLoginId, request.auth_token)
|
2412 |
+
|
2413 |
+
# # Fetch projects
|
2414 |
+
# data = fetch_user_projects(request.userLoginId, request.orgId, encoded_token)
|
2415 |
+
|
2416 |
+
# # Format and return the project list
|
2417 |
+
# formatted = format_project_response(data)
|
2418 |
+
# return {
|
2419 |
+
# "projects": formatted,
|
2420 |
+
# "tool_used": "project_list"
|
2421 |
+
# }
|
2422 |
+
# except Exception as e:
|
2423 |
+
# return {
|
2424 |
+
# "error": f"An error occurred: {str(e)}",
|
2425 |
+
# "tool_used": "project_list"
|
2426 |
+
# }
|
2427 |
+
|
2428 |
+
# @app.post("/chat-with-pandas-agent")
|
2429 |
+
# def chat_with_pandas_agent(request: PandasAgentQuery):
|
2430 |
+
# """Direct pandas AI agent endpoint for data analysis"""
|
2431 |
+
# try:
|
2432 |
+
# result = pandas_agent(request.filepath, request.query)
|
2433 |
+
|
2434 |
+
# return {
|
2435 |
+
# "filepath": request.filepath,
|
2436 |
+
# "query": request.query,
|
2437 |
+
# "answer": result,
|
2438 |
+
# "tool_used": "pandas_agent",
|
2439 |
+
# "timestamp": datetime.now().isoformat()
|
2440 |
+
# }
|
2441 |
+
|
2442 |
+
# except Exception as e:
|
2443 |
+
# error_msg = f"An error occurred: {str(e)}"
|
2444 |
+
# return {
|
2445 |
+
# "filepath": request.filepath,
|
2446 |
+
# "query": request.query,
|
2447 |
+
# "answer": error_msg,
|
2448 |
+
# "tool_used": "pandas_agent",
|
2449 |
+
# "error": True,
|
2450 |
+
# "timestamp": datetime.now().isoformat()
|
2451 |
+
# }
|
2452 |
+
|
2453 |
+
# @app.put("/sessions/{session_id}/title")
|
2454 |
+
# def refresh_session_title(session_id: str):
|
2455 |
+
# """Manually refresh/regenerate session title"""
|
2456 |
+
# try:
|
2457 |
+
# # Verify session exists
|
2458 |
+
# session_data = get_session(session_id)
|
2459 |
+
|
2460 |
+
# # Generate new title
|
2461 |
+
# new_title = generate_session_title(session_id)
|
2462 |
+
|
2463 |
+
# # Update session
|
2464 |
+
# session_data["title"] = new_title
|
2465 |
+
# redis_client.setex(
|
2466 |
+
# f"session:{session_id}",
|
2467 |
+
# 86400, # 24 hours
|
2468 |
+
# json.dumps(session_data)
|
2469 |
+
# )
|
2470 |
+
|
2471 |
+
# return {
|
2472 |
+
# "session_id": session_id,
|
2473 |
+
# "new_title": new_title,
|
2474 |
+
# "message": "Session title updated successfully"
|
2475 |
+
# }
|
2476 |
+
|
2477 |
+
# except Exception as e:
|
2478 |
+
# raise HTTPException(status_code=500, detail=f"Error updating session title: {str(e)}")
|
2479 |
+
|
2480 |
+
# @app.get("/redis-info")
|
2481 |
+
# def redis_info():
|
2482 |
+
# """Get Redis connection information"""
|
2483 |
+
# try:
|
2484 |
+
# info = redis_client.info()
|
2485 |
+
# return {
|
2486 |
+
# "redis_connected": True,
|
2487 |
+
# "redis_version": info.get("redis_version"),
|
2488 |
+
# "used_memory": info.get("used_memory_human"),
|
2489 |
+
# "connected_clients": info.get("connected_clients"),
|
2490 |
+
# "total_keys": redis_client.dbsize()
|
2491 |
+
# }
|
2492 |
+
# except Exception as e:
|
2493 |
+
# return {
|
2494 |
+
# "redis_connected": False,
|
2495 |
+
# "error": str(e)
|
2496 |
+
# }
|
2497 |
+
|
2498 |
+
# @app.get("/health")
|
2499 |
+
# def health():
|
2500 |
+
# try:
|
2501 |
+
# redis_client.ping()
|
2502 |
+
# redis_status = "connected"
|
2503 |
+
# except:
|
2504 |
+
# redis_status = "disconnected"
|
2505 |
+
|
2506 |
+
# return {
|
2507 |
+
# "status": "ok",
|
2508 |
+
# "tools": ["document_search", "project_list", "pandas_data_analysis"],
|
2509 |
+
# "agent": "active",
|
2510 |
+
# "session_management": "enabled",
|
2511 |
+
# "redis_status": redis_status,
|
2512 |
+
# "pandas_ai": "enabled",
|
2513 |
+
# "total_sessions": len(list(redis_client.scan_iter(match="session:*")))
|
2514 |
+
# }
|
2515 |
+
|
2516 |
+
# if __name__ == "__main__":
|
2517 |
+
# import uvicorn
|
2518 |
+
# try:
|
2519 |
+
# uvicorn.run(app, host="0.0.0.0", port=8000)
|
2520 |
+
# except KeyboardInterrupt:
|
2521 |
+
# print("\n🛑 Server stopped gracefully")
|
2522 |
+
# except Exception as e:
|
2523 |
+
# print(f"❌ Server error: {e}")
|
2524 |
+
# #
|