srivatsavdamaraju committed
Commit 26bf124 · verified · 1 Parent(s): 41f9404

Update app.py

Files changed (1)
  1. app.py +1068 -275
app.py CHANGED
@@ -679,45 +679,31 @@
679
 
680
 
681
  #_______________________________________________session id -- with redis - chat session memory _________________________________________________________
682
- # #
683
-
684
-
685
 - #______________________________________________success true false _pandasai tool_________________________________________________________________
686
-
687
-
688
  from dotenv import load_dotenv
689
  from datetime import datetime
690
  import json
691
  import uuid
692
  import redis
693
 
694
- # Pandas AI imports
695
- import re
696
- import urllib.parse
697
- import pandas as pd
698
- import dask.dataframe as dd
699
- from math import ceil
700
- import psycopg2
701
- from pandasai import SmartDataframe
702
- from pandasai.llm.openai import OpenAI as PandasOpenAI
703
-
704
-
705
-
706
-
707
- from fastapi import FastAPI, Request
708
- from fastapi.responses import JSONResponse
709
- import json
710
-
711
-
712
- # Import your existing S3 connection details
713
- from retrive_secrects import * # CONNECTIONS_HOST, etc.
714
-
715
  # Suppress warnings
716
  warnings.filterwarnings("ignore", message="Qdrant client version.*is incompatible.*")
717
 
718
  load_dotenv()
719
 
720
- app = FastAPI(title="AI Agent with Redis Session Management and Pandas AI")
721
 
722
  # Environment variables
723
  OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
@@ -731,26 +717,6 @@ REDIS_HOST = os.getenv("REDIS_HOST", "127.0.0.1")
731
  REDIS_PORT = int(os.getenv("REDIS_PORT", 6379))
732
  REDIS_PASSWORD = os.getenv("REDIS_PASSWORD")
733
 
734
- # S3 Constants (from your original code)
735
- S3_Bucket_Name = 'ingenspark-user-files'
736
- S3_Raw_Files_Folder = 'User-Uploaded-Raw-Files'
737
- S3_Modified_Files_Folder = 'Modified-Files/'
738
- S3_Output_Files_Folder = 'Output-Files/'
739
- S3_Published_Results_Folder = 'Published-Results/'
740
- S3_Ingen_Customer_Output = 'Ingen-Customer/'
741
- Dominant_Segmentation_Output = 'Dominant-Segmentation/'
742
- Trend_Segmentation_Output = 'Trend-Segmentation/'
743
- Decile_Quartile_segmentation_Output = 'Decile-Quartile-Segmentation/'
744
- Combined_Segmentation_Output = 'Combine-Segmentation/'
745
- Custom_Segmentation_Output = 'Custom-Segmentation/'
746
- Customer_360_Output = 'Customer-360/'
747
- Merge_file_folder = S3_Modified_Files_Folder + 'IngenData-Merged-Tables/'
748
- S3_Dev_Doc_Images_Folder = 'Developers-Documentation-Images/'
749
- S3_Temporary_Files_Folder = S3_Raw_Files_Folder
750
- S3_App_Specific_Data = 'Application-Specific-Data/'
751
- S3_Transformation_Tables_Folder = 'Modified-Files/Modified-Tables/Transformation-Tables/'
752
- cloud_front_url = "https://files.dev.ingenspark.com/"
753
-
754
  # Initialize Redis client
755
  def get_redis_client():
756
  """Initialize Redis client with fallback to local Redis"""
@@ -797,124 +763,6 @@ embedding_model = OpenAIEmbeddings(
797
  qdrant_client = QdrantClient(host=QDRANT_HOST, port=QDRANT_PORT)
798
  llm = ChatOpenAI(model="gpt-4o", temperature=0, openai_api_key=OPENAI_API_KEY)
799
 
800
- # === PANDAS AI FUNCTIONS ===
801
-
802
- def read_parquet_file_from_s3(ufuid=None, columns_list=None, records_count=None, file_location=''):
803
- """
804
- Reads a Parquet file from S3 using Dask and returns it as a Pandas DataFrame.
805
-
806
- Parameters:
807
- ufuid (int): Optional user_file_upload_id to fetch S3 path from DB.
808
- columns_list (list/str): Columns to read.
809
- records_count (int): Not used currently.
810
- file_location (str): Direct file path in S3.
811
-
812
- Returns:
813
- pandas.DataFrame
814
- """
815
- try:
816
- # Connect to PostgreSQL
817
- conn = psycopg2.connect(
818
- host=CONNECTIONS_HOST,
819
- database=CONNECTIONS_DB,
820
- user=CONNECTIONS_USER,
821
- password=CONNECTIONS_PASS
822
- )
823
- cursor = conn.cursor()
824
-
825
- if ufuid is not None:
826
- query = """SELECT file_name, table_names FROM public.user_file_upload WHERE user_file_upload_id = %s"""
827
- cursor.execute(query, (ufuid,))
828
- file = cursor.fetchone()
829
- if not file:
830
- raise ValueError(f"No file found for ufuid: {ufuid}")
831
- file_name, s3_file_path = file
832
- else:
833
- # Normalize input path
834
- file_location = re.sub(r'\.parquet(?!$)', '', file_location)
835
- s3_file_path = file_location if file_location.endswith('.parquet') else file_location + '.parquet'
836
-
837
- # Extract relative S3 path
838
- s3_file_path = urllib.parse.unquote(s3_file_path.split(f"{S3_Bucket_Name}/")[-1])
839
- if not s3_file_path.endswith('.parquet'):
840
- s3_file_path += '.parquet'
841
-
842
- # Parse columns if given as comma-separated string
843
- if columns_list and not isinstance(columns_list, list):
844
- columns_list = [col.strip(' "\'') for col in columns_list.split(',')]
845
-
846
- print(f"\n{'!' * 100}\nReading from: s3://{S3_Bucket_Name}/{s3_file_path}\n")
847
-
848
- # Read using Dask
849
- ddf = dd.read_parquet(
850
- f"s3://{S3_Bucket_Name}/{s3_file_path}",
851
- engine="pyarrow",
852
- columns=columns_list,
853
- assume_missing=True
854
- )
855
-
856
- ddf = ddf.repartition(npartitions=8) # Optimize for processing
857
- print("Reading Parquet file from S3 completed successfully.")
858
-
859
- # Close database connection
860
- cursor.close()
861
- conn.close()
862
-
863
- return ddf.compute()
864
-
865
- except Exception as e:
866
- print(f"❌ Error reading Parquet file: {e}")
867
- return pd.DataFrame() # Return empty DataFrame on error
868
-
869
- def pandas_agent(filepath: str, query: str) -> str:
870
- """
871
- PandasAI agent that reads data from S3 and answers queries about the data.
872
-
873
- Parameters:
874
- filepath (str): S3 file path or ufuid
875
- query (str): Natural language query about the data
876
-
877
- Returns:
878
- str: Answer from PandasAI
879
- """
880
- try:
881
- # Check if filepath is a number (ufuid) or a file path
882
- if filepath.isdigit():
883
- # It's a ufuid
884
- data = read_parquet_file_from_s3(ufuid=int(filepath))
885
- else:
886
- # It's a file path
887
- data = read_parquet_file_from_s3(file_location=filepath)
888
-
889
- if data.empty:
890
- return "❌ No data found or failed to load the file. Please check the file path or ufuid."
891
-
892
- # Initialize PandasAI LLM
893
- if not OPENAI_API_KEY:
894
- return "❌ OPENAI_API_KEY is not set in environment variables."
895
-
896
- pandas_llm = PandasOpenAI(api_token=OPENAI_API_KEY)
897
-
898
- # Create SmartDataframe
899
- sdf = SmartDataframe(data, config={"llm": pandas_llm})
900
-
901
- # Ask the question
902
- print(f"🔍 Processing query: {query}")
903
- result = sdf.chat(query)
904
-
905
- # Handle different types of results
906
- if isinstance(result, str):
907
- return f"📊 Analysis Result:\n{result}"
908
- elif isinstance(result, (pd.DataFrame, pd.Series)):
909
- return f"📊 Analysis Result:\n{result.to_string()}"
910
- else:
911
- return f"📊 Analysis Result:\n{str(result)}"
912
-
913
- except Exception as e:
914
- error_msg = f"❌ Error in pandas_agent: {str(e)}"
915
- print(error_msg)
916
- return error_msg
917
-
918
  # === INPUT SCHEMAS ===
919
 
920
  class Query(BaseModel):
@@ -932,10 +780,6 @@ class BotQuery(BaseModel):
932
  session_id: Optional[str] = None
933
  message: str
934
 
935
- class PandasAgentQuery(BaseModel):
936
- filepath: str = Field(..., description="S3 file path or ufuid")
937
- query: str = Field(..., description="Natural language query about the data")
938
-
939
  class SessionResponse(BaseModel):
940
  session_id: str
941
  userLoginId: int
@@ -958,36 +802,6 @@ class ChatHistoryResponse(BaseModel):
958
 
959
  # === SESSION MANAGEMENT FUNCTIONS ===
960
 
961
-
962
-
963
-
964
- @app.middleware("http")
965
- async def add_success_flag(request: Request, call_next):
966
- response = await call_next(request)
967
-
968
- # Only modify JSON responses
969
- if "application/json" in response.headers.get("content-type", ""):
970
- try:
971
- body = b"".join([chunk async for chunk in response.body_iterator])
972
- data = json.loads(body.decode())
973
-
974
- # Add success flag
975
- data["success"] = 200 <= response.status_code < 300
976
-
977
- # Build new JSONResponse (auto handles Content-Length)
978
- response = JSONResponse(
979
- content=data,
980
- status_code=response.status_code,
981
- headers={k: v for k, v in response.headers.items() if k.lower() != "content-length"},
982
- )
983
- except Exception:
984
- # fallback if response is not JSON parseable
985
- pass
986
- return response
987
-
988
-
989
-
990
-
991
  def create_session(userLoginId: int, orgId: int, auth_token: str) -> dict:
992
  """Create a new chat session"""
993
  session_id = str(uuid.uuid4())
@@ -1320,31 +1134,6 @@ def get_user_projects(userLoginId: str) -> str:
1320
  except Exception as e:
1321
  return f"❌ Error fetching projects: {str(e)}"
1322
 
1323
- def pandas_data_analysis(query_with_filepath: str) -> str:
1324
- """
1325
- Tool for data analysis using PandasAI.
1326
- Input format: 'filepath|query' where filepath is S3 path or ufuid, and query is the analysis question.
1327
- """
1328
- try:
1329
- # Parse the input to extract filepath and query
1330
- parts = query_with_filepath.split('|', 1)
1331
- if len(parts) != 2:
1332
- return "❌ Invalid input format. Please use: 'filepath|query' format."
1333
-
1334
- filepath, query = parts
1335
- filepath = filepath.strip()
1336
- query = query.strip()
1337
-
1338
- if not filepath or not query:
1339
- return "❌ Both filepath and query are required."
1340
-
1341
- # Use the pandas_agent function
1342
- result = pandas_agent(filepath, query)
1343
- return result
1344
-
1345
- except Exception as e:
1346
- return f"❌ Error in pandas data analysis: {str(e)}"
1347
-
1348
  # === CREATE TOOLS ===
1349
 
1350
  document_search_tool = Tool(
@@ -1364,22 +1153,6 @@ project_list_tool = Tool(
1364
  func=get_user_projects
1365
  )
1366
 
1367
- pandas_analysis_tool = Tool(
1368
- name="pandas_data_analysis",
1369
- description="""Use this tool for data analysis on CSV/Parquet files using PandasAI.
1370
- Perfect for when users ask questions about data analysis, statistics, insights, or want to query their datasets.
1371
- Input format: 'filepath|query' where:
1372
- - filepath: S3 file path (e.g., 'User-Uploaded-Raw-Files/Data2004csv1754926601269756') or ufuid (e.g., '123')
1373
- - query: Natural language question about the data (e.g., 'What are the top 5 values?', 'Show me summary statistics')
1374
-
1375
- Examples:
1376
- - 'User-Uploaded-Raw-Files/mydata.csv|What is this file about?'
1377
- - '123|Show me the first 5 rows'
1378
- - 'Modified-Files/processed_data|What are the most common values in column X?'
1379
- """,
1380
- func=pandas_data_analysis
1381
- )
1382
-
1383
  # === AGENT SETUP ===
1384
 
1385
  def create_agent_with_session_memory(session_id: str):
@@ -1392,13 +1165,11 @@ def create_agent_with_session_memory(session_id: str):
1392
  ("system", """You are a helpful AI assistant with access to multiple tools and conversation memory:
1393
 
1394
  1. **Document Search**: Search through uploaded documents and knowledge base
1395
- 2. **Project Management**: Get list of user projects and project information
1396
- 3. **Data Analysis**: Analyze CSV/Parquet files using PandasAI for insights, statistics, and queries
1397
 
1398
  Your capabilities:
1399
  - Answer messages about documents using the document search tool
1400
  - Help users find their projects and project information
1401
- - Perform data analysis on uploaded datasets using natural language queries
1402
  - Remember previous conversations in this session
1403
  - Provide general assistance and information
1404
  - Use appropriate tools based on user queries
@@ -1406,8 +1177,6 @@ Your capabilities:
1406
  Guidelines:
1407
  - Use the document search tool when users ask about specific content, documentation, or information that might be in uploaded files
1408
  - Use the project tool when users ask about projects, want to see their projects, or need project-related information
1409
- - Use the pandas analysis tool when users ask about data analysis, statistics, insights, or want to query datasets
1410
- - For pandas analysis, you need both a filepath (S3 path or ufuid) and a query - ask for missing information if needed
1411
  - Reference previous conversation context when relevant
1412
  - Be clear about which tool you're using and what information you're providing
1413
  - If you're unsure which tool to use, you can ask for clarification
@@ -1433,7 +1202,7 @@ Remember: Always use the most appropriate tool based on the user's message and c
1433
  memory.chat_memory.add_ai_message(msg["message"])
1434
 
1435
  # Create tools list
1436
- tools = [document_search_tool, project_list_tool, pandas_analysis_tool]
1437
 
1438
  # Create the agent
1439
  agent = create_openai_tools_agent(llm, tools, agent_prompt)
@@ -1632,31 +1401,6 @@ def list_projects(request: ProjectRequest):
1632
  "tool_used": "project_list"
1633
  }
1634
 
1635
- @app.post("/chat-with-pandas-agent")
1636
- def chat_with_pandas_agent(request: PandasAgentQuery):
1637
- """Direct pandas AI agent endpoint for data analysis"""
1638
- try:
1639
- result = pandas_agent(request.filepath, request.query)
1640
-
1641
- return {
1642
- "filepath": request.filepath,
1643
- "query": request.query,
1644
- "answer": result,
1645
- "tool_used": "pandas_agent",
1646
- "timestamp": datetime.now().isoformat()
1647
- }
1648
-
1649
- except Exception as e:
1650
- error_msg = f"An error occurred: {str(e)}"
1651
- return {
1652
- "filepath": request.filepath,
1653
- "query": request.query,
1654
- "answer": error_msg,
1655
- "tool_used": "pandas_agent",
1656
- "error": True,
1657
- "timestamp": datetime.now().isoformat()
1658
- }
1659
-
1660
  @app.put("/sessions/{session_id}/title")
1661
  def refresh_session_title(session_id: str):
1662
  """Manually refresh/regenerate session title"""
@@ -1712,11 +1456,10 @@ def health():
1712
 
1713
  return {
1714
  "status": "ok",
1715
- "tools": ["document_search", "project_list", "pandas_data_analysis"],
1716
  "agent": "active",
1717
  "session_management": "enabled",
1718
  "redis_status": redis_status,
1719
- "pandas_ai": "enabled",
1720
  "total_sessions": len(list(redis_client.scan_iter(match="session:*")))
1721
  }
1722
 
@@ -1728,4 +1471,1054 @@ if __name__ == "__main__":
1728
  print("\n🛑 Server stopped gracefully")
1729
  except Exception as e:
1730
  print(f"❌ Server error: {e}")
1731
- #
679
 
680
 
681
  #_______________________________________________session id -- with redis - chat session memory _________________________________________________________
682
+ #
683
+ from fastapi import FastAPI, HTTPException, Query as QueryParam
684
+ from pydantic import BaseModel, Field
685
+ from langchain_openai import ChatOpenAI, OpenAIEmbeddings
686
+ from qdrant_client import QdrantClient
687
+ from langchain.agents import Tool, AgentExecutor, create_openai_tools_agent
688
+ from langchain.prompts import ChatPromptTemplate, MessagesPlaceholder
689
+ from langchain.memory import ConversationBufferMemory
690
+ from typing import Optional, List, Dict, Any
691
+ import os
692
+ import warnings
693
+ import base64
694
+ import requests
695
  from dotenv import load_dotenv
696
  from datetime import datetime
697
  import json
698
  import uuid
699
  import redis
700
 
701
  # Suppress warnings
702
  warnings.filterwarnings("ignore", message="Qdrant client version.*is incompatible.*")
703
 
704
  load_dotenv()
705
 
706
+ app = FastAPI(title="AI Agent with Redis Session Management")
707
 
708
  # Environment variables
709
  OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
 
717
  REDIS_PORT = int(os.getenv("REDIS_PORT", 6379))
718
  REDIS_PASSWORD = os.getenv("REDIS_PASSWORD")
719
 
720
  # Initialize Redis client
721
  def get_redis_client():
722
  """Initialize Redis client with fallback to local Redis"""
 
763
  qdrant_client = QdrantClient(host=QDRANT_HOST, port=QDRANT_PORT)
764
  llm = ChatOpenAI(model="gpt-4o", temperature=0, openai_api_key=OPENAI_API_KEY)
765
 
766
  # === INPUT SCHEMAS ===
767
 
768
  class Query(BaseModel):
 
780
  session_id: Optional[str] = None
781
  message: str
782
 
 
 
 
 
783
  class SessionResponse(BaseModel):
784
  session_id: str
785
  userLoginId: int
 
802
 
803
  # === SESSION MANAGEMENT FUNCTIONS ===
804
 
805
  def create_session(userLoginId: int, orgId: int, auth_token: str) -> dict:
806
  """Create a new chat session"""
807
  session_id = str(uuid.uuid4())
 
1134
  except Exception as e:
1135
  return f"❌ Error fetching projects: {str(e)}"
1136
 
1137
  # === CREATE TOOLS ===
1138
 
1139
  document_search_tool = Tool(
 
1153
  func=get_user_projects
1154
  )
1155
 
1156
  # === AGENT SETUP ===
1157
 
1158
  def create_agent_with_session_memory(session_id: str):
 
1165
  ("system", """You are a helpful AI assistant with access to multiple tools and conversation memory:
1166
 
1167
  1. **Document Search**: Search through uploaded documents and knowledge base
1168
+ 2. **Project Management**: Get list of user projects and project information
 
1169
 
1170
  Your capabilities:
1171
  - Answer messages about documents using the document search tool
1172
  - Help users find their projects and project information
 
1173
  - Remember previous conversations in this session
1174
  - Provide general assistance and information
1175
  - Use appropriate tools based on user queries
 
1177
  Guidelines:
1178
  - Use the document search tool when users ask about specific content, documentation, or information that might be in uploaded files
1179
  - Use the project tool when users ask about projects, want to see their projects, or need project-related information
 
 
1180
  - Reference previous conversation context when relevant
1181
  - Be clear about which tool you're using and what information you're providing
1182
  - If you're unsure which tool to use, you can ask for clarification
 
1202
  memory.chat_memory.add_ai_message(msg["message"])
1203
 
1204
  # Create tools list
1205
+ tools = [document_search_tool, project_list_tool]
1206
 
1207
  # Create the agent
1208
  agent = create_openai_tools_agent(llm, tools, agent_prompt)
 
1401
  "tool_used": "project_list"
1402
  }
1403
 
1404
  @app.put("/sessions/{session_id}/title")
1405
  def refresh_session_title(session_id: str):
1406
  """Manually refresh/regenerate session title"""
 
1456
 
1457
  return {
1458
  "status": "ok",
1459
+ "tools": ["document_search", "project_list"],
1460
  "agent": "active",
1461
  "session_management": "enabled",
1462
  "redis_status": redis_status,
 
1463
  "total_sessions": len(list(redis_client.scan_iter(match="session:*")))
1464
  }
1465
 
 
1471
  print("\n🛑 Server stopped gracefully")
1472
  except Exception as e:
1473
  print(f"❌ Server error: {e}")
1474
+
1475
+
1476
+
1477
+
1478
 + #______________________________________________success true false _pandasai tool_________________________________________________________________
1479
+
1480
+
1481
+ # from dotenv import load_dotenv
1482
+ # from datetime import datetime
1483
+ # import json
1484
+ # import uuid
1485
+ # import redis
1486
+
1487
+ # # Pandas AI imports
1488
+ # import re
1489
+ # import urllib.parse
1490
+ # import pandas as pd
1491
+ # import dask.dataframe as dd
1492
+ # from math import ceil
1493
+ # import psycopg2
1494
+ # from pandasai import SmartDataframe
1495
+ # from pandasai.llm.openai import OpenAI as PandasOpenAI
1496
+
1497
+
1498
+
1499
+
1500
+ # from fastapi import FastAPI, Request
1501
+ # from fastapi.responses import JSONResponse
1502
+ # import json
1503
+
1504
+
1505
+ # # Import your existing S3 connection details
1506
+ # from retrive_secrects import * # CONNECTIONS_HOST, etc.
1507
+
1508
+ # # Suppress warnings
1509
+ # warnings.filterwarnings("ignore", message="Qdrant client version.*is incompatible.*")
1510
+
1511
+ # load_dotenv()
1512
+
1513
+ # app = FastAPI(title="AI Agent with Redis Session Management and Pandas AI")
1514
+
1515
+ # # Environment variables
1516
+ # OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
1517
+ # QDRANT_COLLECTION_NAME = os.getenv("QDRANT_COLLECTION_NAME", "vatsav_test_1")
1518
+ # QDRANT_HOST = os.getenv("QDRANT_HOST", "127.0.0.1")
1519
+ # QDRANT_PORT = int(os.getenv("QDRANT_PORT", 6333))
1520
+
1521
+ # # Redis Configuration
1522
+ # REDIS_URL = os.getenv("REDIS_URL")
1523
+ # REDIS_HOST = os.getenv("REDIS_HOST", "127.0.0.1")
1524
+ # REDIS_PORT = int(os.getenv("REDIS_PORT", 6379))
1525
+ # REDIS_PASSWORD = os.getenv("REDIS_PASSWORD")
1526
+
1527
+ # # S3 Constants (from your original code)
1528
+ # S3_Bucket_Name = 'ingenspark-user-files'
1529
+ # S3_Raw_Files_Folder = 'User-Uploaded-Raw-Files'
1530
+ # S3_Modified_Files_Folder = 'Modified-Files/'
1531
+ # S3_Output_Files_Folder = 'Output-Files/'
1532
+ # S3_Published_Results_Folder = 'Published-Results/'
1533
+ # S3_Ingen_Customer_Output = 'Ingen-Customer/'
1534
+ # Dominant_Segmentation_Output = 'Dominant-Segmentation/'
1535
+ # Trend_Segmentation_Output = 'Trend-Segmentation/'
1536
+ # Decile_Quartile_segmentation_Output = 'Decile-Quartile-Segmentation/'
1537
+ # Combined_Segmentation_Output = 'Combine-Segmentation/'
1538
+ # Custom_Segmentation_Output = 'Custom-Segmentation/'
1539
+ # Customer_360_Output = 'Customer-360/'
1540
+ # Merge_file_folder = S3_Modified_Files_Folder + 'IngenData-Merged-Tables/'
1541
+ # S3_Dev_Doc_Images_Folder = 'Developers-Documentation-Images/'
1542
+ # S3_Temporary_Files_Folder = S3_Raw_Files_Folder
1543
+ # S3_App_Specific_Data = 'Application-Specific-Data/'
1544
+ # S3_Transformation_Tables_Folder = 'Modified-Files/Modified-Tables/Transformation-Tables/'
1545
+ # cloud_front_url = "https://files.dev.ingenspark.com/"
1546
+
1547
+ # # Initialize Redis client
1548
+ # def get_redis_client():
1549
+ # """Initialize Redis client with fallback to local Redis"""
1550
+ # try:
1551
+ # if REDIS_URL:
1552
+ # # Use deployed Redis URL
1553
+ # redis_client = redis.from_url(
1554
+ # REDIS_URL,
1555
+ # decode_responses=True,
1556
+ # socket_connect_timeout=5,
1557
+ # socket_timeout=5
1558
+ # )
1559
+ # # Test connection
1560
+ # redis_client.ping()
1561
+ # print(f"✅ Connected to deployed Redis: {REDIS_URL}")
1562
+ # return redis_client
1563
+ # else:
1564
+ # # Use local Redis
1565
+ # redis_client = redis.StrictRedis(
1566
+ # host=REDIS_HOST,
1567
+ # port=REDIS_PORT,
1568
+ # password=REDIS_PASSWORD,
1569
+ # decode_responses=True,
1570
+ # socket_connect_timeout=5,
1571
+ # socket_timeout=5
1572
+ # )
1573
+ # # Test connection
1574
+ # redis_client.ping()
1575
+ # print(f"✅ Connected to local Redis: {REDIS_HOST}:{REDIS_PORT}")
1576
+ # return redis_client
1577
+ # except Exception as e:
1578
+ # print(f"❌ Redis connection failed: {e}")
1579
+ # raise HTTPException(status_code=500, detail=f"Redis connection failed: {str(e)}")
1580
+
1581
+ # # Initialize Redis client
1582
+ # redis_client = get_redis_client()
1583
+
1584
+ # # Initialize models
1585
+ # embedding_model = OpenAIEmbeddings(
1586
+ # model="text-embedding-3-large",
1587
+ # openai_api_key=OPENAI_API_KEY,
1588
+ # )
1589
+
1590
+ # qdrant_client = QdrantClient(host=QDRANT_HOST, port=QDRANT_PORT)
1591
+ # llm = ChatOpenAI(model="gpt-4o", temperature=0, openai_api_key=OPENAI_API_KEY)
1592
+
1593
+ # # === PANDAS AI FUNCTIONS ===
1594
+
1595
+ # def read_parquet_file_from_s3(ufuid=None, columns_list=None, records_count=None, file_location=''):
1596
+ # """
1597
+ # Reads a Parquet file from S3 using Dask and returns it as a Pandas DataFrame.
1598
+
1599
+ # Parameters:
1600
+ # ufuid (int): Optional user_file_upload_id to fetch S3 path from DB.
1601
+ # columns_list (list/str): Columns to read.
1602
+ # records_count (int): Not used currently.
1603
+ # file_location (str): Direct file path in S3.
1604
+
1605
+ # Returns:
1606
+ # pandas.DataFrame
1607
+ # """
1608
+ # try:
1609
+ # # Connect to PostgreSQL
1610
+ # conn = psycopg2.connect(
1611
+ # host=CONNECTIONS_HOST,
1612
+ # database=CONNECTIONS_DB,
1613
+ # user=CONNECTIONS_USER,
1614
+ # password=CONNECTIONS_PASS
1615
+ # )
1616
+ # cursor = conn.cursor()
1617
+
1618
+ # if ufuid is not None:
1619
+ # query = """SELECT file_name, table_names FROM public.user_file_upload WHERE user_file_upload_id = %s"""
1620
+ # cursor.execute(query, (ufuid,))
1621
+ # file = cursor.fetchone()
1622
+ # if not file:
1623
+ # raise ValueError(f"No file found for ufuid: {ufuid}")
1624
+ # file_name, s3_file_path = file
1625
+ # else:
1626
+ # # Normalize input path
1627
+ # file_location = re.sub(r'\.parquet(?!$)', '', file_location)
1628
+ # s3_file_path = file_location if file_location.endswith('.parquet') else file_location + '.parquet'
1629
+
1630
+ # # Extract relative S3 path
1631
+ # s3_file_path = urllib.parse.unquote(s3_file_path.split(f"{S3_Bucket_Name}/")[-1])
1632
+ # if not s3_file_path.endswith('.parquet'):
1633
+ # s3_file_path += '.parquet'
1634
+
1635
+ # # Parse columns if given as comma-separated string
1636
+ # if columns_list and not isinstance(columns_list, list):
1637
+ # columns_list = [col.strip(' "\'') for col in columns_list.split(',')]
1638
+
1639
+ # print(f"\n{'!' * 100}\nReading from: s3://{S3_Bucket_Name}/{s3_file_path}\n")
1640
+
1641
+ # # Read using Dask
1642
+ # ddf = dd.read_parquet(
1643
+ # f"s3://{S3_Bucket_Name}/{s3_file_path}",
1644
+ # engine="pyarrow",
1645
+ # columns=columns_list,
1646
+ # assume_missing=True
1647
+ # )
1648
+
1649
+ # ddf = ddf.repartition(npartitions=8) # Optimize for processing
1650
+ # print("Reading Parquet file from S3 completed successfully.")
1651
+
1652
+ # # Close database connection
1653
+ # cursor.close()
1654
+ # conn.close()
1655
+
1656
+ # return ddf.compute()
1657
+
1658
+ # except Exception as e:
1659
+ # print(f"❌ Error reading Parquet file: {e}")
1660
+ # return pd.DataFrame() # Return empty DataFrame on error
1661
+
1662
+ # def pandas_agent(filepath: str, query: str) -> str:
1663
+ # """
1664
+ # PandasAI agent that reads data from S3 and answers queries about the data.
1665
+
1666
+ # Parameters:
1667
+ # filepath (str): S3 file path or ufuid
1668
+ # query (str): Natural language query about the data
1669
+
1670
+ # Returns:
1671
+ # str: Answer from PandasAI
1672
+ # """
1673
+ # try:
1674
+ # # Check if filepath is a number (ufuid) or a file path
1675
+ # if filepath.isdigit():
1676
+ # # It's a ufuid
1677
+ # data = read_parquet_file_from_s3(ufuid=int(filepath))
1678
+ # else:
1679
+ # # It's a file path
1680
+ # data = read_parquet_file_from_s3(file_location=filepath)
1681
+
1682
+ # if data.empty:
1683
+ # return "❌ No data found or failed to load the file. Please check the file path or ufuid."
1684
+
1685
+ # # Initialize PandasAI LLM
1686
+ # if not OPENAI_API_KEY:
1687
+ # return "❌ OPENAI_API_KEY is not set in environment variables."
1688
+
1689
+ # pandas_llm = PandasOpenAI(api_token=OPENAI_API_KEY)
1690
+
1691
+ # # Create SmartDataframe
1692
+ # sdf = SmartDataframe(data, config={"llm": pandas_llm})
1693
+
1694
+ # # Ask the question
1695
+ # print(f"🔍 Processing query: {query}")
1696
+ # result = sdf.chat(query)
1697
+
1698
+ # # Handle different types of results
1699
+ # if isinstance(result, str):
1700
+ # return f"📊 Analysis Result:\n{result}"
1701
+ # elif isinstance(result, (pd.DataFrame, pd.Series)):
1702
+ # return f"📊 Analysis Result:\n{result.to_string()}"
1703
+ # else:
1704
+ # return f"📊 Analysis Result:\n{str(result)}"
1705
+
1706
+ # except Exception as e:
1707
+ # error_msg = f"❌ Error in pandas_agent: {str(e)}"
1708
+ # print(error_msg)
1709
+ # return error_msg
1710
+
1711
+ # # === INPUT SCHEMAS ===
1712
+
1713
+ # class Query(BaseModel):
1714
+ # message: str
1715
+
1716
+ # class ProjectRequest(BaseModel):
1717
+ # userLoginId: int
1718
+ # orgId: int
1719
+ # auth_token: str
1720
+
1721
+ # class BotQuery(BaseModel):
1722
+ # userLoginId: int
1723
+ # orgId: int
1724
+ # auth_token: str
1725
+ # session_id: Optional[str] = None
1726
+ # message: str
1727
+
1728
+ # class PandasAgentQuery(BaseModel):
1729
+ # filepath: str = Field(..., description="S3 file path or ufuid")
1730
+ # query: str = Field(..., description="Natural language query about the data")
1731
+
1732
+ # class SessionResponse(BaseModel):
1733
+ # session_id: str
1734
+ # userLoginId: int
1735
+ # orgId: int
1736
+ # created_at: str
1737
+ # status: str
1738
+ # title: Optional[str] = "New Chat"
1739
+
1740
+ # class MessageResponse(BaseModel):
1741
+ # message_id: str
1742
+ # session_id: str
1743
+ # role: str # "user" or "assistant"
1744
+ # message: str
1745
+ # timestamp: str
1746
+
1747
+ # class ChatHistoryResponse(BaseModel):
1748
+ # session_id: str
1749
+ # messages: List[MessageResponse]
1750
+ # total_messages: int
1751
+
1752
+ # # === SESSION MANAGEMENT FUNCTIONS ===
1753
+
1754
+
1755
+
1756
+
1757
+ # @app.middleware("http")
1758
+ # async def add_success_flag(request: Request, call_next):
1759
+ # response = await call_next(request)
1760
+
1761
+ # # Only modify JSON responses
1762
+ # if "application/json" in response.headers.get("content-type", ""):
1763
+ # try:
1764
+ # body = b"".join([chunk async for chunk in response.body_iterator])
1765
+ # data = json.loads(body.decode())
1766
+
1767
+ # # Add success flag
1768
+ # data["success"] = 200 <= response.status_code < 300
1769
+
1770
+ # # Build new JSONResponse (auto handles Content-Length)
1771
+ # response = JSONResponse(
1772
+ # content=data,
1773
+ # status_code=response.status_code,
1774
+ # headers={k: v for k, v in response.headers.items() if k.lower() != "content-length"},
1775
+ # )
1776
+ # except Exception:
1777
+ # # fallback if response is not JSON parseable
1778
+ # pass
1779
+ # return response
1780
+
1781
+
1782
+
1783
+
1784
+ # def create_session(userLoginId: int, orgId: int, auth_token: str) -> dict:
1785
+ # """Create a new chat session"""
1786
+ # session_id = str(uuid.uuid4())
1787
+ # session_data = {
1788
+ # "session_id": session_id,
1789
+ # "userLoginId": userLoginId,
1790
+ # "orgId": orgId,
1791
+ # "auth_token": auth_token,
1792
+ # "created_at": datetime.now().isoformat(),
1793
+ # "status": "active",
1794
+ # "title": "New Chat" # Default title, will be updated after first message
1795
+ # }
1796
+
1797
+ # # Store session in Redis with 24 hour TTL
1798
+ # redis_client.setex(
1799
+ # f"session:{session_id}",
1800
+ # 86400, # 24 hours
1801
+ # json.dumps(session_data)
1802
+ # )
1803
+
1804
+ # # Initialize empty chat history
1805
+ # redis_client.setex(
1806
+ # f"chat:{session_id}",
1807
+ # 86400, # 24 hours
1808
+ # json.dumps([])
1809
+ # )
1810
+
1811
+ # # Initialize conversation memory
1812
+ # redis_client.setex(
1813
+ # f"memory:{session_id}",
1814
+ # 86400, # 24 hours
1815
+ # json.dumps([])
1816
+ # )
1817
+
1818
+ # return session_data
1819
+
1820
+ # def get_session(session_id: str) -> dict:
1821
+ # """Get session data from Redis"""
1822
+ # session_data = redis_client.get(f"session:{session_id}")
1823
+ # if not session_data:
1824
+ # raise HTTPException(status_code=404, detail="Session not found or expired")
1825
+ # return json.loads(session_data)
1826
+
1827
+ # def add_message_to_session(session_id: str, role: str, message: str) -> str:
1828
+ # """Add message to session chat history"""
1829
+ # message_id = str(uuid.uuid4())
1830
+ # message_data = {
1831
+ # "message_id": message_id,
1832
+ # "session_id": session_id,
1833
+ # "role": role,
1834
+ # "message": message,
1835
+ # "timestamp": datetime.now().isoformat()
1836
+ # }
1837
+
1838
+ # # Get current chat history
1839
+ # chat_history = redis_client.get(f"chat:{session_id}")
1840
+ # if chat_history:
1841
+ # messages = json.loads(chat_history)
1842
+ # else:
1843
+ # messages = []
1844
+
1845
+ # # Add new message
1846
+ # messages.append(message_data)
1847
+
1848
+ # # Update chat history in Redis with extended TTL
1849
+ # redis_client.setex(
1850
+ # f"chat:{session_id}",
1851
+ # 86400, # 24 hours
1852
+ # json.dumps(messages)
1853
+ # )
1854
+
1855
+ # return message_id
1856
+
1857
+ # def get_session_memory(session_id: str) -> List[Dict]:
1858
+ # """Get conversation memory for session"""
1859
+ # memory_data = redis_client.get(f"memory:{session_id}")
1860
+ # if memory_data:
1861
+ # return json.loads(memory_data)
1862
+ # return []
1863
+
1864
+ # def update_session_memory(session_id: str, messages: List[Dict]):
1865
+ # """Update conversation memory for session"""
1866
+ # redis_client.setex(
1867
+ # f"memory:{session_id}",
1868
+ # 86400, # 24 hours
1869
+ # json.dumps(messages)
1870
+ # )
1871
+
1872
+ # def update_session_title(session_id: str):
1873
+ # """Update session title after first message"""
1874
+ # try:
1875
+ # # Get session data
1876
+ # session_data = redis_client.get(f"session:{session_id}")
1877
+ # if not session_data:
1878
+ # return
1879
+
1880
+ # session = json.loads(session_data)
1881
+
1882
+ # # Only update if current title is "New Chat"
1883
+ # if session.get("title", "New Chat") == "New Chat":
1884
+ # new_title = generate_session_title(session_id)
1885
+ # session["title"] = new_title
1886
+
1887
+ # # Update session in Redis
1888
+ # redis_client.setex(
1889
+ # f"session:{session_id}",
1890
+ # 86400, # 24 hours
1891
+ # json.dumps(session)
1892
+ # )
1893
+
1894
+ # except Exception as e:
1895
+ # print(f"Error updating session title: {e}")
1896
+ # pass # Don't fail the request if title update fails
1897
+
1898
+ # def generate_session_title(session_id: str) -> str:
1899
+ # """Generate a title for the session based on chat history"""
1900
+ # try:
1901
+ # # Get chat history
1902
+ # chat_data = redis_client.get(f"chat:{session_id}")
1903
+ # if not chat_data:
1904
+ # return "New Chat"
1905
+
1906
+ # messages = json.loads(chat_data)
1907
+ # if not messages:
1908
+ # return "New Chat"
1909
+
1910
+ # # Get first user message for title generation
1911
+ # first_user_message = None
1912
+ # for msg in messages:
1913
+ # if msg["role"] == "user":
1914
+ # first_user_message = msg["message"]
1915
+ # break
1916
+
1917
+ # if not first_user_message:
1918
+ # return "New Chat"
1919
+
1920
+ # # Generate title using LLM
1921
+ # title_prompt = f"""Generate a short, descriptive title (maximum 6 words) for a chat conversation that starts with this message:
1922
+
1923
+ # "{first_user_message[:200]}"
1924
+
1925
+ # Return only the title, no quotes or additional text. The title should capture the main topic or intent of the conversation."""
1926
+
1927
+ # try:
1928
+ # response = llm.invoke(title_prompt)
1929
+ # title = response.content.strip()
1930
+
1931
+ # # Clean and limit title
1932
+ # title = title.replace('"', '').replace("'", "")
1933
+ # if len(title) > 50:
1934
+ # title = title[:47] + "..."
1935
+
1936
+ # return title if title else "New Chat"
1937
+
1938
+ # except Exception as e:
1939
+ # print(f"Error generating title: {e}")
1940
+ # # Fallback: use first few words of the message
1941
+ # words = first_user_message.split()[:4]
1942
+ # return " ".join(words) + ("..." if len(words) >= 4 else "")
1943
+
1944
+ # except Exception as e:
1945
+ # print(f"Error in generate_session_title: {e}")
1946
+ # return "New Chat"
1947
+
1948
+ # def get_user_sessions(userLoginId: int) -> List[dict]:
1949
+ # """Get all sessions for a user with generated titles"""
1950
+ # sessions = []
1951
+ # # Scan for all session keys
1952
+ # for key in redis_client.scan_iter(match="session:*"):
1953
+ # session_data = redis_client.get(key)
1954
+ # if session_data:
1955
+ # session = json.loads(session_data)
1956
+ # if session["userLoginId"] == userLoginId:
1957
+ # # Generate title based on chat history
1958
+ # session["title"] = generate_session_title(session["session_id"])
1959
+ # sessions.append(session)
1960
+
1961
+ # # Sort sessions by created_at (most recent first)
1962
+ # sessions.sort(key=lambda x: x["created_at"], reverse=True)
1963
+ # return sessions
1964
+
1965
+ # def delete_session(session_id: str):
1966
+ # """Delete session and associated data"""
1967
+ # # Delete session data
1968
+ # redis_client.delete(f"session:{session_id}")
1969
+ # # Delete chat history
1970
+ # redis_client.delete(f"chat:{session_id}")
1971
+ # # Delete memory
1972
+ # redis_client.delete(f"memory:{session_id}")
1973
+
1974
+ # # === UTILITY FUNCTIONS ===
1975
+
1976
+ # def get_encoded_auth_token(user: int, token: str) -> str:
1977
+ # auth_string = f"{user}:{token}"
1978
+ # return base64.b64encode(auth_string.encode("utf-8")).decode("utf-8")
1979
+
1980
+ # def fetch_user_projects(userLoginId: int, orgId: int, auth_token: str):
1981
+ # url = "https://japidemo.dev.ingenspark.com/fetchUserProjects"
1982
+ # payload = {
1983
+ # "userLoginId": userLoginId,
1984
+ # "orgId": orgId
1985
+ # }
1986
+
1987
+ # headers = {
1988
+ # 'accept': 'application/json, text/plain, */*',
1989
+ # 'authorization': f'Basic {auth_token}',
1990
+ # 'content-type': 'application/json; charset=UTF-8'
1991
+ # }
1992
+
1993
+ # try:
1994
+ # response = requests.post(url, headers=headers, json=payload)
1995
+ # response.raise_for_status()
1996
+ # return response.json()
1997
+ # except requests.exceptions.RequestException as e:
1998
+ # raise HTTPException(status_code=response.status_code if 'response' in locals() else 500,
1999
+ # detail=str(e))
2000
+
2001
+ # def format_project_response(data: dict) -> str:
2002
+ # my_projects = data.get("data", {}).get("Myprojects", [])
2003
+ # other_projects = data.get("data", {}).get("Otherprojects", [])
2004
+
2005
+ # all_projects = []
2006
+
2007
+ # for project in my_projects:
2008
+ # all_projects.append({
2009
+ # "type": "Your Project",
2010
+ # "projectNm": project["projectNm"],
2011
+ # "projectId": project["projectId"],
2012
+ # "created_dttm": project["created_dttm"].split('.')[0],
2013
+ # "description": project["description"],
2014
+ # "categoryName": project["categoryName"]
2015
+ # })
2016
+
2017
+ # for project in other_projects:
2018
+ # all_projects.append({
2019
+ # "type": "Other Project",
2020
+ # "projectNm": project["projectNm"],
2021
+ # "projectId": project["projectId"],
2022
+ # "created_dttm": project["created_dttm"].split('.')[0],
2023
+ # "description": project["description"],
2024
+ # "categoryName": project["categoryName"]
2025
+ # })
2026
+
2027
+ # if not all_projects:
2028
+ # return "❌ No projects found."
2029
+
2030
+ # # Build the formatted string
2031
+ # result = [f"✅ You have access to {len(all_projects)} project(s):\n"]
2032
+ # for i, project in enumerate(all_projects, 1):
2033
+ # result.append(f"{i}. Project Name: {project['projectNm']} ({project['type']})")
2034
+ # result.append(f" Project ID: {project['projectId']}")
2035
+ # result.append(f" Created On: {project['created_dttm']}")
2036
+ # result.append(f" Description: {project['description']}")
2037
+ # result.append(f" Category: {project['categoryName']}\n")
2038
+ # return "\n".join(result)
2039
+
2040
+ # # === TOOL FUNCTIONS ===
2041
+
2042
+ # def search_documents(query: str) -> str:
2043
+ # """Search through ingested documents and get relevant information."""
2044
+ # try:
2045
+ # # Generate embedding for the query
2046
+ # query_vector = embedding_model.embed_query(query)
2047
+
2048
+ # # Search in Qdrant
2049
+ # search_result = qdrant_client.search(
2050
+ # collection_name=QDRANT_COLLECTION_NAME,
2051
+ # query_vector=query_vector,
2052
+ # limit=5,
2053
+ # )
2054
+
2055
+ # if not search_result:
2056
+ # return "No relevant information found in the knowledge base."
2057
+
2058
+ # # Convert results to text content
2059
+ # context_texts = []
2060
+ # sources = []
2061
+
2062
+ # for hit in search_result:
2063
+ # context_texts.append(hit.payload["text"])
2064
+ # sources.append(hit.payload.get("source", "unknown"))
2065
+
2066
+ # # Create a simple prompt for answering based on context
2067
+ # context = "\n\n".join(context_texts)
2068
+ # unique_sources = list(set(sources))
2069
+
2070
+ # # Use the LLM directly to answer the message based on context
2071
+ # prompt = f"""Based on the following context, answer the message: {query}
2072
+
2073
+ # Context:
2074
+ # {context}
2075
+
2076
+ # Please provide a comprehensive answer based on the context above. If the context doesn't contain enough information to answer the message, say so clearly."""
2077
+
2078
+ # response = llm.invoke(prompt)
2079
+
2080
+ # return f"{response.content}\n\nSources: {', '.join(unique_sources)}"
2081
+
2082
+ # except Exception as e:
2083
+ # return f"Error searching documents: {str(e)}"
2084
+
2085
+ # # Global variables to store auth context (for tool functions)
2086
+ # _current_user_id = None
2087
+ # _current_org_id = None
2088
+ # _current_auth_token = None
2089
+
2090
+ # def get_user_projects(userLoginId: str) -> str:
2091
+ # """Get list of projects for a user."""
2092
+ # try:
2093
+ # # Use global auth context if available
2094
+ # if _current_auth_token and _current_user_id:
2095
+ # user_id = _current_user_id
2096
+ # org_id = _current_org_id or 1
2097
+ # auth_token = _current_auth_token
2098
+ # else:
2099
+ # return "❌ Authentication token required. Please provide auth_token in your request."
2100
+
2101
+ # # Encode auth token using the actual user ID and provided token
2102
+ # encoded_token = get_encoded_auth_token(user_id, auth_token)
2103
+
2104
+ # # Fetch projects
2105
+ # data = fetch_user_projects(user_id, org_id, encoded_token)
2106
+
2107
+ # # Format and return the project list
2108
+ # formatted = format_project_response(data)
2109
+ # return formatted
2110
+
2111
+ # except ValueError:
2112
+ # return "❌ Invalid userLoginId format. Please provide a valid number."
2113
+ # except Exception as e:
2114
+ # return f"❌ Error fetching projects: {str(e)}"
2115
+
2116
+ # def pandas_data_analysis(query_with_filepath: str) -> str:
2117
+ # """
2118
+ # Tool for data analysis using PandasAI.
2119
+ # Input format: 'filepath|query' where filepath is S3 path or ufuid, and query is the analysis question.
2120
+ # """
2121
+ # try:
2122
+ # # Parse the input to extract filepath and query
2123
+ # parts = query_with_filepath.split('|', 1)
2124
+ # if len(parts) != 2:
2125
+ # return "❌ Invalid input format. Please use: 'filepath|query' format."
2126
+
2127
+ # filepath, query = parts
2128
+ # filepath = filepath.strip()
2129
+ # query = query.strip()
2130
+
2131
+ # if not filepath or not query:
2132
+ # return "❌ Both filepath and query are required."
2133
+
2134
+ # # Use the pandas_agent function
2135
+ # result = pandas_agent(filepath, query)
2136
+ # return result
2137
+
2138
+ # except Exception as e:
2139
+ # return f"❌ Error in pandas data analysis: {str(e)}"
2140
+
2141
+ # # === CREATE TOOLS ===
2142
+
2143
+ # document_search_tool = Tool(
2144
+ # name="document_search",
2145
+ # description="""Use this tool to search through ingested documents and get relevant information from the knowledge base.
2146
+ # Perfect for answering messages about uploaded documents, manuals, or any content that was previously stored.
2147
+ # Input should be a search query or message about the documents.""",
2148
+ # func=search_documents
2149
+ # )
2150
+
2151
+ # project_list_tool = Tool(
2152
+ # name="get_user_projects",
2153
+ # description="""Use this tool to get the list of projects for a user.
2154
+ # Perfect for when users ask about their projects, want to see available projects, or need project information.
2155
+ # Input should be the userLoginId (e.g., '25').
2156
+ # Note: This tool requires authentication context to be set.""",
2157
+ # func=get_user_projects
2158
+ # )
2159
+
2160
+ # pandas_analysis_tool = Tool(
2161
+ # name="pandas_data_analysis",
2162
+ # description="""Use this tool for data analysis on CSV/Parquet files using PandasAI.
2163
+ # Perfect for when users ask questions about data analysis, statistics, insights, or want to query their datasets.
2164
+ # Input format: 'filepath|query' where:
2165
+ # - filepath: S3 file path (e.g., 'User-Uploaded-Raw-Files/Data2004csv1754926601269756') or ufuid (e.g., '123')
2166
+ # - query: Natural language question about the data (e.g., 'What are the top 5 values?', 'Show me summary statistics')
2167
+
2168
+ # Examples:
2169
+ # - 'User-Uploaded-Raw-Files/mydata.csv|What is this file about?'
2170
+ # - '123|Show me the first 5 rows'
2171
+ # - 'Modified-Files/processed_data|What are the most common values in column X?'
2172
+ # """,
2173
+ # func=pandas_data_analysis
2174
+ # )
2175
+
2176
+ # # === AGENT SETUP ===
2177
+
2178
+ # def create_agent_with_session_memory(session_id: str):
2179
+ # """Create agent with session memory from Redis"""
2180
+
2181
+ # # Get memory from Redis
2182
+ # memory_messages = get_session_memory(session_id)
2183
+
2184
+ # agent_prompt = ChatPromptTemplate.from_messages([
2185
+ # ("system", """You are a helpful AI assistant with access to multiple tools and conversation memory:
2186
+
2187
+ # 1. **Document Search**: Search through uploaded documents and knowledge base
2188
+ # 2. **Project Management**: Get list of user projects and project information
2189
+ # 3. **Data Analysis**: Analyze CSV/Parquet files using PandasAI for insights, statistics, and queries
2190
+
2191
+ # Your capabilities:
2192
+ # - Answer messages about documents using the document search tool
2193
+ # - Help users find their projects and project information
2194
+ # - Perform data analysis on uploaded datasets using natural language queries
2195
+ # - Remember previous conversations in this session
2196
+ # - Provide general assistance and information
2197
+ # - Use appropriate tools based on user queries
2198
+
2199
+ # Guidelines:
2200
+ # - Use the document search tool when users ask about specific content, documentation, or information that might be in uploaded files
2201
+ # - Use the project tool when users ask about projects, want to see their projects, or need project-related information
2202
+ # - Use the pandas analysis tool when users ask about data analysis, statistics, insights, or want to query datasets
2203
+ # - For pandas analysis, you need both a filepath (S3 path or ufuid) and a query - ask for missing information if needed
2204
+ # - Reference previous conversation context when relevant
2205
+ # - Be clear about which tool you're using and what information you're providing
2206
+ # - If you're unsure which tool to use, you can ask for clarification
2207
+ # - Provide helpful, accurate, and well-formatted responses
2208
+
2209
+ # Remember: Always use the most appropriate tool based on the user's message and conversation context to provide the best possible answer."""),
2210
+ # MessagesPlaceholder(variable_name="chat_history"),
2211
+ # ("user", "{input}"),
2212
+ # MessagesPlaceholder(variable_name="agent_scratchpad"),
2213
+ # ])
2214
+
2215
+ # # Create memory object
2216
+ # memory = ConversationBufferMemory(
2217
+ # memory_key="chat_history",
2218
+ # return_messages=True
2219
+ # )
2220
+
2221
+ # # Load existing messages into memory
2222
+ # for msg in memory_messages:
2223
+ # if msg["role"] == "user":
2224
+ # memory.chat_memory.add_user_message(msg["message"])
2225
+ # else:
2226
+ # memory.chat_memory.add_ai_message(msg["message"])
2227
+
2228
+ # # Create tools list
2229
+ # tools = [document_search_tool, project_list_tool, pandas_analysis_tool]
2230
+
2231
+ # # Create the agent
2232
+ # agent = create_openai_tools_agent(llm, tools, agent_prompt)
2233
+
2234
+ # # Create the agent executor with memory
2235
+ # agent_executor = AgentExecutor(
2236
+ # agent=agent,
2237
+ # tools=tools,
2238
+ # verbose=True,
2239
+ # memory=memory
2240
+ # )
2241
+
2242
+ # return agent_executor, memory
2243
+
2244
+ # # === API ENDPOINTS ===
2245
+
2246
+ # @app.post("/sessions", response_model=SessionResponse)
2247
+ # def create_new_session(userLoginId: int, orgId: int, auth_token: str):
2248
+ # """Create a new chat session"""
2249
+ # try:
2250
+ # session_data = create_session(userLoginId, orgId, auth_token)
2251
+ # return SessionResponse(**session_data)
2252
+ # except Exception as e:
2253
+ # raise HTTPException(status_code=500, detail=f"Error creating session: {str(e)}")
2254
+
2255
+ # @app.get("/sessions")
2256
+ # def list_user_sessions(userLoginId: int):
2257
+ # """List all sessions for a user"""
2258
+ # try:
2259
+ # sessions = get_user_sessions(userLoginId)
2260
+ # return {
2261
+ # "userLoginId": userLoginId,
2262
+ # "total_sessions": len(sessions),
2263
+ # "sessions": sessions
2264
+ # }
2265
+ # except Exception as e:
2266
+ # raise HTTPException(status_code=500, detail=f"Error fetching sessions: {str(e)}")
2267
+
2268
+ # @app.delete("/sessions/{session_id}")
2269
+ # def delete_user_session(session_id: str):
2270
+ # """Delete/close a session"""
2271
+ # try:
2272
+ # # Verify session exists
2273
+ # get_session(session_id)
2274
+
2275
+ # # Delete session
2276
+ # delete_session(session_id)
2277
+
2278
+ # return {
2279
+ # "message": f"Session {session_id} deleted successfully",
2280
+ # "session_id": session_id
2281
+ # }
2282
+ # except Exception as e:
2283
+ # raise HTTPException(status_code=500, detail=f"Error deleting session: {str(e)}")
2284
+
2285
+ # @app.post("/bot")
2286
+ # def chat_with_bot(query: BotQuery):
2287
+ # """Main bot endpoint with session management"""
2288
+ # try:
2289
+ # # Set global auth context for tools
2290
+ # global _current_user_id, _current_org_id, _current_auth_token
2291
+ # _current_user_id = query.userLoginId
2292
+ # _current_org_id = query.orgId
2293
+ # _current_auth_token = query.auth_token
2294
+
2295
+ # session_id = query.session_id
2296
+
2297
+ # # Create new session if not provided
2298
+ # if not session_id:
2299
+ # session_data = create_session(query.userLoginId, query.orgId, query.auth_token)
2300
+ # session_id = session_data["session_id"]
2301
+ # else:
2302
+ # # Verify existing session
2303
+ # get_session(session_id)
2304
+
2305
+ # # Add user message to session
2306
+ # user_message_id = add_message_to_session(session_id, "user", query.message)
2307
+
2308
+ # # Create agent with session memory
2309
+ # agent_executor, memory = create_agent_with_session_memory(session_id)
2310
+
2311
+ # # Use the agent to process the query
2312
+ # result = agent_executor.invoke({"input": query.message})
2313
+
2314
+ # # Add AI response to session
2315
+ # ai_message_id = add_message_to_session(session_id, "assistant", result["output"])
2316
+
2317
+ # # Update session memory in Redis
2318
+ # updated_messages = []
2319
+ # for message in memory.chat_memory.messages:
2320
+ # if hasattr(message, 'content'):
2321
+ # role = "user" if message.__class__.__name__ == "HumanMessage" else "assistant"
2322
+ # updated_messages.append({
2323
+ # "role": role,
2324
+ # "message": message.content,
2325
+ # "timestamp": datetime.now().isoformat()
2326
+ # })
2327
+
2328
+ # update_session_memory(session_id, updated_messages)
2329
+
2330
+ # # Update session title after first user message
2331
+ # update_session_title(session_id)
2332
+
2333
+ # # Clear auth context after use
2334
+ # _current_user_id = None
2335
+ # _current_org_id = None
2336
+ # _current_auth_token = None
2337
+
2338
+ # return {
2339
+ # "session_id": session_id,
2340
+ # "user_message_id": user_message_id,
2341
+ # "ai_message_id": ai_message_id,
2342
+ # "message": query.message,
2343
+ # "answer": result["output"],
2344
+ # "userLoginId": query.userLoginId,
2345
+ # "agent_used": True
2346
+ # }
2347
+
2348
+ # except Exception as e:
2349
+ # # Clear auth context on error
2350
+ # _current_user_id = None
2351
+ # _current_org_id = None
2352
+ # _current_auth_token = None
2353
+
2354
+ # raise HTTPException(status_code=500, detail=f"Error processing chat: {str(e)}")
2355
+
2356
+ # @app.get("/sessions/{session_id}/history", response_model=ChatHistoryResponse)
2357
+ # def get_session_history(session_id: str, n: int = QueryParam(50, description="Number of recent messages to return")):
2358
+ # """Get chat history for a session"""
2359
+ # try:
2360
+ # # Verify session exists
2361
+ # get_session(session_id)
2362
+
2363
+ # # Get chat history
2364
+ # chat_data = redis_client.get(f"chat:{session_id}")
2365
+ # if not chat_data:
2366
+ # return ChatHistoryResponse(
2367
+ # session_id=session_id,
2368
+ # messages=[],
2369
+ # total_messages=0
2370
+ # )
2371
+
2372
+ # messages = json.loads(chat_data)
2373
+
2374
+ # # Get the last n messages (or all if less than n)
2375
+ # recent_messages = messages[-n:] if len(messages) > n else messages
2376
+
2377
+ # # Convert to MessageResponse objects
2378
+ # message_responses = [MessageResponse(**msg) for msg in recent_messages]
2379
+
2380
+ # return ChatHistoryResponse(
2381
+ # session_id=session_id,
2382
+ # messages=message_responses,
2383
+ # total_messages=len(messages)
2384
+ # )
2385
+
2386
+ # except Exception as e:
2387
+ # raise HTTPException(status_code=500, detail=f"Error fetching chat history: {str(e)}")
2388
+
2389
+ # @app.post("/chat-documents")
2390
+ # def chat_documents_only(query: Query):
2391
+ # """Direct document search without agent"""
2392
+ # try:
2393
+ # result = search_documents(query.message)
2394
+ # return {
2395
+ # "message": query.message,
2396
+ # "answer": result,
2397
+ # "tool_used": "document_search"
2398
+ # }
2399
+ # except Exception as e:
2400
+ # return {
2401
+ # "message": query.message,
2402
+ # "answer": f"An error occurred: {str(e)}",
2403
+ # "tool_used": "document_search"
2404
+ # }
2405
+
2406
+ # @app.post("/list-projects")
2407
+ # def list_projects(request: ProjectRequest):
2408
+ # """Direct project listing without agent"""
2409
+ # try:
2410
+ # # Use the provided auth token and userLoginId
2411
+ # encoded_token = get_encoded_auth_token(request.userLoginId, request.auth_token)
2412
+
2413
+ # # Fetch projects
2414
+ # data = fetch_user_projects(request.userLoginId, request.orgId, encoded_token)
2415
+
2416
+ # # Format and return the project list
2417
+ # formatted = format_project_response(data)
2418
+ # return {
2419
+ # "projects": formatted,
2420
+ # "tool_used": "project_list"
2421
+ # }
2422
+ # except Exception as e:
2423
+ # return {
2424
+ # "error": f"An error occurred: {str(e)}",
2425
+ # "tool_used": "project_list"
2426
+ # }
2427
+
2428
+ # @app.post("/chat-with-pandas-agent")
2429
+ # def chat_with_pandas_agent(request: PandasAgentQuery):
2430
+ # """Direct pandas AI agent endpoint for data analysis"""
2431
+ # try:
2432
+ # result = pandas_agent(request.filepath, request.query)
2433
+
2434
+ # return {
2435
+ # "filepath": request.filepath,
2436
+ # "query": request.query,
2437
+ # "answer": result,
2438
+ # "tool_used": "pandas_agent",
2439
+ # "timestamp": datetime.now().isoformat()
2440
+ # }
2441
+
2442
+ # except Exception as e:
2443
+ # error_msg = f"An error occurred: {str(e)}"
2444
+ # return {
2445
+ # "filepath": request.filepath,
2446
+ # "query": request.query,
2447
+ # "answer": error_msg,
2448
+ # "tool_used": "pandas_agent",
2449
+ # "error": True,
2450
+ # "timestamp": datetime.now().isoformat()
2451
+ # }
2452
+
2453
+ # @app.put("/sessions/{session_id}/title")
2454
+ # def refresh_session_title(session_id: str):
2455
+ # """Manually refresh/regenerate session title"""
2456
+ # try:
2457
+ # # Verify session exists
2458
+ # session_data = get_session(session_id)
2459
+
2460
+ # # Generate new title
2461
+ # new_title = generate_session_title(session_id)
2462
+
2463
+ # # Update session
2464
+ # session_data["title"] = new_title
2465
+ # redis_client.setex(
2466
+ # f"session:{session_id}",
2467
+ # 86400, # 24 hours
2468
+ # json.dumps(session_data)
2469
+ # )
2470
+
2471
+ # return {
2472
+ # "session_id": session_id,
2473
+ # "new_title": new_title,
2474
+ # "message": "Session title updated successfully"
2475
+ # }
2476
+
2477
+ # except Exception as e:
2478
+ # raise HTTPException(status_code=500, detail=f"Error updating session title: {str(e)}")
2479
+
2480
+ # @app.get("/redis-info")
2481
+ # def redis_info():
2482
+ # """Get Redis connection information"""
2483
+ # try:
2484
+ # info = redis_client.info()
2485
+ # return {
2486
+ # "redis_connected": True,
2487
+ # "redis_version": info.get("redis_version"),
2488
+ # "used_memory": info.get("used_memory_human"),
2489
+ # "connected_clients": info.get("connected_clients"),
2490
+ # "total_keys": redis_client.dbsize()
2491
+ # }
2492
+ # except Exception as e:
2493
+ # return {
2494
+ # "redis_connected": False,
2495
+ # "error": str(e)
2496
+ # }
2497
+
2498
+ # @app.get("/health")
2499
+ # def health():
2500
+ # try:
2501
+ # redis_client.ping()
2502
+ # redis_status = "connected"
2503
+ # except:
2504
+ # redis_status = "disconnected"
2505
+
2506
+ # return {
2507
+ # "status": "ok",
2508
+ # "tools": ["document_search", "project_list", "pandas_data_analysis"],
2509
+ # "agent": "active",
2510
+ # "session_management": "enabled",
2511
+ # "redis_status": redis_status,
2512
+ # "pandas_ai": "enabled",
2513
+ # "total_sessions": len(list(redis_client.scan_iter(match="session:*")))
2514
+ # }
2515
+
2516
+ # if __name__ == "__main__":
2517
+ # import uvicorn
2518
+ # try:
2519
+ # uvicorn.run(app, host="0.0.0.0", port=8000)
2520
+ # except KeyboardInterrupt:
2521
+ # print("\n🛑 Server stopped gracefully")
2522
+ # except Exception as e:
2523
+ # print(f"❌ Server error: {e}")
2524
+ # #