# main.py
#
# Streamlit front-end for a Text-to-SQL agent: shows the database schema,
# lets the user pick a data source and an LLM backend, streams the model's
# answer, extracts the generated SQL and executes it with DuckDB, rendering
# the resulting DataFrame plus summary statistics.
import re

import streamlit as st
from langchain_core.messages import HumanMessage, AIMessage

from utils.llm_logic import generate_llm_response
from utils.sql_utils import (
    extract_sql_command,
    load_defaultdb_schema_text,
    load_defaultdb_queries,
    load_data,
)
from utils.handle_sql_commands import execute_sql_duckdb

st.set_page_config(
    page_title="Text-to-SQL Agent",
    page_icon="🤖",
    layout="wide",
    initial_sidebar_state="expanded",
)

# Defaults for a fresh run: answer against the bundled demo database with the
# Gemini backend until the user chooses otherwise.
default_db_questions = {}
default_dfs = load_data()
selected_df = default_dfs
use_default_schema = True
llm_option = "gemini"

# NOTE(review): this markdown body is empty in the source as received —
# presumably it carried custom CSS (hence unsafe_allow_html). Confirm
# against version control.
st.markdown(
    """ """,
    unsafe_allow_html=True,
)

# ---------------------------------------------------------------------------
# Schema viewer
# ---------------------------------------------------------------------------
with st.popover("Click here to see Database Schema", use_container_width=True):
    # Falsy (False) until the File-Upload page stores a schema for the
    # user's own files in session state.
    uploaded_df_schema = st.session_state.get("uploaded_df_schema", False)
    has_uploaded_schema = uploaded_df_schema is not False
    choice = st.segmented_control(
        "Choose",
        ["Default DB", "Uploaded Files"],
        label_visibility="collapsed",
        disabled=not has_uploaded_schema,
        default="Uploaded Files" if has_uploaded_schema else "Default DB",
    )
    if not has_uploaded_schema:
        # No user files yet: advertise the upload page, then show the demo schema.
        st.markdown(
            """> You can also upload your own files, to get your schemas. You can then use those schemas to cross-check our answers with ChatGpt/Gemini/Claude (Preferred if the Question is very Complex). You can run the queries directly with our Manual SQL Executer😊.

- Ask Questions
- Run Queries: automatic + manual
- Download Results"""
        )
        st.page_link(
            page="pages/3 📂File Upload for SQL.py",
            label="Upload your own CSV or Excel files",
            icon="📜",
        )
        schema = load_defaultdb_schema_text()
        st.markdown(schema, unsafe_allow_html=True)
    elif choice == "Default DB":
        schema = load_defaultdb_schema_text()
        st.markdown(schema, unsafe_allow_html=True)
    else:
        # Uploaded-files schema: a pretty HTML tab plus a raw tab for copying
        # the schema into an external LLM chat.
        pretty_schema, markdown_tab = st.tabs(["Schema", "Copy Schema in Markdown"])
        cross_check_hint = (
            "You can copy this schema, and give it to any state of the art LLM "
            "models like (Gemini /ChatGPT /Claude etc) to cross check your "
            "answers.\n You can run the queries directly here, by using "
            "***Manual Query Executer*** in the sidebar and download your "
            "results 😊"
        )
        with pretty_schema:
            st.info(cross_check_hint, icon="ℹ️")
            st.markdown(uploaded_df_schema, unsafe_allow_html=True)
        with markdown_tab:
            st.info(cross_check_hint, icon="ℹ️")
            st.markdown(f"```\n{uploaded_df_schema}\n```")

# ---------------------------------------------------------------------------
# Page header
# ---------------------------------------------------------------------------
col1, col2 = st.columns([2, 1], vertical_alignment="bottom")
with col1:
    st.header("Natural Language to SQL Query Agent🤖")
with col2:
    st.caption("> ***Execute on the Go!*** 🚀 In-Built DuckDB Execution Engine")
    st.caption(
        "This is a Qwen2.5-Coder-3B model fine-tuned for SQL queries integrated "
        "with langchain for Agentic Workflow. To see the Fine-Tuning code - "
        "[click here](https://www.kaggle.com/code/debopamchowdhury/qwen-2-5coder-3b-instruct-finetuning)."
    )

# ---------------------------------------------------------------------------
# Data source & model selection
# ---------------------------------------------------------------------------
col1, col2, col3 = st.columns([1.5, 2, 1], vertical_alignment="top")

with col1:
    # "uploaded_files" is only selectable once the upload page has stored
    # dataframes in session state.
    uploaded_frames = st.session_state.get("uploaded_dataframes")
    disabled_selection = not uploaded_frames
    options = ["default_db", "uploaded_files"]
    selected = st.segmented_control(
        "Choose",
        options,
        selection_mode="single",
        disabled=disabled_selection,
        label_visibility="collapsed",
        default="default_db" if disabled_selection else "uploaded_files",
    )
    if not disabled_selection:
        if selected == "uploaded_files":
            selected_df = uploaded_frames
            use_default_schema = False
        else:
            selected_df = default_dfs
            use_default_schema = True
    # Identity check on purpose: `==` between dicts holding DataFrames can
    # raise "truth value of a DataFrame is ambiguous" when keys coincide but
    # the value objects differ.
    if selected_df is default_dfs:
        with st.popover("Default Database Queries 📚 - Trial"):
            default_db_questions = load_defaultdb_queries()
            st.markdown(default_db_questions)

with col2:
    llm_option_radio = st.radio(
        "Choose LLM Model",
        ["Gemini-2.0-Flash-Exp", "FineTuned Qwen2.5-Coder-3B for SQL"],
        captions=[
            "Used via API",
            "Run Locally on this Server. Extremely Slow because of Free vCPUs, "
            "[Download & Run on your Computer via Ollama]"
            "(https://ollama.com/debopam/Text-to-SQL__Qwen2.5-Coder-3B-FineTuned)",
        ],
        label_visibility="collapsed",
    )
    llm_option = "gemini" if llm_option_radio == "Gemini-2.0-Flash-Exp" else "qwen"

with col3:
    # Reset the conversation state and rerun the script from the top.
    if st.button("Start New Conversation", type="primary"):
        st.session_state.chat_history = []
        st.session_state.conversation_turns = 0
        st.rerun()

# ---------------------------------------------------------------------------
# Chat state
# ---------------------------------------------------------------------------
if "chat_history" not in st.session_state:
    st.session_state.chat_history = []
if "conversation_turns" not in st.session_state:
    st.session_state.conversation_turns = 0

# Hard cap on questions per conversation (API quota / free-tier CPU budget).
MAX_TURNS = 5

# Pattern for collapsing a duplicated ```sql fence emitted by the model.
_DUP_SQL_FENCE = re.compile(r"```sql\s*```sql")


def _clean_response(text):
    """Strip model tag artefacts and collapse duplicated ```sql fences.

    NOTE(review): the two ``.replace("", "")`` calls are no-ops in the source
    as received — the tag literals (e.g. "<think>"-style markers, per the
    original comments) appear to have been lost. Restore them from version
    control if the model actually emits such tags.
    """
    text = text.replace("", "").replace("", "")
    return _DUP_SQL_FENCE.sub("```sql", text)


def _show_result_details(df):
    """Render a query-result DataFrame with shape, summary stats and dtypes."""
    st.dataframe(df)
    st.info(f"Rows x Columns: {df.shape[0]} x {df.shape[1]}")
    st.subheader("Data Description:")
    st.markdown(df.describe().T.to_markdown())
    st.subheader("Data Types:")
    st.write(df.dtypes)


# Replay the conversation so far; AI turns may carry an attached result frame.
for message in st.session_state.chat_history:
    with st.chat_message(message.type):
        st.markdown(message.content)
        response_df = (
            message.additional_kwargs.get("response_df")
            if isinstance(message, AIMessage)
            else None
        )
        if response_df is not None and not response_df.empty:
            with st.expander("View SQL-Query Execution Result"):
                _show_result_details(response_df)

# ---------------------------------------------------------------------------
# Chat input / answer generation
# ---------------------------------------------------------------------------
if st.session_state.conversation_turns < MAX_TURNS:
    if prompt := st.chat_input("Ask me a SQL query question"):
        # Record and echo the user's question.
        st.session_state.chat_history.append(HumanMessage(content=prompt))
        with st.chat_message("user"):
            st.markdown(prompt)

        duckdb_result = None
        with st.chat_message("assistant"):
            message_placeholder = st.empty()
            full_response = ""
            if llm_option == "gemini":
                spinner_text = (
                    "Using Gemini-2.0-Flash-Exp to run your query. Please wait...😊"
                )
            else:
                spinner_text = (
                    "I know it is taking a lot of time. To run the model I'm "
                    "using `Free` small vCPUs provided by `HuggingFace Spaces` "
                    "for deployment. Thank you so much for your patience😊"
                )
            with st.spinner(spinner_text):
                # Stream partial answers into the placeholder with a cursor glyph.
                for response_so_far in generate_llm_response(
                    prompt, llm_option, use_default_schema
                ):
                    message_placeholder.markdown(
                        _clean_response(response_so_far) + "▌"
                    )
                    full_response = response_so_far

            full_response = _clean_response(full_response)
            # Normalise a ragged trailing fence (stray whitespace/backticks)
            # to a clean ``` close.  (\s already covers \n.)
            full_response = re.sub(r"\s*`+$", "```", full_response)
            message_placeholder.markdown(full_response)

            sql_command = extract_sql_command(full_response)
            if sql_command:
                duckdb_result = execute_sql_duckdb(sql_command, selected_df)
                if duckdb_result is not None:
                    st.text("Query Execution Result:")
                    with st.expander("View Result"):
                        _show_result_details(duckdb_result)
            # No SQL in the answer is fine — the model may have replied in prose.

        # Persist the AI turn, attaching the result frame for later replay.
        st.session_state.chat_history.append(
            AIMessage(
                content=full_response,
                additional_kwargs={"response_df": duckdb_result},
            )
        )
        st.session_state.conversation_turns += 1
else:
    st.warning(
        "Maximum number of questions reached. Please click 'Start New Conversation' to continue."
    )
    # Keep the input widget visible but locked once the turn cap is reached.
    st.chat_input("Ask me a SQL query question", disabled=True)

with st.sidebar:
    st.caption("Made with ❤️ by @Debopam_Chowdhury")