seanpedrickcase committed on
Commit 3085585 · 1 parent: 5fed34d

Added model compatibility for OpenAI and Azure endpoints. Added some Bedrock models; the app is now compatible with thinking models.

app.py CHANGED
@@ -3,7 +3,7 @@ import os
3
  import gradio as gr
4
  import pandas as pd
5
  from datetime import datetime
6
- from tools.helper_functions import put_columns_in_df, get_connection_params, view_table, empty_output_vars_extract_topics, empty_output_vars_summarise, load_in_previous_reference_file, join_cols_onto_reference_df, load_in_previous_data_files, load_in_data_file, load_in_default_cost_codes, reset_base_dataframe, update_cost_code_dataframe_from_dropdown_select, df_select_callback_cost, enforce_cost_codes, _get_env_list, move_overall_summary_output_files_to_front_page
7
  from tools.aws_functions import upload_file_to_s3, download_file_from_s3
8
  from tools.llm_api_call import modify_existing_output_tables, wrapper_extract_topics_per_column_value, all_in_one_pipeline
9
  from tools.dedup_summaries import sample_reference_table_summaries, summarise_output_topics, deduplicate_topics, deduplicate_topics_llm, overall_summary
@@ -13,18 +13,7 @@ from tools.auth import authenticate_user
13
  from tools.example_table_outputs import dummy_consultation_table, case_notes_table, dummy_consultation_table_zero_shot, case_notes_table_grouped, case_notes_table_structured_summary
14
  from tools.prompts import initial_table_prompt, system_prompt, add_existing_topics_system_prompt, add_existing_topics_prompt, two_para_summary_format_prompt, single_para_summary_format_prompt
15
  # from tools.verify_titles import verify_titles
16
- from tools.config import RUN_AWS_FUNCTIONS, HOST_NAME, ACCESS_LOGS_FOLDER, FEEDBACK_LOGS_FOLDER, USAGE_LOGS_FOLDER, RUN_LOCAL_MODEL, FILE_INPUT_HEIGHT, GEMINI_API_KEY, model_full_names, BATCH_SIZE_DEFAULT, CHOSEN_LOCAL_MODEL_TYPE, LLM_SEED, COGNITO_AUTH, MAX_QUEUE_SIZE, MAX_FILE_SIZE, GRADIO_SERVER_PORT, ROOT_PATH, INPUT_FOLDER, OUTPUT_FOLDER, S3_LOG_BUCKET, CONFIG_FOLDER, GRADIO_TEMP_DIR, MPLCONFIGDIR, model_name_map, GET_COST_CODES, ENFORCE_COST_CODES, DEFAULT_COST_CODE, COST_CODES_PATH, S3_COST_CODES_PATH, OUTPUT_COST_CODES_PATH, SHOW_COSTS, SAVE_LOGS_TO_CSV, SAVE_LOGS_TO_DYNAMODB, ACCESS_LOG_DYNAMODB_TABLE_NAME, USAGE_LOG_DYNAMODB_TABLE_NAME, FEEDBACK_LOG_DYNAMODB_TABLE_NAME, LOG_FILE_NAME, FEEDBACK_LOG_FILE_NAME, USAGE_LOG_FILE_NAME, CSV_ACCESS_LOG_HEADERS, CSV_FEEDBACK_LOG_HEADERS, CSV_USAGE_LOG_HEADERS, DYNAMODB_ACCESS_LOG_HEADERS, DYNAMODB_FEEDBACK_LOG_HEADERS, DYNAMODB_USAGE_LOG_HEADERS, S3_ACCESS_LOGS_FOLDER, S3_FEEDBACK_LOGS_FOLDER, S3_USAGE_LOGS_FOLDER, AWS_ACCESS_KEY, AWS_SECRET_KEY, SHOW_EXAMPLES, HF_TOKEN, AZURE_API_KEY, LLM_TEMPERATURE
17
-
18
- def ensure_folder_exists(output_folder:str):
19
- """Checks if the specified folder exists, creates it if not."""
20
-
21
- if not os.path.exists(output_folder):
22
- # Create the folder if it doesn't exist
23
- os.makedirs(output_folder, exist_ok=True)
24
- print(f"Created the {output_folder} folder.")
25
- else:
26
- pass
27
- #print(f"The {output_folder} folder already exists.")
28
 
29
  ensure_folder_exists(CONFIG_FOLDER)
30
  ensure_folder_exists(OUTPUT_FOLDER)
@@ -35,26 +24,8 @@ ensure_folder_exists(FEEDBACK_LOGS_FOLDER)
35
  ensure_folder_exists(ACCESS_LOGS_FOLDER)
36
  ensure_folder_exists(USAGE_LOGS_FOLDER)
37
 
38
- # Convert string environment variables to string or list
39
- if SAVE_LOGS_TO_CSV == "True": SAVE_LOGS_TO_CSV = True
40
- else: SAVE_LOGS_TO_CSV = False
41
- if SAVE_LOGS_TO_DYNAMODB == "True": SAVE_LOGS_TO_DYNAMODB = True
42
- else: SAVE_LOGS_TO_DYNAMODB = False
43
-
44
- if CSV_ACCESS_LOG_HEADERS: CSV_ACCESS_LOG_HEADERS = _get_env_list(CSV_ACCESS_LOG_HEADERS)
45
- if CSV_FEEDBACK_LOG_HEADERS: CSV_FEEDBACK_LOG_HEADERS = _get_env_list(CSV_FEEDBACK_LOG_HEADERS)
46
- if CSV_USAGE_LOG_HEADERS: CSV_USAGE_LOG_HEADERS = _get_env_list(CSV_USAGE_LOG_HEADERS)
47
-
48
- if DYNAMODB_ACCESS_LOG_HEADERS: DYNAMODB_ACCESS_LOG_HEADERS = _get_env_list(DYNAMODB_ACCESS_LOG_HEADERS)
49
- if DYNAMODB_FEEDBACK_LOG_HEADERS: DYNAMODB_FEEDBACK_LOG_HEADERS = _get_env_list(DYNAMODB_FEEDBACK_LOG_HEADERS)
50
- if DYNAMODB_USAGE_LOG_HEADERS: DYNAMODB_USAGE_LOG_HEADERS = _get_env_list(DYNAMODB_USAGE_LOG_HEADERS)
51
-
52
  today_rev = datetime.now().strftime("%Y%m%d")
53
 
54
- if RUN_LOCAL_MODEL == "1": default_model_choice = CHOSEN_LOCAL_MODEL_TYPE
55
- elif RUN_AWS_FUNCTIONS == "1": default_model_choice = "anthropic.claude-3-haiku-20240307-v1:0"
56
- else: default_model_choice = "gemini-2.5-flash"
57
-
58
  # Placeholders for example variables
59
  in_data_files = gr.File(height=FILE_INPUT_HEIGHT, label="Choose Excel or csv files", file_count= "multiple", file_types=['.xlsx', '.xls', '.csv', '.parquet'])
60
  in_colnames = gr.Dropdown(choices=[""], multiselect = False, label="Select the open text column of interest. In an Excel file, this shows columns across all sheets.", allow_custom_value=True, interactive=True)
@@ -162,7 +133,7 @@ with app:
162
 
163
  gr.Markdown("""# Large language model topic modelling
164
 
165
- Extract topics and summarise outputs using Large Language Models (LLMs, Gemma 3 4b/GPT-OSS 20b if local (see tools/config.py to modify), Gemini, Azure, or AWS Bedrock models (e.g. Claude, Nova models). The app will query the LLM with batches of responses to produce summary tables, which are then compared iteratively to output a table with the general topics, subtopics, topic sentiment, and a topic summary. Instructions on use can be found in the README.md file. You can try out examples by clicking on one of the example datasets below. API keys for AWS, Azure, and Gemini services can be entered on the settings page (note that Gemini has a free public API).
166
 
167
  NOTE: Large language models are not 100% accurate and may produce biased or harmful outputs. All outputs from this app **absolutely need to be checked by a human** for harmful content, hallucinations, and accuracy.""")
168
 
@@ -198,7 +169,10 @@ with app:
198
 
199
  with gr.Tab(label="All in one topic extraction and summarisation"):
200
  with gr.Row():
201
- model_choice = gr.Dropdown(value = default_model_choice, choices = model_full_names, label="Large language model for topic extraction and summarisation", multiselect=False)
 
 
 
202
 
203
  with gr.Accordion("Upload xlsx, csv, or parquet file", open = True):
204
  in_data_files.render()
@@ -339,8 +313,9 @@ with app:
339
  with gr.Accordion("Gemini API keys", open = False):
340
  google_api_key_textbox = gr.Textbox(value = GEMINI_API_KEY, label="Enter Gemini API key (only if using Google API models)", lines=1, type="password")
341
 
342
- with gr.Accordion("Azure AI Inference", open = False):
343
- azure_api_key_textbox = gr.Textbox(value = AZURE_API_KEY, label="Enter Azure AI Inference API key (only if using Azure models)", lines=1, type="password")
 
344
 
345
  with gr.Accordion("Hugging Face token for downloading gated models", open = False):
346
  hf_api_key_textbox = gr.Textbox(value = HF_TOKEN, label="Enter Hugging Face API key (only for gated models that need a token to download)", lines=1, type="password")
@@ -435,6 +410,7 @@ with app:
435
  aws_secret_key_textbox,
436
  hf_api_key_textbox,
437
  azure_api_key_textbox,
 
438
  output_folder_state,
439
  logged_content_df,
440
  add_existing_topics_summary_format_textbox],
@@ -478,12 +454,12 @@ with app:
478
  success(deduplicate_topics, inputs=[master_reference_df_state, master_unique_topics_df_state, working_data_file_name_textbox, unique_topics_table_file_name_textbox, in_excel_sheets, merge_sentiment_drop, merge_general_topics_drop, deduplicate_score_threshold, in_data_files, in_colnames, output_folder_state], outputs=[master_reference_df_state, master_unique_topics_df_state, summarisation_input_files, log_files_output, summarised_output_markdown], scroll_to_output=True, api_name="deduplicate_topics")
479
 
480
  # When LLM deduplication button pressed, deduplicate data using LLM
481
- def deduplicate_topics_llm_wrapper(reference_df, topic_summary_df, reference_table_file_name, unique_topics_table_file_name, model_choice, in_api_key, temperature, in_excel_sheets, merge_sentiment, merge_general_topics, in_data_files, chosen_cols, output_folder, candidate_topics=None):
482
  model_source = model_name_map[model_choice]["source"]
483
- return deduplicate_topics_llm(reference_df, topic_summary_df, reference_table_file_name, unique_topics_table_file_name, model_choice, in_api_key, temperature, model_source, None, None, None, None, in_excel_sheets, merge_sentiment, merge_general_topics, in_data_files, chosen_cols, output_folder, candidate_topics)
484
 
485
  deduplicate_llm_previous_data_btn.click(load_in_previous_data_files, inputs=[deduplication_input_files], outputs=[master_reference_df_state, master_unique_topics_df_state, latest_batch_completed_no_loop, deduplication_input_files_status, working_data_file_name_textbox, unique_topics_table_file_name_textbox]).\
486
- success(deduplicate_topics_llm_wrapper, inputs=[master_reference_df_state, master_unique_topics_df_state, working_data_file_name_textbox, unique_topics_table_file_name_textbox, model_choice, google_api_key_textbox, temperature_slide, in_excel_sheets, merge_sentiment_drop, merge_general_topics_drop, in_data_files, in_colnames, output_folder_state, candidate_topics], outputs=[master_reference_df_state, master_unique_topics_df_state, summarisation_input_files, log_files_output, summarised_output_markdown, input_tokens_num, output_tokens_num, number_of_calls_num, estimated_time_taken_number], scroll_to_output=True, api_name="deduplicate_topics_llm").\
487
  success(lambda *args: usage_callback.flag(list(args), save_to_csv=SAVE_LOGS_TO_CSV, save_to_dynamodb=SAVE_LOGS_TO_DYNAMODB, dynamodb_table_name=USAGE_LOG_DYNAMODB_TABLE_NAME, dynamodb_headers=DYNAMODB_USAGE_LOG_HEADERS, replacement_headers=CSV_USAGE_LOG_HEADERS), [session_hash_textbox, original_data_file_name_textbox, in_colnames, model_choice, conversation_metadata_textbox_placeholder, input_tokens_num, output_tokens_num, number_of_calls_num, estimated_time_taken_number, cost_code_choice_drop], None, preprocess=False, api_name="usage_logs_llm_dedup")
488
 
489
  # When button pressed, summarise previous data
@@ -491,14 +467,14 @@ with app:
491
  success(fn= enforce_cost_codes, inputs=[enforce_cost_code_textbox, cost_code_choice_drop, cost_code_dataframe_base]).\
492
  success(load_in_previous_data_files, inputs=[summarisation_input_files], outputs=[master_reference_df_state, master_unique_topics_df_state, latest_batch_completed_no_loop, deduplication_input_files_status, working_data_file_name_textbox, unique_topics_table_file_name_textbox]).\
493
  success(sample_reference_table_summaries, inputs=[master_reference_df_state, random_seed], outputs=[summary_reference_table_sample_state, summarised_references_markdown], api_name="sample_summaries").\
494
- success(summarise_output_topics, inputs=[summary_reference_table_sample_state, master_unique_topics_df_state, master_reference_df_state, model_choice, google_api_key_textbox, temperature_slide, working_data_file_name_textbox, summarised_outputs_list, latest_summary_completed_num, conversation_metadata_textbox, in_data_files, in_excel_sheets, in_colnames, log_files_output_list_state, summarise_format_radio, output_folder_state, context_textbox, aws_access_key_textbox, aws_secret_key_textbox, model_name_map_state, hf_api_key_textbox, logged_content_df], outputs=[summary_reference_table_sample_state, master_unique_topics_df_revised_summaries_state, master_reference_df_revised_summaries_state, summary_output_files, summarised_outputs_list, latest_summary_completed_num, conversation_metadata_textbox, summarised_output_markdown, log_files_output, overall_summarisation_input_files, input_tokens_num, output_tokens_num, number_of_calls_num, estimated_time_taken_number, output_messages_textbox, logged_content_df], api_name="summarise_topics", show_progress_on=[output_messages_textbox, summary_output_files]).\
495
  success(lambda *args: usage_callback.flag(list(args), save_to_csv=SAVE_LOGS_TO_CSV, save_to_dynamodb=SAVE_LOGS_TO_DYNAMODB, dynamodb_table_name=USAGE_LOG_DYNAMODB_TABLE_NAME, dynamodb_headers=DYNAMODB_USAGE_LOG_HEADERS, replacement_headers=CSV_USAGE_LOG_HEADERS), [session_hash_textbox, original_data_file_name_textbox, in_colnames, model_choice, conversation_metadata_textbox_placeholder, input_tokens_num, output_tokens_num, number_of_calls_num, estimated_time_taken_number, cost_code_choice_drop], None, preprocess=False).\
496
  then(collect_output_csvs_and_create_excel_output, inputs=[in_data_files, in_colnames, original_data_file_name_textbox, in_group_col, model_choice, master_reference_df_revised_summaries_state, master_unique_topics_df_revised_summaries_state, summarised_output_df, missing_df_state, in_excel_sheets, usage_logs_state, model_name_map_state, output_folder_state, produce_structured_summary_radio], outputs=[summary_output_files_xlsx, summary_xlsx_output_files_list])
497
 
498
  # SUMMARISE WHOLE TABLE PAGE
499
  overall_summarise_previous_data_btn.click(fn= enforce_cost_codes, inputs=[enforce_cost_code_textbox, cost_code_choice_drop, cost_code_dataframe_base]).\
500
  success(load_in_previous_data_files, inputs=[overall_summarisation_input_files], outputs=[master_reference_df_state, master_unique_topics_df_state, latest_batch_completed_no_loop, deduplication_input_files_status, working_data_file_name_textbox, unique_topics_table_file_name_textbox]).\
501
- success(overall_summary, inputs=[master_unique_topics_df_state, model_choice, google_api_key_textbox, temperature_slide, working_data_file_name_textbox, output_folder_state, in_colnames, context_textbox, aws_access_key_textbox, aws_secret_key_textbox, model_name_map_state, hf_api_key_textbox, logged_content_df], outputs=[overall_summary_output_files, overall_summarised_output_markdown, summarised_output_df, conversation_metadata_textbox, input_tokens_num, output_tokens_num, number_of_calls_num, estimated_time_taken_number, output_messages_textbox, logged_content_df], scroll_to_output=True, api_name="overall_summary", show_progress_on=[output_messages_textbox, overall_summary_output_files]).\
502
  success(lambda *args: usage_callback.flag(list(args), save_to_csv=SAVE_LOGS_TO_CSV, save_to_dynamodb=SAVE_LOGS_TO_DYNAMODB, dynamodb_table_name=USAGE_LOG_DYNAMODB_TABLE_NAME, dynamodb_headers=DYNAMODB_USAGE_LOG_HEADERS, replacement_headers=CSV_USAGE_LOG_HEADERS), [session_hash_textbox, original_data_file_name_textbox, in_colnames, model_choice, conversation_metadata_textbox_placeholder, input_tokens_num, output_tokens_num, number_of_calls_num, estimated_time_taken_number, cost_code_choice_drop], None, preprocess=False).\
503
  then(collect_output_csvs_and_create_excel_output, inputs=[in_data_files, in_colnames, original_data_file_name_textbox, in_group_col, model_choice, master_reference_df_state, master_unique_topics_df_state, summarised_output_df, missing_df_state, in_excel_sheets, usage_logs_state, model_name_map_state, output_folder_state, produce_structured_summary_radio], outputs=[overall_summary_output_files_xlsx, summary_xlsx_output_files_list])
504
 
@@ -545,6 +521,7 @@ with app:
545
  aws_secret_key_textbox,
546
  hf_api_key_textbox,
547
  azure_api_key_textbox,
 
548
  output_folder_state,
549
  merge_sentiment_drop,
550
  merge_general_topics_drop,
 
3
  import gradio as gr
4
  import pandas as pd
5
  from datetime import datetime
6
+ from tools.helper_functions import put_columns_in_df, get_connection_params, view_table, empty_output_vars_extract_topics, empty_output_vars_summarise, load_in_previous_reference_file, join_cols_onto_reference_df, load_in_previous_data_files, load_in_data_file, load_in_default_cost_codes, reset_base_dataframe, update_cost_code_dataframe_from_dropdown_select, df_select_callback_cost, enforce_cost_codes, _get_env_list, move_overall_summary_output_files_to_front_page, update_model_choice
7
  from tools.aws_functions import upload_file_to_s3, download_file_from_s3
8
  from tools.llm_api_call import modify_existing_output_tables, wrapper_extract_topics_per_column_value, all_in_one_pipeline
9
  from tools.dedup_summaries import sample_reference_table_summaries, summarise_output_topics, deduplicate_topics, deduplicate_topics_llm, overall_summary
 
13
  from tools.example_table_outputs import dummy_consultation_table, case_notes_table, dummy_consultation_table_zero_shot, case_notes_table_grouped, case_notes_table_structured_summary
14
  from tools.prompts import initial_table_prompt, system_prompt, add_existing_topics_system_prompt, add_existing_topics_prompt, two_para_summary_format_prompt, single_para_summary_format_prompt
15
  # from tools.verify_titles import verify_titles
16
+ from tools.config import RUN_AWS_FUNCTIONS, HOST_NAME, ACCESS_LOGS_FOLDER, FEEDBACK_LOGS_FOLDER, USAGE_LOGS_FOLDER, FILE_INPUT_HEIGHT, GEMINI_API_KEY, BATCH_SIZE_DEFAULT, LLM_SEED, COGNITO_AUTH, MAX_QUEUE_SIZE, MAX_FILE_SIZE, GRADIO_SERVER_PORT, ROOT_PATH, INPUT_FOLDER, OUTPUT_FOLDER, S3_LOG_BUCKET, CONFIG_FOLDER, GRADIO_TEMP_DIR, MPLCONFIGDIR, GET_COST_CODES, ENFORCE_COST_CODES, DEFAULT_COST_CODE, COST_CODES_PATH, S3_COST_CODES_PATH, OUTPUT_COST_CODES_PATH, SHOW_COSTS, SAVE_LOGS_TO_CSV, SAVE_LOGS_TO_DYNAMODB, ACCESS_LOG_DYNAMODB_TABLE_NAME, USAGE_LOG_DYNAMODB_TABLE_NAME, FEEDBACK_LOG_DYNAMODB_TABLE_NAME, LOG_FILE_NAME, FEEDBACK_LOG_FILE_NAME, USAGE_LOG_FILE_NAME, CSV_ACCESS_LOG_HEADERS, CSV_FEEDBACK_LOG_HEADERS, CSV_USAGE_LOG_HEADERS, DYNAMODB_ACCESS_LOG_HEADERS, DYNAMODB_FEEDBACK_LOG_HEADERS, DYNAMODB_USAGE_LOG_HEADERS, S3_ACCESS_LOGS_FOLDER, S3_FEEDBACK_LOGS_FOLDER, S3_USAGE_LOGS_FOLDER, AWS_ACCESS_KEY, AWS_SECRET_KEY, SHOW_EXAMPLES, HF_TOKEN, AZURE_OPENAI_API_KEY, AZURE_OPENAI_INFERENCE_ENDPOINT, LLM_TEMPERATURE, model_name_map, default_model_choice, default_source_models, default_model_source, model_sources, ensure_folder_exists
 
17
 
18
  ensure_folder_exists(CONFIG_FOLDER)
19
  ensure_folder_exists(OUTPUT_FOLDER)
 
24
  ensure_folder_exists(ACCESS_LOGS_FOLDER)
25
  ensure_folder_exists(USAGE_LOGS_FOLDER)
26
 
27
  today_rev = datetime.now().strftime("%Y%m%d")
28
 
29
  # Placeholders for example variables
30
  in_data_files = gr.File(height=FILE_INPUT_HEIGHT, label="Choose Excel or csv files", file_count= "multiple", file_types=['.xlsx', '.xls', '.csv', '.parquet'])
31
  in_colnames = gr.Dropdown(choices=[""], multiselect = False, label="Select the open text column of interest. In an Excel file, this shows columns across all sheets.", allow_custom_value=True, interactive=True)
 
133
 
134
  gr.Markdown("""# Large language model topic modelling
135
 
136
+ Extract topics and summarise outputs using Large Language Models (LLMs: Gemma 3 4b/GPT-OSS 20b if local (see tools/config.py to modify), Gemini, Azure/OpenAI, or AWS Bedrock models such as Claude and Nova). The app queries the LLM with batches of responses to produce summary tables, which are then compared iteratively to output a table with the general topics, subtopics, topic sentiment, and a topic summary. Instructions on use can be found in the README.md file. You can try out examples by clicking on one of the example datasets below. API keys for AWS, Azure/OpenAI, and Gemini services can be entered on the settings page (note that Gemini has a free public API).
137
 
138
  NOTE: Large language models are not 100% accurate and may produce biased or harmful outputs. All outputs from this app **absolutely need to be checked by a human** for harmful content, hallucinations, and accuracy.""")
139
 
 
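As a rough sketch of the batch-and-merge flow described in the text above: responses are sent to the LLM in batches, each batch yields a topic table, and the tables are merged and deduplicated into one output. The helper names below are illustrative only (the real entry points are wrapper_extract_topics_per_column_value, deduplicate_topics and summarise_output_topics imported at the top of app.py).

import pandas as pd

def extract_topics_in_batches(responses, batch_size=5):
    # Toy stand-in for the per-batch LLM call: one summary table per batch.
    tables = []
    for start in range(0, len(responses), batch_size):
        batch = responses[start:start + batch_size]
        tables.append(pd.DataFrame({"General topic": ["Example"] * len(batch),
                                    "Subtopic": batch,
                                    "Sentiment": ["Neutral"] * len(batch)}))
    return pd.concat(tables, ignore_index=True)

def merge_topic_tables(table):
    # Toy stand-in for the iterative comparison/deduplication step.
    return table.drop_duplicates(subset=["General topic", "Subtopic", "Sentiment"])

responses = ["The park needs more benches", "More benches please", "Bins overflow at weekends"]
print(merge_topic_tables(extract_topics_in_batches(responses, batch_size=2)))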
169
 
170
  with gr.Tab(label="All in one topic extraction and summarisation"):
171
  with gr.Row():
172
+ model_source = gr.Dropdown(value = default_model_source, choices = model_sources, label="Large language model family", multiselect=False)
173
+ model_choice = gr.Dropdown(value = default_model_choice, choices = default_source_models, label="Large language model for topic extraction and summarisation", multiselect=False)
174
+
175
+ model_source.change(fn=update_model_choice, inputs=[model_source], outputs=[model_choice])
176
 
177
  with gr.Accordion("Upload xlsx, csv, or parquet file", open = True):
178
  in_data_files.render()
 
313
  with gr.Accordion("Gemini API keys", open = False):
314
  google_api_key_textbox = gr.Textbox(value = GEMINI_API_KEY, label="Enter Gemini API key (only if using Google API models)", lines=1, type="password")
315
 
316
+ with gr.Accordion("Azure/OpenAI Inference", open = False):
317
+ azure_api_key_textbox = gr.Textbox(value = AZURE_OPENAI_API_KEY, label="Enter Azure/OpenAI Inference API key (only if using Azure/OpenAI models)", lines=1, type="password")
318
+ azure_endpoint_textbox = gr.Textbox(value = AZURE_OPENAI_INFERENCE_ENDPOINT, label="Enter Azure/OpenAI Inference endpoint URL (only if using Azure/OpenAI models)", lines=1)
319
 
320
  with gr.Accordion("Hugging Face token for downloading gated models", open = False):
321
  hf_api_key_textbox = gr.Textbox(value = HF_TOKEN, label="Enter Hugging Face API key (only for gated models that need a token to download)", lines=1, type="password")
 
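The two new fields default to values read by get_or_create_env_var in tools/config.py, so they can also be supplied through the environment (or a .env file) before the app starts. A minimal sketch, assuming the same variable names and a placeholder endpoint:

import os

# Must be set before tools.config is imported; the endpoint below is a placeholder.
os.environ["AZURE_OPENAI_API_KEY"] = "<your-key>"
os.environ["AZURE_OPENAI_INFERENCE_ENDPOINT"] = "https://<your-resource>.openai.azure.com/"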
410
  aws_secret_key_textbox,
411
  hf_api_key_textbox,
412
  azure_api_key_textbox,
413
+ azure_endpoint_textbox,
414
  output_folder_state,
415
  logged_content_df,
416
  add_existing_topics_summary_format_textbox],
 
454
  success(deduplicate_topics, inputs=[master_reference_df_state, master_unique_topics_df_state, working_data_file_name_textbox, unique_topics_table_file_name_textbox, in_excel_sheets, merge_sentiment_drop, merge_general_topics_drop, deduplicate_score_threshold, in_data_files, in_colnames, output_folder_state], outputs=[master_reference_df_state, master_unique_topics_df_state, summarisation_input_files, log_files_output, summarised_output_markdown], scroll_to_output=True, api_name="deduplicate_topics")
455
 
456
  # When LLM deduplication button pressed, deduplicate data using LLM
457
+ def deduplicate_topics_llm_wrapper(reference_df, topic_summary_df, reference_table_file_name, unique_topics_table_file_name, model_choice, in_api_key, temperature, in_excel_sheets, merge_sentiment, merge_general_topics, in_data_files, chosen_cols, output_folder, candidate_topics=None, azure_endpoint=""):
458
  model_source = model_name_map[model_choice]["source"]
459
+ return deduplicate_topics_llm(reference_df, topic_summary_df, reference_table_file_name, unique_topics_table_file_name, model_choice, in_api_key, temperature, model_source, None, None, None, None, in_excel_sheets, merge_sentiment, merge_general_topics, in_data_files, chosen_cols, output_folder, candidate_topics, azure_endpoint)
460
 
461
  deduplicate_llm_previous_data_btn.click(load_in_previous_data_files, inputs=[deduplication_input_files], outputs=[master_reference_df_state, master_unique_topics_df_state, latest_batch_completed_no_loop, deduplication_input_files_status, working_data_file_name_textbox, unique_topics_table_file_name_textbox]).\
462
+ success(deduplicate_topics_llm_wrapper, inputs=[master_reference_df_state, master_unique_topics_df_state, working_data_file_name_textbox, unique_topics_table_file_name_textbox, model_choice, google_api_key_textbox, temperature_slide, in_excel_sheets, merge_sentiment_drop, merge_general_topics_drop, in_data_files, in_colnames, output_folder_state, candidate_topics, azure_endpoint_textbox], outputs=[master_reference_df_state, master_unique_topics_df_state, summarisation_input_files, log_files_output, summarised_output_markdown, input_tokens_num, output_tokens_num, number_of_calls_num, estimated_time_taken_number], scroll_to_output=True, api_name="deduplicate_topics_llm").\
463
  success(lambda *args: usage_callback.flag(list(args), save_to_csv=SAVE_LOGS_TO_CSV, save_to_dynamodb=SAVE_LOGS_TO_DYNAMODB, dynamodb_table_name=USAGE_LOG_DYNAMODB_TABLE_NAME, dynamodb_headers=DYNAMODB_USAGE_LOG_HEADERS, replacement_headers=CSV_USAGE_LOG_HEADERS), [session_hash_textbox, original_data_file_name_textbox, in_colnames, model_choice, conversation_metadata_textbox_placeholder, input_tokens_num, output_tokens_num, number_of_calls_num, estimated_time_taken_number, cost_code_choice_drop], None, preprocess=False, api_name="usage_logs_llm_dedup")
464
 
465
  # When button pressed, summarise previous data
 
467
  success(fn= enforce_cost_codes, inputs=[enforce_cost_code_textbox, cost_code_choice_drop, cost_code_dataframe_base]).\
468
  success(load_in_previous_data_files, inputs=[summarisation_input_files], outputs=[master_reference_df_state, master_unique_topics_df_state, latest_batch_completed_no_loop, deduplication_input_files_status, working_data_file_name_textbox, unique_topics_table_file_name_textbox]).\
469
  success(sample_reference_table_summaries, inputs=[master_reference_df_state, random_seed], outputs=[summary_reference_table_sample_state, summarised_references_markdown], api_name="sample_summaries").\
470
+ success(summarise_output_topics, inputs=[summary_reference_table_sample_state, master_unique_topics_df_state, master_reference_df_state, model_choice, google_api_key_textbox, temperature_slide, working_data_file_name_textbox, summarised_outputs_list, latest_summary_completed_num, conversation_metadata_textbox, in_data_files, in_excel_sheets, in_colnames, log_files_output_list_state, summarise_format_radio, output_folder_state, context_textbox, aws_access_key_textbox, aws_secret_key_textbox, model_name_map_state, hf_api_key_textbox, azure_endpoint_textbox, logged_content_df], outputs=[summary_reference_table_sample_state, master_unique_topics_df_revised_summaries_state, master_reference_df_revised_summaries_state, summary_output_files, summarised_outputs_list, latest_summary_completed_num, conversation_metadata_textbox, summarised_output_markdown, log_files_output, overall_summarisation_input_files, input_tokens_num, output_tokens_num, number_of_calls_num, estimated_time_taken_number, output_messages_textbox, logged_content_df], api_name="summarise_topics", show_progress_on=[output_messages_textbox, summary_output_files]).\
471
  success(lambda *args: usage_callback.flag(list(args), save_to_csv=SAVE_LOGS_TO_CSV, save_to_dynamodb=SAVE_LOGS_TO_DYNAMODB, dynamodb_table_name=USAGE_LOG_DYNAMODB_TABLE_NAME, dynamodb_headers=DYNAMODB_USAGE_LOG_HEADERS, replacement_headers=CSV_USAGE_LOG_HEADERS), [session_hash_textbox, original_data_file_name_textbox, in_colnames, model_choice, conversation_metadata_textbox_placeholder, input_tokens_num, output_tokens_num, number_of_calls_num, estimated_time_taken_number, cost_code_choice_drop], None, preprocess=False).\
472
  then(collect_output_csvs_and_create_excel_output, inputs=[in_data_files, in_colnames, original_data_file_name_textbox, in_group_col, model_choice, master_reference_df_revised_summaries_state, master_unique_topics_df_revised_summaries_state, summarised_output_df, missing_df_state, in_excel_sheets, usage_logs_state, model_name_map_state, output_folder_state, produce_structured_summary_radio], outputs=[summary_output_files_xlsx, summary_xlsx_output_files_list])
473
 
474
  # SUMMARISE WHOLE TABLE PAGE
475
  overall_summarise_previous_data_btn.click(fn= enforce_cost_codes, inputs=[enforce_cost_code_textbox, cost_code_choice_drop, cost_code_dataframe_base]).\
476
  success(load_in_previous_data_files, inputs=[overall_summarisation_input_files], outputs=[master_reference_df_state, master_unique_topics_df_state, latest_batch_completed_no_loop, deduplication_input_files_status, working_data_file_name_textbox, unique_topics_table_file_name_textbox]).\
477
+ success(overall_summary, inputs=[master_unique_topics_df_state, model_choice, google_api_key_textbox, temperature_slide, working_data_file_name_textbox, output_folder_state, in_colnames, context_textbox, aws_access_key_textbox, aws_secret_key_textbox, model_name_map_state, hf_api_key_textbox, azure_endpoint_textbox, logged_content_df], outputs=[overall_summary_output_files, overall_summarised_output_markdown, summarised_output_df, conversation_metadata_textbox, input_tokens_num, output_tokens_num, number_of_calls_num, estimated_time_taken_number, output_messages_textbox, logged_content_df], scroll_to_output=True, api_name="overall_summary", show_progress_on=[output_messages_textbox, overall_summary_output_files]).\
478
  success(lambda *args: usage_callback.flag(list(args), save_to_csv=SAVE_LOGS_TO_CSV, save_to_dynamodb=SAVE_LOGS_TO_DYNAMODB, dynamodb_table_name=USAGE_LOG_DYNAMODB_TABLE_NAME, dynamodb_headers=DYNAMODB_USAGE_LOG_HEADERS, replacement_headers=CSV_USAGE_LOG_HEADERS), [session_hash_textbox, original_data_file_name_textbox, in_colnames, model_choice, conversation_metadata_textbox_placeholder, input_tokens_num, output_tokens_num, number_of_calls_num, estimated_time_taken_number, cost_code_choice_drop], None, preprocess=False).\
479
  then(collect_output_csvs_and_create_excel_output, inputs=[in_data_files, in_colnames, original_data_file_name_textbox, in_group_col, model_choice, master_reference_df_state, master_unique_topics_df_state, summarised_output_df, missing_df_state, in_excel_sheets, usage_logs_state, model_name_map_state, output_folder_state, produce_structured_summary_radio], outputs=[overall_summary_output_files_xlsx, summary_xlsx_output_files_list])
480
 
 
521
  aws_secret_key_textbox,
522
  hf_api_key_textbox,
523
  azure_api_key_textbox,
524
+ azure_endpoint_textbox,
525
  output_folder_state,
526
  merge_sentiment_drop,
527
  merge_general_topics_drop,
pyproject.toml CHANGED
@@ -17,8 +17,7 @@ dependencies = [
17
  "tabulate==0.9.0",
18
  "lxml==5.3.0",
19
  "google-genai==1.33.0",
20
- "azure-ai-inference==1.0.0b9",
21
- "azure-core==1.35.0",
22
  "html5lib==1.1",
23
  "beautifulsoup4==4.12.3",
24
  "rapidfuzz==3.13.0",
 
17
  "tabulate==0.9.0",
18
  "lxml==5.3.0",
19
  "google-genai==1.33.0",
20
+ "openai==2.2.0",
 
21
  "html5lib==1.1",
22
  "beautifulsoup4==4.12.3",
23
  "rapidfuzz==3.13.0",
requirements.txt CHANGED
@@ -10,8 +10,7 @@ markdown==3.7
10
  tabulate==0.9.0
11
  lxml==5.3.0
12
  google-genai==1.33.0
13
- azure-ai-inference==1.0.0b9
14
- azure-core==1.35.0
15
  html5lib==1.1
16
  beautifulsoup4==4.12.3
17
  rapidfuzz==3.13.0
 
10
  tabulate==0.9.0
11
  lxml==5.3.0
12
  google-genai==1.33.0
13
+ openai==2.2.0
 
14
  html5lib==1.1
15
  beautifulsoup4==4.12.3
16
  rapidfuzz==3.13.0
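The azure-ai-inference and azure-core pins are replaced by the openai package, which can talk both to Azure OpenAI deployments and to the standard OpenAI endpoint. A minimal sketch of what a construct_azure_client-style helper could look like with openai 2.x; the branching rule and api_version here are assumptions, not the app's actual implementation:

from openai import AzureOpenAI, OpenAI

def build_client(api_key, endpoint=""):
    # Use AzureOpenAI when an Azure endpoint is supplied, otherwise a plain OpenAI client.
    if "azure.com" in endpoint:
        return AzureOpenAI(api_key=api_key, azure_endpoint=endpoint, api_version="2024-10-21")
    return OpenAI(api_key=api_key, base_url=endpoint or None)

# Example call; the model name must match an Azure deployment or OpenAI model (e.g. "gpt-4o-mini"):
# client = build_client(api_key, endpoint)
# reply = client.chat.completions.create(model="gpt-4o-mini",
#                                        messages=[{"role": "user", "content": "Hello"}])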
requirements_cpu.txt CHANGED
@@ -9,8 +9,7 @@ markdown==3.7
9
  tabulate==0.9.0
10
  lxml==5.3.0
11
  google-genai==1.33.0
12
- azure-ai-inference==1.0.0b9
13
- azure-core==1.35.0
14
  html5lib==1.1
15
  beautifulsoup4==4.12.3
16
  rapidfuzz==3.13.0
 
9
  tabulate==0.9.0
10
  lxml==5.3.0
11
  google-genai==1.33.0
12
+ openai==2.2.0
 
13
  html5lib==1.1
14
  beautifulsoup4==4.12.3
15
  rapidfuzz==3.13.0
requirements_gpu.txt CHANGED
@@ -9,8 +9,7 @@ markdown==3.7
9
  tabulate==0.9.0
10
  lxml==5.3.0
11
  google-genai==1.33.0
12
- azure-ai-inference==1.0.0b9
13
- azure-core==1.35.0
14
  html5lib==1.1
15
  beautifulsoup4==4.12.3
16
  rapidfuzz==3.13.0
 
9
  tabulate==0.9.0
10
  lxml==5.3.0
11
  google-genai==1.33.0
12
+ openai==2.2.0
 
13
  html5lib==1.1
14
  beautifulsoup4==4.12.3
15
  rapidfuzz==3.13.0
requirements_no_local.txt CHANGED
@@ -10,8 +10,7 @@ markdown==3.7
10
  tabulate==0.9.0
11
  lxml==5.3.0
12
  google-genai==1.33.0
13
- azure-ai-inference==1.0.0b9
14
- azure-core==1.35.0
15
  html5lib==1.1
16
  beautifulsoup4==4.12.3
17
  rapidfuzz==3.13.0
 
10
  tabulate==0.9.0
11
  lxml==5.3.0
12
  google-genai==1.33.0
13
+ openai==2.2.0
 
14
  html5lib==1.1
15
  beautifulsoup4==4.12.3
16
  rapidfuzz==3.13.0
tools/config.py CHANGED
@@ -2,6 +2,8 @@ import os
2
  import tempfile
3
  import socket
4
  import logging
 
 
5
  from datetime import datetime
6
  from dotenv import load_dotenv
7
 
@@ -217,10 +219,10 @@ RUN_AWS_BEDROCK_MODELS = get_or_create_env_var("RUN_AWS_BEDROCK_MODELS", "1")
217
  RUN_GEMINI_MODELS = get_or_create_env_var("RUN_GEMINI_MODELS", "1")
218
  GEMINI_API_KEY = get_or_create_env_var('GEMINI_API_KEY', '')
219
 
220
- # Azure AI Inference settings
221
- RUN_AZURE_MODELS = get_or_create_env_var("RUN_AZURE_MODELS", "0")
222
- AZURE_API_KEY = get_or_create_env_var('AZURE_API_KEY', '')
223
- AZURE_INFERENCE_ENDPOINT = get_or_create_env_var('AZURE_INFERENCE_ENDPOINT', '')
224
 
225
  # Build up options for models
226
 
@@ -236,27 +238,48 @@ if RUN_LOCAL_MODEL == "1" and CHOSEN_LOCAL_MODEL_TYPE:
236
  model_source.append("Local")
237
 
238
  if RUN_AWS_BEDROCK_MODELS == "1":
239
- model_full_names.extend(["anthropic.claude-3-haiku-20240307-v1:0", "anthropic.claude-3-7-sonnet-20250219-v1:0", "amazon.nova-micro-v1:0", "amazon.nova-lite-v1:0", "amazon.nova-pro-v1:0"])
240
- model_short_names.extend(["haiku", "sonnet", "nova_micro", "nova_lite", "nova_pro"])
241
- model_source.extend(["AWS", "AWS", "AWS", "AWS", "AWS"])
 
242
 
243
  if RUN_GEMINI_MODELS == "1":
244
- model_full_names.extend(["gemini-2.5-flash-lite", "gemini-2.5-flash", "gemini-2.5-pro"])
 
245
  model_short_names.extend(["gemini_flash_lite_2.5", "gemini_flash_2.5", "gemini_pro"])
246
- model_source.extend(["Gemini", "Gemini", "Gemini"])
247
 
248
- # Register Azure AI models (model names must match your Azure deployments)
249
  if RUN_AZURE_MODELS == "1":
250
- # Example deployments; adjust to the deployments you actually create in Azure
251
- model_full_names.extend(["gpt-5-mini"])
252
- model_short_names.extend(["gpt-5-mini"])
253
- model_source.extend(["Azure"])
 
254
 
255
  model_name_map = {
256
  full: {"short_name": short, "source": source}
257
  for full, short, source in zip(model_full_names, model_short_names, model_source)
258
  }
259
 
260
  #print("model_name_map:", model_name_map)
261
 
262
  # HF token may or may not be needed for downloading models from Hugging Face
@@ -453,4 +476,44 @@ else: OUTPUT_COST_CODES_PATH = 'config/cost_codes.csv'
453
 
454
  ENFORCE_COST_CODES = get_or_create_env_var('ENFORCE_COST_CODES', 'False') # If you have cost codes listed, is it compulsory to choose one before redacting?
455
 
456
- if ENFORCE_COST_CODES == 'True': GET_COST_CODES = 'True'
 
2
  import tempfile
3
  import socket
4
  import logging
5
+ import codecs
6
+ from typing import List
7
  from datetime import datetime
8
  from dotenv import load_dotenv
9
 
 
219
  RUN_GEMINI_MODELS = get_or_create_env_var("RUN_GEMINI_MODELS", "1")
220
  GEMINI_API_KEY = get_or_create_env_var('GEMINI_API_KEY', '')
221
 
222
+ # Azure/OpenAI inference settings
223
+ RUN_AZURE_MODELS = get_or_create_env_var("RUN_AZURE_MODELS", "1")
224
+ AZURE_OPENAI_API_KEY = get_or_create_env_var('AZURE_OPENAI_API_KEY', '')
225
+ AZURE_OPENAI_INFERENCE_ENDPOINT = get_or_create_env_var('AZURE_OPENAI_INFERENCE_ENDPOINT', '')
226
 
227
  # Build up options for models
228
 
 
238
  model_source.append("Local")
239
 
240
  if RUN_AWS_BEDROCK_MODELS == "1":
241
+ amazon_models = ["anthropic.claude-3-haiku-20240307-v1:0", "anthropic.claude-3-7-sonnet-20250219-v1:0", "anthropic.claude-sonnet-4-5-20250929-v1:0", "amazon.nova-micro-v1:0", "amazon.nova-lite-v1:0", "amazon.nova-pro-v1:0", "deepseek.v3-v1:0", "openai.gpt-oss-20b-1:0", "openai.gpt-oss-120b-1:0"]
242
+ model_full_names.extend(amazon_models)
243
+ model_short_names.extend(["haiku", "sonnet_3_7", "sonnet_4_5", "nova_micro", "nova_lite", "nova_pro", "deepseek_v3", "gpt_oss_20b_aws", "gpt_oss_120b_aws"])
244
+ model_source.extend(["AWS"] * len(amazon_models))
245
 
246
  if RUN_GEMINI_MODELS == "1":
247
+ gemini_models = ["gemini-2.5-flash-lite", "gemini-2.5-flash", "gemini-2.5-pro"]
248
+ model_full_names.extend(gemini_models)
249
  model_short_names.extend(["gemini_flash_lite_2.5", "gemini_flash_2.5", "gemini_pro"])
250
+ model_source.extend(["Gemini"] * len(gemini_models))
251
 
252
+ # Register Azure/OpenAI models (model names must match your Azure/OpenAI deployments)
253
  if RUN_AZURE_MODELS == "1":
254
+ # Example deployments; adjust to the deployments you actually create in Azure/OpenAI
255
+ azure_models = ["gpt-5-mini", "gpt-4o-mini"]
256
+ model_full_names.extend(azure_models)
257
+ model_short_names.extend(["gpt-5-mini", "gpt-4o-mini"])
258
+ model_source.extend(["Azure/OpenAI"] * len(azure_models))
259
 
260
  model_name_map = {
261
  full: {"short_name": short, "source": source}
262
  for full, short, source in zip(model_full_names, model_short_names, model_source)
263
  }
264
 
265
+ if RUN_LOCAL_MODEL == "1": default_model_choice = CHOSEN_LOCAL_MODEL_TYPE
266
+ elif RUN_AWS_FUNCTIONS == "1": default_model_choice = amazon_models[0]
267
+ else: default_model_choice = gemini_models[0]
268
+
269
+ default_model_source = model_name_map[default_model_choice]["source"]
270
+ model_sources = list(set([model_name_map[model]["source"] for model in model_full_names]))
271
+
272
+ def update_model_choice_config(default_model_source, model_name_map):
273
+ # Filter models by source; return the default model for that source plus the full list of matching models
274
+ matching_models = [model_name for model_name, model_info in model_name_map.items()
275
+ if model_info["source"] == default_model_source]
276
+
277
+ output_model = matching_models[0] if matching_models else model_full_names[0]
278
+
279
+ return output_model, matching_models
280
+
281
+ default_model_choice, default_source_models = update_model_choice_config(default_model_source, model_name_map)
282
+
283
  #print("model_name_map:", model_name_map)
284
 
285
  # HF token may or may not be needed for downloading models from Hugging Face
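Given the registrations above, model_name_map keys each full model name to its short name and source, and the new default-model helpers are derived from it. With the default flags (RUN_GEMINI_MODELS and RUN_AZURE_MODELS both "1"), a quick check from inside the project looks like this:

from tools.config import model_name_map, update_model_choice_config

print(model_name_map["gemini-2.5-flash"])   # {'short_name': 'gemini_flash_2.5', 'source': 'Gemini'}
print(model_name_map["gpt-4o-mini"])        # {'short_name': 'gpt-4o-mini', 'source': 'Azure/OpenAI'}

# First model registered for a source, plus every model for that source:
print(update_model_choice_config("Azure/OpenAI", model_name_map))
# -> ('gpt-5-mini', ['gpt-5-mini', 'gpt-4o-mini'])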
 
476
 
477
  ENFORCE_COST_CODES = get_or_create_env_var('ENFORCE_COST_CODES', 'False') # If you have cost codes listed, is it compulsory to choose one before redacting?
478
 
479
+ if ENFORCE_COST_CODES == 'True': GET_COST_CODES = 'True'
480
+
481
+ ###
482
+ # VALIDATE FOLDERS AND CONFIG OPTIONS
483
+ ###
484
+
485
+ def ensure_folder_exists(output_folder:str):
486
+ """Checks if the specified folder exists, creates it if not."""
487
+
488
+ if not os.path.exists(output_folder):
489
+ # Create the folder if it doesn't exist
490
+ os.makedirs(output_folder, exist_ok=True)
491
+ print(f"Created the {output_folder} folder.")
492
+ else:
493
+ pass
494
+ #print(f"The {output_folder} folder already exists.")
495
+
496
+ def _get_env_list(env_var_name: str, strip_strings:bool=True) -> List[str]:
497
+ """Parses a comma-separated environment variable into a list of strings."""
498
+ value = env_var_name[1:-1].strip().replace('\"', '').replace("\'","")
499
+ if not value:
500
+ return []
501
+ # Split by comma and filter out any empty strings that might result from extra commas
502
+ if strip_strings:
503
+ return [s.strip() for s in value.split(',') if s.strip()]
504
+ else:
505
+ return [codecs.decode(s, 'unicode_escape') for s in value.split(',') if s]
506
+
507
+ # Convert string environment variables to string or list
508
+ if SAVE_LOGS_TO_CSV == "True": SAVE_LOGS_TO_CSV = True
509
+ else: SAVE_LOGS_TO_CSV = False
510
+ if SAVE_LOGS_TO_DYNAMODB == "True": SAVE_LOGS_TO_DYNAMODB = True
511
+ else: SAVE_LOGS_TO_DYNAMODB = False
512
+
513
+ if CSV_ACCESS_LOG_HEADERS: CSV_ACCESS_LOG_HEADERS = _get_env_list(CSV_ACCESS_LOG_HEADERS)
514
+ if CSV_FEEDBACK_LOG_HEADERS: CSV_FEEDBACK_LOG_HEADERS = _get_env_list(CSV_FEEDBACK_LOG_HEADERS)
515
+ if CSV_USAGE_LOG_HEADERS: CSV_USAGE_LOG_HEADERS = _get_env_list(CSV_USAGE_LOG_HEADERS)
516
+
517
+ if DYNAMODB_ACCESS_LOG_HEADERS: DYNAMODB_ACCESS_LOG_HEADERS = _get_env_list(DYNAMODB_ACCESS_LOG_HEADERS)
518
+ if DYNAMODB_FEEDBACK_LOG_HEADERS: DYNAMODB_FEEDBACK_LOG_HEADERS = _get_env_list(DYNAMODB_FEEDBACK_LOG_HEADERS)
519
+ if DYNAMODB_USAGE_LOG_HEADERS: DYNAMODB_USAGE_LOG_HEADERS = _get_env_list(DYNAMODB_USAGE_LOG_HEADERS)
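The header variables above are stored as bracketed, quoted lists, which _get_env_list unpacks by stripping the outer brackets and quotes and splitting on commas. A quick illustration with a made-up header list:

from tools.config import _get_env_list

raw = "['session_hash', 'file_name', 'model_choice', 'input_tokens']"  # hypothetical env value
print(_get_env_list(raw))
# -> ['session_hash', 'file_name', 'model_choice', 'input_tokens']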
tools/dedup_summaries.py CHANGED
@@ -11,10 +11,10 @@ from tqdm import tqdm
11
  import os
12
  from tools.llm_api_call import generate_zero_shot_topics_df
13
  from tools.prompts import summarise_topic_descriptions_prompt, summarise_topic_descriptions_system_prompt, system_prompt, summarise_everything_prompt, comprehensive_summary_format_prompt, summarise_everything_system_prompt, comprehensive_summary_format_prompt_by_group, summary_assistant_prefill, llm_deduplication_system_prompt, llm_deduplication_prompt, llm_deduplication_prompt_with_candidates
14
- from tools.llm_funcs import construct_gemini_generative_model, process_requests, ResponseObject, load_model, calculate_tokens_from_metadata, construct_azure_client, get_model, get_tokenizer, get_assistant_model, send_request, construct_gemini_generative_model, construct_azure_client, call_llm_with_markdown_table_checks
15
  from tools.helper_functions import create_topic_summary_df_from_reference_table, load_in_data_file, get_basic_response_data, convert_reference_table_to_pivot_table, wrap_text, clean_column_name, get_file_name_no_ext, create_batch_file_path_details, read_file
16
  from tools.aws_functions import connect_to_bedrock_runtime
17
- from tools.config import OUTPUT_FOLDER, RUN_LOCAL_MODEL, MAX_COMMENT_CHARS, LLM_MAX_NEW_TOKENS, LLM_SEED, TIMEOUT_WAIT, NUMBER_OF_RETRY_ATTEMPTS, MAX_TIME_FOR_LOOP, BATCH_SIZE_DEFAULT, DEDUPLICATION_THRESHOLD, model_name_map, CHOSEN_LOCAL_MODEL_TYPE, LOCAL_REPO_ID, LOCAL_MODEL_FILE, LOCAL_MODEL_FOLDER, REASONING_SUFFIX, AZURE_INFERENCE_ENDPOINT, MAX_SPACES_GPU_RUN_TIME, OUTPUT_DEBUG_FILES
18
 
19
  max_tokens = LLM_MAX_NEW_TOKENS
20
  timeout_wait = TIMEOUT_WAIT
@@ -393,7 +393,8 @@ def deduplicate_topics_llm(reference_df:pd.DataFrame,
393
  in_data_files:List[str]=list(),
394
  chosen_cols:List[str]="",
395
  output_folder:str=OUTPUT_FOLDER,
396
- candidate_topics=None
 
397
  ):
398
  '''
399
  Deduplicate topics using LLM semantic understanding to identify and merge similar topics.
@@ -501,7 +502,7 @@ def deduplicate_topics_llm(reference_df:pd.DataFrame,
501
 
502
  # Set up model clients based on model source
503
  if "Gemini" in model_source:
504
- google_client, config = construct_gemini_generative_model(
505
  in_api_key, temperature, model_choice, llm_deduplication_system_prompt,
506
  max_tokens, LLM_SEED
507
  )
@@ -509,13 +510,13 @@ def deduplicate_topics_llm(reference_df:pd.DataFrame,
509
  elif "AWS" in model_source:
510
  if not bedrock_runtime:
511
  bedrock_runtime = boto3.client('bedrock-runtime')
512
- google_client = None
513
  config = None
514
- elif "Azure" in model_source:
515
- google_client, config = construct_azure_client(in_api_key, "")
516
  bedrock_runtime = None
517
  elif "Local" in model_source:
518
- google_client = None
519
  config = None
520
  bedrock_runtime = None
521
  else:
@@ -531,8 +532,8 @@ def deduplicate_topics_llm(reference_df:pd.DataFrame,
531
  conversation_history=conversation_history,
532
  whole_conversation=whole_conversation,
533
  whole_conversation_metadata=whole_conversation_metadata,
534
- google_client=google_client,
535
- google_config=config,
536
  model_choice=model_choice,
537
  temperature=temperature,
538
  reported_batch_no=1,
@@ -758,7 +759,7 @@ def sample_reference_table_summaries(reference_df:pd.DataFrame,
758
 
759
  return sampled_reference_table_df, summarised_references_markdown#, reference_df, topic_summary_df
760
 
761
- def summarise_output_topics_query(model_choice:str, in_api_key:str, temperature:float, formatted_summary_prompt:str, summarise_topic_descriptions_system_prompt:str, model_source:str, bedrock_runtime:boto3.Session.client, local_model=list(), tokenizer=list(), assistant_model=list()):
762
  """
763
  Query an LLM to generate a summary of topics based on the provided prompts.
764
 
@@ -780,16 +781,15 @@ def summarise_output_topics_query(model_choice:str, in_api_key:str, temperature:
780
  """
781
  conversation_history = list()
782
  whole_conversation_metadata = list()
783
- google_client = list()
784
- google_config = {}
785
 
786
  # Prepare Gemini models before query
787
  if "Gemini" in model_source:
788
  #print("Using Gemini model:", model_choice)
789
- google_client, config = construct_gemini_generative_model(in_api_key=in_api_key, temperature=temperature, model_choice=model_choice, system_prompt=system_prompt, max_tokens=max_tokens)
790
- elif "Azure" in model_source:
791
- # Azure client (endpoint from env/config)
792
- google_client, config = construct_azure_client(in_api_key=os.environ.get("AZURE_INFERENCE_CREDENTIAL", ""), endpoint=AZURE_INFERENCE_ENDPOINT)
793
  elif "Local" in model_source:
794
  pass
795
  #print("Using local model: ", model_choice)
@@ -800,7 +800,7 @@ def summarise_output_topics_query(model_choice:str, in_api_key:str, temperature:
800
  whole_conversation = [summarise_topic_descriptions_system_prompt]
801
 
802
  # Process requests to large language model
803
- responses, conversation_history, whole_conversation, whole_conversation_metadata, response_text = process_requests(formatted_summary_prompt, system_prompt, conversation_history, whole_conversation, whole_conversation_metadata, google_client, google_config, model_choice, temperature, bedrock_runtime=bedrock_runtime, model_source=model_source, local_model=local_model, tokenizer=tokenizer, assistant_model=assistant_model, assistant_prefill=summary_assistant_prefill)
804
 
805
  summarised_output = re.sub(r'\n{2,}', '\n', response_text) # Replace multiple line breaks with a single line break
806
  summarised_output = re.sub(r'^\n{1,}', '', summarised_output) # Remove one or more line breaks at the start
@@ -898,6 +898,7 @@ def summarise_output_topics(sampled_reference_table_df:pd.DataFrame,
898
  aws_secret_key_textbox:str='',
899
  model_name_map:dict=model_name_map,
900
  hf_api_key_textbox:str='',
 
901
  existing_logged_content:list=list(),
902
  output_debug_files:str=output_debug_files,
903
  reasoning_suffix:str=reasoning_suffix,
@@ -1041,7 +1042,7 @@ def summarise_output_topics(sampled_reference_table_df:pd.DataFrame,
1041
  if "Local" in model_source and reasoning_suffix: formatted_summarise_topic_descriptions_system_prompt = formatted_summarise_topic_descriptions_system_prompt + "\n" + reasoning_suffix
1042
 
1043
  try:
1044
- response, conversation_history, metadata, response_text = summarise_output_topics_query(model_choice, in_api_key, temperature, formatted_summary_prompt, formatted_summarise_topic_descriptions_system_prompt, model_source, bedrock_runtime, local_model, tokenizer=tokenizer, assistant_model=assistant_model)
1045
  summarised_output = response_text
1046
  except Exception as e:
1047
  print("Creating summary failed:", e)
@@ -1183,6 +1184,7 @@ def overall_summary(topic_summary_df:pd.DataFrame,
1183
  aws_secret_key_textbox:str='',
1184
  model_name_map:dict=model_name_map,
1185
  hf_api_key_textbox:str='',
 
1186
  existing_logged_content:list=list(),
1187
  output_debug_files:str=output_debug_files,
1188
  log_output_files:list=list(),
@@ -1313,7 +1315,7 @@ def overall_summary(topic_summary_df:pd.DataFrame,
1313
  if "Local" in model_source and reasoning_suffix: formatted_summarise_everything_system_prompt = formatted_summarise_everything_system_prompt + "\n" + reasoning_suffix
1314
 
1315
  try:
1316
- response, conversation_history, metadata, response_text = summarise_output_topics_query(model_choice, in_api_key, temperature, formatted_summary_prompt, formatted_summarise_everything_system_prompt, model_source, bedrock_runtime, local_model, tokenizer=tokenizer, assistant_model=assistant_model)
1317
  summarised_output_for_df = response_text
1318
  summarised_output = response
1319
  except Exception as e:
 
11
  import os
12
  from tools.llm_api_call import generate_zero_shot_topics_df
13
  from tools.prompts import summarise_topic_descriptions_prompt, summarise_topic_descriptions_system_prompt, system_prompt, summarise_everything_prompt, comprehensive_summary_format_prompt, summarise_everything_system_prompt, comprehensive_summary_format_prompt_by_group, summary_assistant_prefill, llm_deduplication_system_prompt, llm_deduplication_prompt, llm_deduplication_prompt_with_candidates
14
+ from tools.llm_funcs import construct_gemini_generative_model, process_requests, calculate_tokens_from_metadata, construct_azure_client, get_model, get_tokenizer, get_assistant_model, call_llm_with_markdown_table_checks
15
  from tools.helper_functions import create_topic_summary_df_from_reference_table, load_in_data_file, get_basic_response_data, convert_reference_table_to_pivot_table, wrap_text, clean_column_name, get_file_name_no_ext, create_batch_file_path_details, read_file
16
  from tools.aws_functions import connect_to_bedrock_runtime
17
+ from tools.config import OUTPUT_FOLDER, RUN_LOCAL_MODEL, MAX_COMMENT_CHARS, LLM_MAX_NEW_TOKENS, LLM_SEED, TIMEOUT_WAIT, NUMBER_OF_RETRY_ATTEMPTS, MAX_TIME_FOR_LOOP, BATCH_SIZE_DEFAULT, DEDUPLICATION_THRESHOLD, model_name_map, CHOSEN_LOCAL_MODEL_TYPE, LOCAL_REPO_ID, LOCAL_MODEL_FILE, LOCAL_MODEL_FOLDER, REASONING_SUFFIX, AZURE_OPENAI_INFERENCE_ENDPOINT, MAX_SPACES_GPU_RUN_TIME, OUTPUT_DEBUG_FILES
18
 
19
  max_tokens = LLM_MAX_NEW_TOKENS
20
  timeout_wait = TIMEOUT_WAIT
 
393
  in_data_files:List[str]=list(),
394
  chosen_cols:List[str]="",
395
  output_folder:str=OUTPUT_FOLDER,
396
+ candidate_topics=None,
397
+ azure_endpoint:str=""
398
  ):
399
  '''
400
  Deduplicate topics using LLM semantic understanding to identify and merge similar topics.
 
502
 
503
  # Set up model clients based on model source
504
  if "Gemini" in model_source:
505
+ client, config = construct_gemini_generative_model(
506
  in_api_key, temperature, model_choice, llm_deduplication_system_prompt,
507
  max_tokens, LLM_SEED
508
  )
 
510
  elif "AWS" in model_source:
511
  if not bedrock_runtime:
512
  bedrock_runtime = boto3.client('bedrock-runtime')
513
+ client = None
514
  config = None
515
+ elif "Azure/OpenAI" in model_source:
516
+ client, config = construct_azure_client(in_api_key, azure_endpoint)
517
  bedrock_runtime = None
518
  elif "Local" in model_source:
519
+ client = None
520
  config = None
521
  bedrock_runtime = None
522
  else:
 
532
  conversation_history=conversation_history,
533
  whole_conversation=whole_conversation,
534
  whole_conversation_metadata=whole_conversation_metadata,
535
+ client=client,
536
+ client_config=config,
537
  model_choice=model_choice,
538
  temperature=temperature,
539
  reported_batch_no=1,
 
759
 
760
  return sampled_reference_table_df, summarised_references_markdown#, reference_df, topic_summary_df
761
 
762
+ def summarise_output_topics_query(model_choice:str, in_api_key:str, temperature:float, formatted_summary_prompt:str, summarise_topic_descriptions_system_prompt:str, model_source:str, bedrock_runtime:boto3.Session.client, local_model=list(), tokenizer=list(), assistant_model=list(), azure_endpoint:str=""):
763
  """
764
  Query an LLM to generate a summary of topics based on the provided prompts.
765
 
 
781
  """
782
  conversation_history = list()
783
  whole_conversation_metadata = list()
784
+ client = list()
785
+ client_config = {}
786
 
787
  # Prepare Gemini models before query
788
  if "Gemini" in model_source:
789
  #print("Using Gemini model:", model_choice)
790
+ client, config = construct_gemini_generative_model(in_api_key=in_api_key, temperature=temperature, model_choice=model_choice, system_prompt=system_prompt, max_tokens=max_tokens)
791
+ elif "Azure/OpenAI" in model_source:
792
+ client, config = construct_azure_client(in_api_key=os.environ.get("AZURE_INFERENCE_CREDENTIAL", ""), endpoint=azure_endpoint)
 
793
  elif "Local" in model_source:
794
  pass
795
  #print("Using local model: ", model_choice)
 
800
  whole_conversation = [summarise_topic_descriptions_system_prompt]
801
 
802
  # Process requests to large language model
803
+ responses, conversation_history, whole_conversation, whole_conversation_metadata, response_text = process_requests(formatted_summary_prompt, system_prompt, conversation_history, whole_conversation, whole_conversation_metadata, client, client_config, model_choice, temperature, bedrock_runtime=bedrock_runtime, model_source=model_source, local_model=local_model, tokenizer=tokenizer, assistant_model=assistant_model, assistant_prefill=summary_assistant_prefill)
804
 
805
  summarised_output = re.sub(r'\n{2,}', '\n', response_text) # Replace multiple line breaks with a single line break
806
  summarised_output = re.sub(r'^\n{1,}', '', summarised_output) # Remove one or more line breaks at the start
 
898
  aws_secret_key_textbox:str='',
899
  model_name_map:dict=model_name_map,
900
  hf_api_key_textbox:str='',
901
+ azure_endpoint_textbox:str='',
902
  existing_logged_content:list=list(),
903
  output_debug_files:str=output_debug_files,
904
  reasoning_suffix:str=reasoning_suffix,
 
1042
  if "Local" in model_source and reasoning_suffix: formatted_summarise_topic_descriptions_system_prompt = formatted_summarise_topic_descriptions_system_prompt + "\n" + reasoning_suffix
1043
 
1044
  try:
1045
+ response, conversation_history, metadata, response_text = summarise_output_topics_query(model_choice, in_api_key, temperature, formatted_summary_prompt, formatted_summarise_topic_descriptions_system_prompt, model_source, bedrock_runtime, local_model, tokenizer=tokenizer, assistant_model=assistant_model, azure_endpoint=azure_endpoint_textbox)
1046
  summarised_output = response_text
1047
  except Exception as e:
1048
  print("Creating summary failed:", e)
 
1184
  aws_secret_key_textbox:str='',
1185
  model_name_map:dict=model_name_map,
1186
  hf_api_key_textbox:str='',
1187
+ azure_endpoint_textbox:str='',
1188
  existing_logged_content:list=list(),
1189
  output_debug_files:str=output_debug_files,
1190
  log_output_files:list=list(),
 
1315
  if "Local" in model_source and reasoning_suffix: formatted_summarise_everything_system_prompt = formatted_summarise_everything_system_prompt + "\n" + reasoning_suffix
1316
 
1317
  try:
1318
+ response, conversation_history, metadata, response_text = summarise_output_topics_query(model_choice, in_api_key, temperature, formatted_summary_prompt, formatted_summarise_everything_system_prompt, model_source, bedrock_runtime, local_model, tokenizer=tokenizer, assistant_model=assistant_model, azure_endpoint=azure_endpoint_textbox)
1319
  summarised_output_for_df = response_text
1320
  summarised_output = response
1321
  except Exception as e:
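The commit message mentions compatibility with thinking models; on Bedrock, Anthropic's extended thinking is passed through additionalModelRequestFields on the Converse API. A minimal sketch of such a request against one of the newly registered models — the use of converse and the budget value are assumptions about how the bedrock_runtime client created above might be driven, not the app's exact call path:

import boto3

bedrock_runtime = boto3.client("bedrock-runtime")

response = bedrock_runtime.converse(
    modelId="anthropic.claude-sonnet-4-5-20250929-v1:0",
    messages=[{"role": "user", "content": [{"text": "Summarise the main topics in these responses."}]}],
    inferenceConfig={"maxTokens": 4096},
    additionalModelRequestFields={"thinking": {"type": "enabled", "budget_tokens": 1024}},
)
for block in response["output"]["message"]["content"]:
    if "text" in block:
        print(block["text"])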
tools/helper_functions.py CHANGED
@@ -6,8 +6,9 @@ import pandas as pd
6
  import numpy as np
7
  from typing import List
8
  import math
 
9
  from botocore.exceptions import ClientError
10
- from tools.config import OUTPUT_FOLDER, INPUT_FOLDER, SESSION_OUTPUT_FOLDER, CUSTOM_HEADER, CUSTOM_HEADER_VALUE, AWS_USER_POOL_ID, MAXIMUM_ZERO_SHOT_TOPICS
11
 
12
  def empty_output_vars_extract_topics():
13
  # Empty output objects before processing a new file
@@ -745,8 +746,6 @@ def enforce_cost_codes(enforce_cost_code_textbox:str, cost_code_choice:str, cost
745
  raise Exception("Selected cost code not found in list. Please contact Finance if you cannot find the correct cost code from the given list of suggestions.")
746
  return
747
 
748
- import codecs
749
-
750
  def _get_env_list(env_var_name: str, strip_strings:bool=True) -> List[str]:
751
  """Parses a comma-separated environment variable into a list of strings."""
752
  value = env_var_name[1:-1].strip().replace('\"', '').replace("\'","")
@@ -898,4 +897,13 @@ def generate_zero_shot_topics_df(zero_shot_topics:pd.DataFrame,
898
  "Description": zero_shot_topics_description_list
899
  })
900
 
901
- return zero_shot_topics_df
 
6
  import numpy as np
7
  from typing import List
8
  import math
9
+ import codecs
10
  from botocore.exceptions import ClientError
11
+ from tools.config import OUTPUT_FOLDER, INPUT_FOLDER, SESSION_OUTPUT_FOLDER, CUSTOM_HEADER, CUSTOM_HEADER_VALUE, AWS_USER_POOL_ID, MAXIMUM_ZERO_SHOT_TOPICS, model_name_map, model_full_names
12
 
13
  def empty_output_vars_extract_topics():
14
  # Empty output objects before processing a new file
 
746
  raise Exception("Selected cost code not found in list. Please contact Finance if you cannot find the correct cost code from the given list of suggestions.")
747
  return
748
 
 
 
749
  def _get_env_list(env_var_name: str, strip_strings:bool=True) -> List[str]:
750
  """Parses a comma-separated environment variable into a list of strings."""
751
  value = env_var_name[1:-1].strip().replace('\"', '').replace("\'","")
 
897
  "Description": zero_shot_topics_description_list
898
  })
899
 
900
+ return zero_shot_topics_df
901
+
902
+ def update_model_choice(model_source):
903
+ # Filter models by source and return an updated Dropdown with the first matching model selected
904
+ matching_models = [model_name for model_name, model_info in model_name_map.items()
905
+ if model_info["source"] == model_source]
906
+
907
+ output_model = matching_models[0] if matching_models else model_full_names[0]
908
+
909
+ return gr.Dropdown(value = output_model, choices = matching_models, label="Large language model for topic extraction and summarisation", multiselect=False)
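Note: a minimal sketch of how update_model_choice could be wired into the Gradio UI; the component names (model_source_radio, model_choice_dropdown) and the choices list are illustrative assumptions, not taken from app.py.

import gradio as gr
from tools.helper_functions import update_model_choice

with gr.Blocks() as demo:
    # Hypothetical components for illustration only
    model_source_radio = gr.Radio(choices=["Gemini", "AWS", "Azure/OpenAI", "Local"], value="Gemini", label="Model source")
    model_choice_dropdown = gr.Dropdown(label="Large language model for topic extraction and summarisation")
    # Repopulate the model dropdown whenever the source changes
    model_source_radio.change(update_model_choice, inputs=model_source_radio, outputs=model_choice_dropdown)
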
tools/llm_api_call.py CHANGED
@@ -18,7 +18,7 @@ GradioFileData = gr.FileData
18
  from tools.prompts import initial_table_prompt, initial_table_system_prompt, add_existing_topics_system_prompt, add_existing_topics_prompt, force_existing_topics_prompt, allow_new_topics_prompt, force_single_topic_prompt, add_existing_topics_assistant_prefill, initial_table_assistant_prefill, structured_summary_prompt, default_response_reference_format, negative_neutral_positive_sentiment_prompt, negative_or_positive_sentiment_prompt, default_sentiment_prompt
19
  from tools.helper_functions import read_file, put_columns_in_df, wrap_text, initial_clean, load_in_data_file, load_in_file, create_topic_summary_df_from_reference_table, convert_reference_table_to_pivot_table, get_basic_response_data, clean_column_name, load_in_previous_data_files, create_batch_file_path_details, move_overall_summary_output_files_to_front_page, generate_zero_shot_topics_df
20
  from tools.llm_funcs import ResponseObject, construct_gemini_generative_model, call_llm_with_markdown_table_checks, create_missing_references_df, calculate_tokens_from_metadata, construct_azure_client, get_model, get_tokenizer, get_assistant_model
21
- from tools.config import RUN_LOCAL_MODEL, AWS_REGION, MAX_COMMENT_CHARS, MAX_OUTPUT_VALIDATION_ATTEMPTS, LLM_MAX_NEW_TOKENS, TIMEOUT_WAIT, NUMBER_OF_RETRY_ATTEMPTS, MAX_TIME_FOR_LOOP, BATCH_SIZE_DEFAULT, DEDUPLICATION_THRESHOLD, model_name_map, OUTPUT_FOLDER, CHOSEN_LOCAL_MODEL_TYPE, LOCAL_REPO_ID, LOCAL_MODEL_FILE, LOCAL_MODEL_FOLDER, LLM_SEED, MAX_GROUPS, REASONING_SUFFIX, AZURE_INFERENCE_ENDPOINT, MAX_ROWS, MAXIMUM_ZERO_SHOT_TOPICS, MAX_SPACES_GPU_RUN_TIME, OUTPUT_DEBUG_FILES
22
  from tools.aws_functions import connect_to_bedrock_runtime
23
  from tools.dedup_summaries import sample_reference_table_summaries, summarise_output_topics, deduplicate_topics, overall_summary, process_debug_output_iteration
24
  from tools.combine_sheets_into_xlsx import collect_output_csvs_and_create_excel_output
@@ -620,6 +620,7 @@ def extract_topics(in_data_file: GradioFileData,
620
  aws_secret_key_textbox:str='',
621
  hf_api_key_textbox:str='',
622
  azure_api_key_textbox:str='',
 
623
  max_tokens:int=max_tokens,
624
  model_name_map:dict=model_name_map,
625
  existing_logged_content:list=list(),
@@ -635,7 +636,7 @@ def extract_topics(in_data_file: GradioFileData,
635
  progress=Progress(track_tqdm=False)):
636
 
637
  '''
638
- Query an LLM (local, (Gemma/GPT-OSS if local, Gemini, AWS Bedrock or Azure AI Inference) with up to three prompts about a table of open text data. Up to 'batch_size' rows will be queried at a time.
639
 
640
  Parameters:
641
  - in_data_file (gr.File): Gradio file object containing input data
@@ -693,8 +694,8 @@ def extract_topics(in_data_file: GradioFileData,
693
 
694
  tic = time.perf_counter()
695
 
696
- google_client = list()
697
- google_config = {}
698
  final_time = 0.0
699
  whole_conversation_metadata = list()
700
  is_error = False
@@ -822,13 +823,13 @@ def extract_topics(in_data_file: GradioFileData,
822
  # Prepare clients before query
823
  if "Gemini" in model_source:
824
  #print("Using Gemini model:", model_choice)
825
- google_client, google_config = construct_gemini_generative_model(in_api_key=in_api_key, temperature=temperature, model_choice=model_choice, system_prompt=formatted_system_prompt, max_tokens=max_tokens)
826
- elif "Azure" in model_source:
827
- #print("Using Azure AI Inference model:", model_choice)
828
  # If provided, set env for downstream calls too
829
  if azure_api_key_textbox:
830
  os.environ["AZURE_INFERENCE_CREDENTIAL"] = azure_api_key_textbox
831
- google_client, google_config = construct_azure_client(in_api_key=azure_api_key_textbox, endpoint=AZURE_INFERENCE_ENDPOINT)
832
  elif "anthropic.claude" in model_choice:
833
  #print("Using AWS Bedrock model:", model_choice)
834
  pass
@@ -949,7 +950,7 @@ def extract_topics(in_data_file: GradioFileData,
949
  whole_conversation = list()
950
 
951
  # Process requests to large language model
952
- responses, conversation_history, whole_conversation, whole_conversation_metadata, response_text = call_llm_with_markdown_table_checks(summary_prompt_list, formatted_system_prompt, conversation_history, whole_conversation, whole_conversation_metadata, google_client, google_config, model_choice, temperature, reported_batch_no, local_model, tokenizer, bedrock_runtime, model_source, MAX_OUTPUT_VALIDATION_ATTEMPTS, assistant_prefill=add_existing_topics_assistant_prefill, master = True)
953
 
954
  # Return output tables
955
  topic_table_out_path, reference_table_out_path, topic_summary_df_out_path, new_topic_df, new_reference_df, new_topic_summary_df, master_batch_out_file_part, is_error = write_llm_output_and_logs(response_text, whole_conversation, whole_conversation_metadata, file_name, latest_batch_completed, start_row, end_row, model_choice_clean, temperature, log_files_output_paths, existing_reference_df, existing_topic_summary_df, batch_size, chosen_cols, batch_basic_response_df, model_name_map, group_name, produce_structured_summary_radio, first_run=False, output_folder=output_folder)
@@ -1011,12 +1012,12 @@ def extract_topics(in_data_file: GradioFileData,
1011
  # Prepare Gemini models before query
1012
  if model_source == "Gemini":
1013
  print("Using Gemini model:", model_choice)
1014
- google_client, google_config = construct_gemini_generative_model(in_api_key=in_api_key, temperature=temperature, model_choice=model_choice, system_prompt=formatted_system_prompt, max_tokens=max_tokens)
1015
- elif model_source == "Azure":
1016
- print("Using Azure AI Inference model:", model_choice)
1017
  if azure_api_key_textbox:
1018
  os.environ["AZURE_INFERENCE_CREDENTIAL"] = azure_api_key_textbox
1019
- google_client, google_config = construct_azure_client(in_api_key=azure_api_key_textbox, endpoint=AZURE_INFERENCE_ENDPOINT)
1020
  elif model_choice == CHOSEN_LOCAL_MODEL_TYPE:
1021
  pass
1022
  #print("Using local model:", model_choice)
@@ -1038,7 +1039,7 @@ def extract_topics(in_data_file: GradioFileData,
1038
 
1039
  whole_conversation = list()
1040
 
1041
- responses, conversation_history, whole_conversation, whole_conversation_metadata, response_text = call_llm_with_markdown_table_checks(batch_prompts, formatted_system_prompt, conversation_history, whole_conversation, whole_conversation_metadata, google_client, google_config, model_choice, temperature, reported_batch_no, local_model, tokenizer,bedrock_runtime, model_source, MAX_OUTPUT_VALIDATION_ATTEMPTS, assistant_prefill=initial_table_assistant_prefill)
1042
 
1043
  topic_table_out_path, reference_table_out_path, topic_summary_df_out_path, topic_table_df, reference_df, new_topic_summary_df, batch_file_path_details, is_error = write_llm_output_and_logs(response_text, whole_conversation, whole_conversation_metadata, file_name, latest_batch_completed, start_row, end_row, model_choice_clean, temperature, log_files_output_paths, existing_reference_df, existing_topic_summary_df, batch_size, chosen_cols, batch_basic_response_df, model_name_map, group_name, produce_structured_summary_radio, first_run=True, output_folder=output_folder)
1044
 
@@ -1243,6 +1244,7 @@ def wrapper_extract_topics_per_column_value(
1243
  aws_secret_key_textbox:str="",
1244
  hf_api_key_textbox:str="",
1245
  azure_api_key_textbox:str="",
 
1246
  output_folder: str = OUTPUT_FOLDER,
1247
  existing_logged_content:list=list(),
1248
  additional_instructions_summary_format:str="",
@@ -1296,7 +1298,7 @@ def wrapper_extract_topics_per_column_value(
1296
  :param aws_access_key_textbox: AWS access key for Bedrock.
1297
  :param aws_secret_key_textbox: AWS secret key for Bedrock.
1298
  :param hf_api_key_textbox: Hugging Face API key for local models.
1299
- :param azure_api_key_textbox: Azure API key for Azure AI Inference.
1300
  :param output_folder: The folder where output files will be saved.
1301
  :param existing_logged_content: A list of existing logged content.
1302
  :param force_single_topic_prompt: Prompt for forcing a single topic.
@@ -1475,6 +1477,7 @@ def wrapper_extract_topics_per_column_value(
1475
  aws_secret_key_textbox=aws_secret_key_textbox,
1476
  hf_api_key_textbox=hf_api_key_textbox,
1477
  azure_api_key_textbox=azure_api_key_textbox,
 
1478
  max_tokens=max_tokens,
1479
  model_name_map=model_name_map,
1480
  max_time_for_loop=max_time_for_loop,
@@ -1736,6 +1739,7 @@ def all_in_one_pipeline(
1736
  aws_secret_key_text: str,
1737
  hf_api_key_text: str,
1738
  azure_api_key_text: str,
 
1739
  output_folder: str = OUTPUT_FOLDER,
1740
  merge_sentiment: str = "No",
1741
  merge_general_topics: str = "Yes",
@@ -1790,7 +1794,7 @@ def all_in_one_pipeline(
1790
  aws_access_key_text (str): AWS access key.
1791
  aws_secret_key_text (str): AWS secret key.
1792
  hf_api_key_text (str): Hugging Face API key.
1793
- azure_api_key_text (str): Azure API key.
1794
  output_folder (str, optional): Folder to save output files. Defaults to OUTPUT_FOLDER.
1795
  merge_sentiment (str, optional): Whether to merge sentiment. Defaults to "No".
1796
  merge_general_topics (str, optional): Whether to merge general topics. Defaults to "Yes".
@@ -1884,6 +1888,7 @@ def all_in_one_pipeline(
1884
  aws_secret_key_textbox=aws_secret_key_text,
1885
  hf_api_key_textbox=hf_api_key_text,
1886
  azure_api_key_textbox=azure_api_key_text,
 
1887
  output_folder=output_folder,
1888
  existing_logged_content=existing_logged_content,
1889
  model_name_map=model_name_map_state,
 
18
  from tools.prompts import initial_table_prompt, initial_table_system_prompt, add_existing_topics_system_prompt, add_existing_topics_prompt, force_existing_topics_prompt, allow_new_topics_prompt, force_single_topic_prompt, add_existing_topics_assistant_prefill, initial_table_assistant_prefill, structured_summary_prompt, default_response_reference_format, negative_neutral_positive_sentiment_prompt, negative_or_positive_sentiment_prompt, default_sentiment_prompt
19
  from tools.helper_functions import read_file, put_columns_in_df, wrap_text, initial_clean, load_in_data_file, load_in_file, create_topic_summary_df_from_reference_table, convert_reference_table_to_pivot_table, get_basic_response_data, clean_column_name, load_in_previous_data_files, create_batch_file_path_details, move_overall_summary_output_files_to_front_page, generate_zero_shot_topics_df
20
  from tools.llm_funcs import ResponseObject, construct_gemini_generative_model, call_llm_with_markdown_table_checks, create_missing_references_df, calculate_tokens_from_metadata, construct_azure_client, get_model, get_tokenizer, get_assistant_model
21
+ from tools.config import RUN_LOCAL_MODEL, AWS_REGION, MAX_COMMENT_CHARS, MAX_OUTPUT_VALIDATION_ATTEMPTS, LLM_MAX_NEW_TOKENS, TIMEOUT_WAIT, NUMBER_OF_RETRY_ATTEMPTS, MAX_TIME_FOR_LOOP, BATCH_SIZE_DEFAULT, DEDUPLICATION_THRESHOLD, model_name_map, OUTPUT_FOLDER, CHOSEN_LOCAL_MODEL_TYPE, LOCAL_REPO_ID, LOCAL_MODEL_FILE, LOCAL_MODEL_FOLDER, LLM_SEED, MAX_GROUPS, REASONING_SUFFIX, AZURE_OPENAI_INFERENCE_ENDPOINT, MAX_ROWS, MAXIMUM_ZERO_SHOT_TOPICS, MAX_SPACES_GPU_RUN_TIME, OUTPUT_DEBUG_FILES
22
  from tools.aws_functions import connect_to_bedrock_runtime
23
  from tools.dedup_summaries import sample_reference_table_summaries, summarise_output_topics, deduplicate_topics, overall_summary, process_debug_output_iteration
24
  from tools.combine_sheets_into_xlsx import collect_output_csvs_and_create_excel_output
 
620
  aws_secret_key_textbox:str='',
621
  hf_api_key_textbox:str='',
622
  azure_api_key_textbox:str='',
623
+ azure_endpoint_textbox:str='',
624
  max_tokens:int=max_tokens,
625
  model_name_map:dict=model_name_map,
626
  existing_logged_content:list=list(),
 
636
  progress=Progress(track_tqdm=False)):
637
 
638
  '''
639
+ Query an LLM (Gemma/GPT-OSS if local, Gemini, AWS Bedrock, or Azure/OpenAI AI Inference) with up to three prompts about a table of open text data. Up to 'batch_size' rows will be queried at a time.
640
 
641
  Parameters:
642
  - in_data_file (gr.File): Gradio file object containing input data
 
694
 
695
  tic = time.perf_counter()
696
 
697
+ client = list()
698
+ client_config = {}
699
  final_time = 0.0
700
  whole_conversation_metadata = list()
701
  is_error = False
 
823
  # Prepare clients before query
824
  if "Gemini" in model_source:
825
  #print("Using Gemini model:", model_choice)
826
+ client, client_config = construct_gemini_generative_model(in_api_key=in_api_key, temperature=temperature, model_choice=model_choice, system_prompt=formatted_system_prompt, max_tokens=max_tokens)
827
+ elif "Azure/OpenAI" in model_source:
828
+ #print("Using Azure/OpenAI AI Inference model:", model_choice)
829
  # If provided, set env for downstream calls too
830
  if azure_api_key_textbox:
831
  os.environ["AZURE_INFERENCE_CREDENTIAL"] = azure_api_key_textbox
832
+ client, client_config = construct_azure_client(in_api_key=azure_api_key_textbox, endpoint=azure_endpoint_textbox)
833
  elif "anthropic.claude" in model_choice:
834
  #print("Using AWS Bedrock model:", model_choice)
835
  pass
 
950
  whole_conversation = list()
951
 
952
  # Process requests to large language model
953
+ responses, conversation_history, whole_conversation, whole_conversation_metadata, response_text = call_llm_with_markdown_table_checks(summary_prompt_list, formatted_system_prompt, conversation_history, whole_conversation, whole_conversation_metadata, client, client_config, model_choice, temperature, reported_batch_no, local_model, tokenizer, bedrock_runtime, model_source, MAX_OUTPUT_VALIDATION_ATTEMPTS, assistant_prefill=add_existing_topics_assistant_prefill, master = True)
954
 
955
  # Return output tables
956
  topic_table_out_path, reference_table_out_path, topic_summary_df_out_path, new_topic_df, new_reference_df, new_topic_summary_df, master_batch_out_file_part, is_error = write_llm_output_and_logs(response_text, whole_conversation, whole_conversation_metadata, file_name, latest_batch_completed, start_row, end_row, model_choice_clean, temperature, log_files_output_paths, existing_reference_df, existing_topic_summary_df, batch_size, chosen_cols, batch_basic_response_df, model_name_map, group_name, produce_structured_summary_radio, first_run=False, output_folder=output_folder)
 
1012
  # Prepare Gemini models before query
1013
  if model_source == "Gemini":
1014
  print("Using Gemini model:", model_choice)
1015
+ client, client_config = construct_gemini_generative_model(in_api_key=in_api_key, temperature=temperature, model_choice=model_choice, system_prompt=formatted_system_prompt, max_tokens=max_tokens)
1016
+ elif model_source == "Azure/OpenAI":
1017
+ print("Using Azure/OpenAI AI Inference model:", model_choice)
1018
  if azure_api_key_textbox:
1019
  os.environ["AZURE_INFERENCE_CREDENTIAL"] = azure_api_key_textbox
1020
+ client, client_config = construct_azure_client(in_api_key=azure_api_key_textbox, endpoint=azure_endpoint_textbox)
1021
  elif model_choice == CHOSEN_LOCAL_MODEL_TYPE:
1022
  pass
1023
  #print("Using local model:", model_choice)
 
1039
 
1040
  whole_conversation = list()
1041
 
1042
+ responses, conversation_history, whole_conversation, whole_conversation_metadata, response_text = call_llm_with_markdown_table_checks(batch_prompts, formatted_system_prompt, conversation_history, whole_conversation, whole_conversation_metadata, client, client_config, model_choice, temperature, reported_batch_no, local_model, tokenizer,bedrock_runtime, model_source, MAX_OUTPUT_VALIDATION_ATTEMPTS, assistant_prefill=initial_table_assistant_prefill)
1043
 
1044
  topic_table_out_path, reference_table_out_path, topic_summary_df_out_path, topic_table_df, reference_df, new_topic_summary_df, batch_file_path_details, is_error = write_llm_output_and_logs(response_text, whole_conversation, whole_conversation_metadata, file_name, latest_batch_completed, start_row, end_row, model_choice_clean, temperature, log_files_output_paths, existing_reference_df, existing_topic_summary_df, batch_size, chosen_cols, batch_basic_response_df, model_name_map, group_name, produce_structured_summary_radio, first_run=True, output_folder=output_folder)
1045
 
 
1244
  aws_secret_key_textbox:str="",
1245
  hf_api_key_textbox:str="",
1246
  azure_api_key_textbox:str="",
1247
+ azure_endpoint_textbox:str="",
1248
  output_folder: str = OUTPUT_FOLDER,
1249
  existing_logged_content:list=list(),
1250
  additional_instructions_summary_format:str="",
 
1298
  :param aws_access_key_textbox: AWS access key for Bedrock.
1299
  :param aws_secret_key_textbox: AWS secret key for Bedrock.
1300
  :param hf_api_key_textbox: Hugging Face API key for local models.
1301
+ :param azure_api_key_textbox: Azure/OpenAI API key for Azure/OpenAI AI Inference.
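+ :param azure_endpoint_textbox: Azure/OpenAI inference endpoint used when an Azure/OpenAI model is selected.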
1302
  :param output_folder: The folder where output files will be saved.
1303
  :param existing_logged_content: A list of existing logged content.
1304
  :param force_single_topic_prompt: Prompt for forcing a single topic.
 
1477
  aws_secret_key_textbox=aws_secret_key_textbox,
1478
  hf_api_key_textbox=hf_api_key_textbox,
1479
  azure_api_key_textbox=azure_api_key_textbox,
1480
+ azure_endpoint_textbox=azure_endpoint_textbox,
1481
  max_tokens=max_tokens,
1482
  model_name_map=model_name_map,
1483
  max_time_for_loop=max_time_for_loop,
 
1739
  aws_secret_key_text: str,
1740
  hf_api_key_text: str,
1741
  azure_api_key_text: str,
1742
+ azure_endpoint_text: str,
1743
  output_folder: str = OUTPUT_FOLDER,
1744
  merge_sentiment: str = "No",
1745
  merge_general_topics: str = "Yes",
 
1794
  aws_access_key_text (str): AWS access key.
1795
  aws_secret_key_text (str): AWS secret key.
1796
  hf_api_key_text (str): Hugging Face API key.
1797
+ azure_api_key_text (str): Azure/OpenAI API key.
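+ azure_endpoint_text (str): Azure/OpenAI inference endpoint.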
1798
  output_folder (str, optional): Folder to save output files. Defaults to OUTPUT_FOLDER.
1799
  merge_sentiment (str, optional): Whether to merge sentiment. Defaults to "No".
1800
  merge_general_topics (str, optional): Whether to merge general topics. Defaults to "Yes".
 
1888
  aws_secret_key_textbox=aws_secret_key_text,
1889
  hf_api_key_textbox=hf_api_key_text,
1890
  azure_api_key_textbox=azure_api_key_text,
1891
+ azure_endpoint_textbox=azure_endpoint_text,
1892
  output_folder=output_folder,
1893
  existing_logged_content=existing_logged_content,
1894
  model_name_map=model_name_map_state,
tools/llm_funcs.py CHANGED
@@ -10,10 +10,7 @@ from typing import List, Tuple, TypeVar
10
  from google import genai as ai
11
  from google.genai import types
12
  from gradio import Progress
13
-
14
- from azure.ai.inference import ChatCompletionsClient
15
- from azure.core.credentials import AzureKeyCredential
16
- from azure.ai.inference.models import SystemMessage, UserMessage
17
 
18
  model_type = None # global variable setup
19
  full_text = "" # Define dummy source text (full text) just to enable highlight function to load
@@ -674,27 +671,31 @@ def construct_gemini_generative_model(in_api_key: str, temperature: float, model
674
 
675
  def construct_azure_client(in_api_key: str, endpoint: str) -> Tuple[object, dict]:
676
  """
677
- Constructs a ChatCompletionsClient for Azure AI Inference.
678
  """
679
  try:
680
  key = None
681
  if in_api_key:
682
  key = in_api_key
683
- elif os.environ.get("AZURE_INFERENCE_CREDENTIAL"):
684
- key = os.environ["AZURE_INFERENCE_CREDENTIAL"]
685
- elif os.environ.get("AZURE_API_KEY"):
686
- key = os.environ["AZURE_API_KEY"]
687
  if not key:
688
- raise Warning("No Azure API key found.")
689
 
690
  if not endpoint:
691
- endpoint = os.environ.get("AZURE_INFERENCE_ENDPOINT", "")
692
  if not endpoint:
693
- raise Warning("No Azure inference endpoint found.")
694
- client = ChatCompletionsClient(endpoint=endpoint, credential=AzureKeyCredential(key))
695
- return client, {}

696
  except Exception as e:
697
- print("Error constructing Azure ChatCompletions client:", e)
698
  raise
699
 
700
  def call_aws_claude(prompt: str, system_prompt: str, temperature: float, max_tokens: int, model_choice:str, bedrock_runtime:boto3.Session.client, assistant_prefill:str="") -> ResponseObject:
@@ -756,7 +757,15 @@ def call_aws_claude(prompt: str, system_prompt: str, temperature: float, max_tok
756
  )
757
 
758
  output_message = api_response['output']['message']
759
- text = assistant_prefill + output_message['content'][0]['text']

760
 
761
  # The usage statistics are neatly provided in the 'usage' key.
762
  usage = api_response['usage']
@@ -803,9 +812,6 @@ def call_transformers_model(prompt: str, system_prompt: str, gen_config: LlamaCP
803
  {"role": "system", "content": system_prompt},
804
  {"role": "user", "content": prompt}
805
  ]
806
- #print("Conversation:", conversation)
807
- #import pprint
808
- #pprint.pprint(conversation)
809
 
810
  # 2. Apply the chat template
811
  # This function formats the conversation into the exact string Gemma 3 expects.
@@ -820,9 +826,6 @@ def call_transformers_model(prompt: str, system_prompt: str, gen_config: LlamaCP
820
  ).to("cuda")
821
  except Exception as e:
822
  print("Error applying chat template:", e)
823
- print("Conversation type:", type(conversation))
824
- for turn in conversation:
825
- print("Turn type:", type(turn), "Content type:", type(turn.get("content")))
826
  raise
827
 
828
  # Map LlamaCPP parameters to transformers parameters
@@ -850,7 +853,7 @@ def call_transformers_model(prompt: str, system_prompt: str, gen_config: LlamaCP
850
 
851
  # Use speculative decoding if assistant model is available
852
  if speculative_decoding and assistant_model is not None:
853
- print("Using speculative decoding with assistant model")
854
  outputs = model.generate(
855
  input_ids,
856
  assistant_model=assistant_model,
@@ -858,7 +861,7 @@ def call_transformers_model(prompt: str, system_prompt: str, gen_config: LlamaCP
858
  streamer = streamer
859
  )
860
  else:
861
- print("Generating without speculative decoding")
862
  outputs = model.generate(
863
  input_ids,
864
  **generation_kwargs,
@@ -868,11 +871,9 @@ def call_transformers_model(prompt: str, system_prompt: str, gen_config: LlamaCP
868
  end_time = time.time()
869
 
870
  # --- Decode and Display Results ---
871
- #generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
872
- # To get only the model's reply, we can decode just the newly generated tokens
873
  new_tokens = outputs[0][input_ids.shape[-1]:]
874
  assistant_reply = tokenizer.decode(new_tokens, skip_special_tokens=True)
875
- #print("Assistant reply:", assistant_reply)
876
 
877
  num_input_tokens = input_ids.shape[-1] # This gets the sequence length (number of tokens)
878
  num_generated_tokens = len(new_tokens)
@@ -887,12 +888,32 @@ def call_transformers_model(prompt: str, system_prompt: str, gen_config: LlamaCP
887
  return assistant_reply, num_input_tokens, num_generated_tokens
888
 
889
  # Function to send a request and update history
890
- def send_request(prompt: str, conversation_history: List[dict], google_client: ai.Client, config: types.GenerateContentConfig, model_choice: str, system_prompt: str, temperature: float, bedrock_runtime:boto3.Session.client, model_source:str, local_model= list(), tokenizer=None, assistant_model=None, assistant_prefill = "", progress=Progress(track_tqdm=True)) -> Tuple[str, List[dict]]:
891
- """
892
- This function sends a request to a language model with the given prompt, conversation history, model configuration, model choice, system prompt, and temperature.
893
- It constructs the full prompt by appending the new user prompt to the conversation history, generates a response from the model, and updates the conversation history with the new prompt and response.
894
- If the model choice is specific to AWS Claude, it calls the `call_aws_claude` function; otherwise, it uses the `client.models.generate_content` method.
895
- The function returns the response text and the updated conversation history.

896
  """
897
  # Constructing the full prompt from the conversation history
898
  full_prompt = "Conversation history:\n"
@@ -920,7 +941,7 @@ def send_request(prompt: str, conversation_history: List[dict], google_client: a
920
  try:
921
  print("Calling Gemini model, attempt", i + 1)
922
 
923
- response = google_client.models.generate_content(model=model_choice, contents=full_prompt, config=config)
924
 
925
  #print("Successful call to Gemini model.")
926
  break
@@ -948,18 +969,29 @@ def send_request(prompt: str, conversation_history: List[dict], google_client: a
948
 
949
  if i == number_of_api_retry_attempts:
950
  return ResponseObject(text="", usage_metadata={'RequestId':"FAILED"}), conversation_history, response_text, num_transformer_input_tokens, num_transformer_generated_tokens
951
- elif "Azure" in model_source:
952
  for i in progress_bar:
953
  try:
954
- print("Calling Azure AI Inference model, attempt", i + 1)
955
- # Use structured messages for Azure
956
- response_raw = google_client.complete(
957
- messages=[
958
- SystemMessage(content=system_prompt),
959
- UserMessage(content=prompt),
960
- ],
961
- model=model_choice

962
  )
 
963
  response_text = response_raw.choices[0].message.content
964
  usage = getattr(response_raw, "usage", None)
965
  input_tokens = 0
@@ -973,7 +1005,7 @@ def send_request(prompt: str, conversation_history: List[dict], google_client: a
973
  )
974
  break
975
  except Exception as e:
976
- print("Call to Azure model failed:", e, " Waiting for ", str(timeout_wait), "seconds and trying again.")
977
  time.sleep(timeout_wait)
978
  if i == number_of_api_retry_attempts:
979
  return ResponseObject(text="", usage_metadata={'RequestId':"FAILED"}), conversation_history, response_text, num_transformer_input_tokens, num_transformer_generated_tokens
@@ -993,7 +1025,6 @@ def send_request(prompt: str, conversation_history: List[dict], google_client: a
993
  response, num_transformer_input_tokens, num_transformer_generated_tokens = call_transformers_model(prompt, system_prompt, gen_config, model=local_model, tokenizer=tokenizer, assistant_model=assistant_model)
994
  response_text = response
995
 
996
- #print("Successful call to local model.")
997
  break
998
  except Exception as e:
999
  # If fails, try again after X seconds in case there is a throttle limit
@@ -1035,7 +1066,7 @@ system_prompt: str,
1035
  conversation_history: List[dict],
1036
  whole_conversation: List[str],
1037
  whole_conversation_metadata: List[str],
1038
- google_client: ai.Client,
1039
  config: types.GenerateContentConfig,
1040
  model_choice: str,
1041
  temperature: float,
@@ -1056,7 +1087,7 @@ assistant_prefill="") -> Tuple[List[ResponseObject], List[dict], List[str], List
1056
  conversation_history (List[dict]): The history of the conversation.
1057
  whole_conversation (List[str]): The complete conversation including prompts and responses.
1058
  whole_conversation_metadata (List[str]): Metadata about the whole conversation.
1059
- google_client (object): The google_client to use for processing the prompts.
1060
  config (dict): Configuration for the model.
1061
  model_choice (str): The choice of model to use.
1062
  temperature (float): The temperature parameter for the model.
@@ -1077,22 +1108,15 @@ assistant_prefill="") -> Tuple[List[ResponseObject], List[dict], List[str], List
1077
 
1078
  for prompt in prompts:
1079
 
1080
- response, conversation_history, response_text, num_transformer_input_tokens, num_transformer_generated_tokens = send_request(prompt, conversation_history, google_client=google_client, config=config, model_choice=model_choice, system_prompt=system_prompt, temperature=temperature, local_model=local_model, tokenizer=tokenizer, assistant_model=assistant_model, assistant_prefill=assistant_prefill, bedrock_runtime=bedrock_runtime, model_source=model_source)
1081
 
1082
  responses.append(response)
1083
  whole_conversation.append(system_prompt)
1084
  whole_conversation.append(prompt)
1085
  whole_conversation.append(response_text)
1086
 
1087
- # Create conversation metadata
1088
- # if master == False:
1089
- # whole_conversation_metadata.append(f"Batch {batch_no}:")
1090
- # else:
1091
- # #whole_conversation_metadata.append(f"Query summary metadata:")
1092
-
1093
  whole_conversation_metadata.append(f"Batch {batch_no}:")
1094
 
1095
- # if not isinstance(response, str):
1096
  try:
1097
  if "AWS" in model_source:
1098
  output_tokens = response.usage_metadata.get('outputTokens', 0)
@@ -1102,7 +1126,7 @@ assistant_prefill="") -> Tuple[List[ResponseObject], List[dict], List[str], List
1102
  output_tokens = response.usage_metadata.candidates_token_count
1103
  input_tokens = response.usage_metadata.prompt_token_count
1104
 
1105
- elif "Azure" in model_source:
1106
  input_tokens = response.usage_metadata.get('inputTokens', 0)
1107
  output_tokens = response.usage_metadata.get('outputTokens', 0)
1108
 
@@ -1123,9 +1147,6 @@ assistant_prefill="") -> Tuple[List[ResponseObject], List[dict], List[str], List
1123
 
1124
  except KeyError as e:
1125
  print(f"Key error: {e} - Check the structure of response.usage_metadata")
1126
- # else:
1127
- # print("Response is a string object.")
1128
- # whole_conversation_metadata.append("Length prompt: " + str(len(prompt)) + ". Length response: " + str(len(response)))
1129
 
1130
  return responses, conversation_history, whole_conversation, whole_conversation_metadata, response_text
1131
 
@@ -1134,8 +1155,8 @@ def call_llm_with_markdown_table_checks(batch_prompts: List[str],
1134
  conversation_history: List[dict],
1135
  whole_conversation: List[str],
1136
  whole_conversation_metadata: List[str],
1137
- google_client: ai.Client,
1138
- google_config: types.GenerateContentConfig,
1139
  model_choice: str,
1140
  temperature: float,
1141
  reported_batch_no: int,
@@ -1157,8 +1178,8 @@ def call_llm_with_markdown_table_checks(batch_prompts: List[str],
1157
  - conversation_history (List[dict]): The history of the conversation.
1158
  - whole_conversation (List[str]): The complete conversation including prompts and responses.
1159
  - whole_conversation_metadata (List[str]): Metadata about the whole conversation.
1160
- - google_client (ai.Client): The Google client object for running Gemini API calls.
1161
- - google_config (types.GenerateContentConfig): Configuration for the model.
1162
  - model_choice (str): The choice of model to use.
1163
  - temperature (float): The temperature parameter for the model.
1164
  - reported_batch_no (int): The reported batch number.
@@ -1179,13 +1200,13 @@ def call_llm_with_markdown_table_checks(batch_prompts: List[str],
1179
  call_temperature = temperature # This is correct now with the fixed parameter name
1180
 
1181
  # Update Gemini config with the new temperature settings
1182
- google_config = types.GenerateContentConfig(temperature=call_temperature, max_output_tokens=max_tokens, seed=random_seed)
1183
 
1184
  for attempt in range(MAX_OUTPUT_VALIDATION_ATTEMPTS):
1185
  # Process requests to large language model
1186
  responses, conversation_history, whole_conversation, whole_conversation_metadata, response_text = process_requests(
1187
  batch_prompts, system_prompt, conversation_history, whole_conversation,
1188
- whole_conversation_metadata, google_client, google_config, model_choice,
1189
  call_temperature, bedrock_runtime, model_source, reported_batch_no, local_model, tokenizer=tokenizer, master=master, assistant_prefill=assistant_prefill
1190
  )
1191
 
 
10
  from google import genai as ai
11
  from google.genai import types
12
  from gradio import Progress
13
+ from openai import OpenAI

14
 
15
  model_type = None # global variable setup
16
  full_text = "" # Define dummy source text (full text) just to enable highlight function to load
 
671
 
672
  def construct_azure_client(in_api_key: str, endpoint: str) -> Tuple[object, dict]:
673
  """
674
+ Constructs an OpenAI client for Azure/OpenAI AI Inference.
675
  """
676
  try:
677
  key = None
678
  if in_api_key:
679
  key = in_api_key
680
+ elif os.environ.get("AZURE_OPENAI_API_KEY"):
681
+ key = os.environ["AZURE_OPENAI_API_KEY"]
 
 
682
  if not key:
683
+ raise Warning("No Azure/OpenAI API key found.")
684
 
685
  if not endpoint:
686
+ endpoint = os.environ.get("AZURE_OPENAI_INFERENCE_ENDPOINT", "")
687
  if not endpoint:
688
+ raise Warning("No Azure/OpenAI inference endpoint found.")
689
+
690
+ # Use the provided endpoint instead of a hardcoded value
691
+ client = OpenAI(
692
+ api_key=key,
693
+ base_url=f"{endpoint}",
694
+ )
695
+
696
+ return client, dict()
697
  except Exception as e:
698
+ print("Error constructing Azure/OpenAI client:", e)
699
  raise
700
 
701
  def call_aws_claude(prompt: str, system_prompt: str, temperature: float, max_tokens: int, model_choice:str, bedrock_runtime:boto3.Session.client, assistant_prefill:str="") -> ResponseObject:
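Note: a minimal sketch of exercising the new client construction directly; the endpoint URL, key placeholder and deployment/model name below are illustrative assumptions, not values from the repository.

import os
from tools.llm_funcs import construct_azure_client

os.environ["AZURE_OPENAI_API_KEY"] = "<your-key>"  # or pass in_api_key directly
client, client_config = construct_azure_client(
    in_api_key="",
    endpoint="https://example-resource.openai.azure.com/openai/v1/",  # placeholder endpoint
)
response = client.chat.completions.create(
    model="gpt-4o-mini",  # placeholder deployment/model name
    messages=[
        {"role": "system", "content": "You are a topic extraction assistant."},
        {"role": "user", "content": "List the main topics in the following responses: ..."},
    ],
)
print(response.choices[0].message.content)
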
 
757
  )
758
 
759
  output_message = api_response['output']['message']
760
+
761
+ if 'reasoningContent' in output_message['content'][0]:
762
+ # Extract the reasoning text
763
+ reasoning_text = output_message['content'][0]['reasoningContent']['reasoningText']['text']
764
+
765
+ # Extract the output text
766
+ text = assistant_prefill + output_message['content'][1]['text']
767
+ else:
768
+ text = assistant_prefill + output_message['content'][0]['text']
769
 
770
  # The usage statistics are neatly provided in the 'usage' key.
771
  usage = api_response['usage']
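Note: the branch above follows the Bedrock Converse response shape for thinking models; a small self-contained sketch of that shape and the same parsing logic (the sample payload is illustrative, not a captured response).

sample_response = {
    "output": {"message": {"content": [
        {"reasoningContent": {"reasoningText": {"text": "Step-by-step reasoning..."}}},
        {"text": "| General topic | Subtopic | Sentiment |"},
    ]}},
    "usage": {"inputTokens": 1200, "outputTokens": 450},
}

content = sample_response["output"]["message"]["content"]
if "reasoningContent" in content[0]:
    reasoning_text = content[0]["reasoningContent"]["reasoningText"]["text"]
    text = content[1]["text"]  # the final answer follows the reasoning block
else:
    text = content[0]["text"]
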
 
812
  {"role": "system", "content": system_prompt},
813
  {"role": "user", "content": prompt}
814
  ]
 
 
 
815
 
816
  # 2. Apply the chat template
817
  # This function formats the conversation into the exact string Gemma 3 expects.
 
826
  ).to("cuda")
827
  except Exception as e:
828
  print("Error applying chat template:", e)
 
 
 
829
  raise
830
 
831
  # Map LlamaCPP parameters to transformers parameters
 
853
 
854
  # Use speculative decoding if assistant model is available
855
  if speculative_decoding and assistant_model is not None:
856
+ #print("Using speculative decoding with assistant model")
857
  outputs = model.generate(
858
  input_ids,
859
  assistant_model=assistant_model,
 
861
  streamer = streamer
862
  )
863
  else:
864
+ #print("Generating without speculative decoding")
865
  outputs = model.generate(
866
  input_ids,
867
  **generation_kwargs,
 
871
  end_time = time.time()
872
 
873
  # --- Decode and Display Results ---
 
 
874
  new_tokens = outputs[0][input_ids.shape[-1]:]
875
  assistant_reply = tokenizer.decode(new_tokens, skip_special_tokens=True)
876
+
877
 
878
  num_input_tokens = input_ids.shape[-1] # This gets the sequence length (number of tokens)
879
  num_generated_tokens = len(new_tokens)
 
888
  return assistant_reply, num_input_tokens, num_generated_tokens
889
 
890
  # Function to send a request and update history
891
+ def send_request(prompt: str, conversation_history: List[dict], client: ai.Client | OpenAI, config: types.GenerateContentConfig, model_choice: str, system_prompt: str, temperature: float, bedrock_runtime:boto3.Session.client, model_source:str, local_model= list(), tokenizer=None, assistant_model=None, assistant_prefill = "", progress=Progress(track_tqdm=True)) -> Tuple[str, List[dict]]:
892
+ """Sends a request to a language model and manages the conversation history.
893
+
894
+ This function constructs the full prompt by appending the new user prompt to the conversation history,
895
+ generates a response from the model, and updates the conversation history with the new prompt and response.
896
+ It handles different model sources (Gemini, AWS, Local) and includes retry logic for API calls.
897
+
898
+ Args:
899
+ prompt (str): The user's input prompt to be sent to the model.
900
+ conversation_history (List[dict]): A list of dictionaries representing the ongoing conversation.
901
+ Each dictionary should have 'role' and 'parts' keys.
902
+ client (ai.Client | OpenAI): The API client object for the chosen model (e.g., a Gemini `ai.Client` or an Azure/OpenAI `OpenAI` client).
903
+ config (types.GenerateContentConfig): Configuration settings for content generation (e.g., Gemini `types.GenerateContentConfig`).
904
+ model_choice (str): The specific model identifier to use (e.g., "gemini-pro", "claude-v2").
905
+ system_prompt (str): An optional system-level instruction or context for the model.
906
+ temperature (float): Controls the randomness of the model's output, with higher values leading to more diverse responses.
907
+ bedrock_runtime (boto3.Session.client): The boto3 Bedrock runtime client object for AWS models.
908
+ model_source (str): Indicates the source/provider of the model (e.g., "Gemini", "AWS", "Azure/OpenAI", "Local").
909
+ local_model (list, optional): A list containing the local model and its tokenizer (if `model_source` is "Local"). Defaults to [].
910
+ tokenizer (object, optional): The tokenizer object for local models. Defaults to None.
911
+ assistant_model (object, optional): An optional assistant model used for speculative decoding with local models. Defaults to None.
912
+ assistant_prefill (str, optional): A string to pre-fill the assistant's response, useful for certain models like Claude. Defaults to "".
913
+ progress (Progress, optional): A progress object for tracking the operation, typically from `tqdm`. Defaults to Progress(track_tqdm=True).
914
+
915
+ Returns:
916
+ Tuple[str, List[dict]]: A tuple containing the model's response text and the updated conversation history.
917
  """
918
  # Constructing the full prompt from the conversation history
919
  full_prompt = "Conversation history:\n"
 
941
  try:
942
  print("Calling Gemini model, attempt", i + 1)
943
 
944
+ response = client.models.generate_content(model=model_choice, contents=full_prompt, config=config)
945
 
946
  #print("Successful call to Gemini model.")
947
  break
 
969
 
970
  if i == number_of_api_retry_attempts:
971
  return ResponseObject(text="", usage_metadata={'RequestId':"FAILED"}), conversation_history, response_text, num_transformer_input_tokens, num_transformer_generated_tokens
972
+ elif "Azure/OpenAI" in model_source:
973
  for i in progress_bar:
974
  try:
975
+ print("Calling Azure/OpenAI inference model, attempt", i + 1)
976
+
977
+ messages=[
978
+ {
979
+ "role": "system",
980
+ "content": system_prompt,
981
+ },
982
+ {
983
+ "role": "user",
984
+ "content": prompt,
985
+ },
986
+ ]
987
+
988
+ response_raw = client.chat.completions.create(
989
+ messages=messages,
990
+ model=model_choice,
991
+ temperature=temperature,
992
+ max_completion_tokens=max_tokens
993
  )
994
+
995
  response_text = response_raw.choices[0].message.content
996
  usage = getattr(response_raw, "usage", None)
997
  input_tokens = 0
 
1005
  )
1006
  break
1007
  except Exception as e:
1008
+ print("Call to Azure/OpenAI model failed:", e, " Waiting for ", str(timeout_wait), "seconds and trying again.")
1009
  time.sleep(timeout_wait)
1010
  if i == number_of_api_retry_attempts:
1011
  return ResponseObject(text="", usage_metadata={'RequestId':"FAILED"}), conversation_history, response_text, num_transformer_input_tokens, num_transformer_generated_tokens
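Note: for the openai>=1.x chat completions response used above, the token counts can be read from the usage object roughly as follows (a sketch, assuming response_raw is the object returned by client.chat.completions.create).

usage = getattr(response_raw, "usage", None)
input_tokens = usage.prompt_tokens if usage else 0
output_tokens = usage.completion_tokens if usage else 0
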
 
1025
  response, num_transformer_input_tokens, num_transformer_generated_tokens = call_transformers_model(prompt, system_prompt, gen_config, model=local_model, tokenizer=tokenizer, assistant_model=assistant_model)
1026
  response_text = response
1027
 
 
1028
  break
1029
  except Exception as e:
1030
  # If fails, try again after X seconds in case there is a throttle limit
 
1066
  conversation_history: List[dict],
1067
  whole_conversation: List[str],
1068
  whole_conversation_metadata: List[str],
1069
+ client: ai.Client | OpenAI,
1070
  config: types.GenerateContentConfig,
1071
  model_choice: str,
1072
  temperature: float,
 
1087
  conversation_history (List[dict]): The history of the conversation.
1088
  whole_conversation (List[str]): The complete conversation including prompts and responses.
1089
  whole_conversation_metadata (List[str]): Metadata about the whole conversation.
1090
+ client (ai.Client | OpenAI): The client used to process the prompts, either a Gemini client or an OpenAI client.
1091
  config (dict): Configuration for the model.
1092
  model_choice (str): The choice of model to use.
1093
  temperature (float): The temperature parameter for the model.
 
1108
 
1109
  for prompt in prompts:
1110
 
1111
+ response, conversation_history, response_text, num_transformer_input_tokens, num_transformer_generated_tokens = send_request(prompt, conversation_history, client=client, config=config, model_choice=model_choice, system_prompt=system_prompt, temperature=temperature, local_model=local_model, tokenizer=tokenizer, assistant_model=assistant_model, assistant_prefill=assistant_prefill, bedrock_runtime=bedrock_runtime, model_source=model_source)
1112
 
1113
  responses.append(response)
1114
  whole_conversation.append(system_prompt)
1115
  whole_conversation.append(prompt)
1116
  whole_conversation.append(response_text)
1117
 
1118
  whole_conversation_metadata.append(f"Batch {batch_no}:")
1119
 
 
1120
  try:
1121
  if "AWS" in model_source:
1122
  output_tokens = response.usage_metadata.get('outputTokens', 0)
 
1126
  output_tokens = response.usage_metadata.candidates_token_count
1127
  input_tokens = response.usage_metadata.prompt_token_count
1128
 
1129
+ elif "Azure/OpenAI" in model_source:
1130
  input_tokens = response.usage_metadata.get('inputTokens', 0)
1131
  output_tokens = response.usage_metadata.get('outputTokens', 0)
1132
 
 
1147
 
1148
  except KeyError as e:
1149
  print(f"Key error: {e} - Check the structure of response.usage_metadata")
 
 
 
1150
 
1151
  return responses, conversation_history, whole_conversation, whole_conversation_metadata, response_text
1152
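Note: the per-source token accounting above could equally be expressed as one helper; this is only a sketch of the same branching, not code from the repository.

def extract_token_counts(response, model_source: str) -> tuple:
    """Return (input_tokens, output_tokens) for the supported model sources (sketch)."""
    if "AWS" in model_source or "Azure/OpenAI" in model_source:
        return (response.usage_metadata.get("inputTokens", 0),
                response.usage_metadata.get("outputTokens", 0))
    if "Gemini" in model_source:
        return (response.usage_metadata.prompt_token_count,
                response.usage_metadata.candidates_token_count)
    return 0, 0
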
 
 
1155
  conversation_history: List[dict],
1156
  whole_conversation: List[str],
1157
  whole_conversation_metadata: List[str],
1158
+ client: ai.Client | OpenAI,
1159
+ client_config: types.GenerateContentConfig,
1160
  model_choice: str,
1161
  temperature: float,
1162
  reported_batch_no: int,
 
1178
  - conversation_history (List[dict]): The history of the conversation.
1179
  - whole_conversation (List[str]): The complete conversation including prompts and responses.
1180
  - whole_conversation_metadata (List[str]): Metadata about the whole conversation.
1181
+ - client (ai.Client | OpenAI): The client object for running Gemini or Azure/OpenAI API calls.
1182
+ - client_config (types.GenerateContentConfig): Configuration for the model.
1183
  - model_choice (str): The choice of model to use.
1184
  - temperature (float): The temperature parameter for the model.
1185
  - reported_batch_no (int): The reported batch number.
 
1200
  call_temperature = temperature # This is correct now with the fixed parameter name
1201
 
1202
  # Update Gemini config with the new temperature settings
1203
+ client_config = types.GenerateContentConfig(temperature=call_temperature, max_output_tokens=max_tokens, seed=random_seed)
1204
 
1205
  for attempt in range(MAX_OUTPUT_VALIDATION_ATTEMPTS):
1206
  # Process requests to large language model
1207
  responses, conversation_history, whole_conversation, whole_conversation_metadata, response_text = process_requests(
1208
  batch_prompts, system_prompt, conversation_history, whole_conversation,
1209
+ whole_conversation_metadata, client, client_config, model_choice,
1210
  call_temperature, bedrock_runtime, model_source, reported_batch_no, local_model, tokenizer=tokenizer, master=master, assistant_prefill=assistant_prefill
1211
  )
1212