import os
import re

import pandas as pd
import streamlit as st
from dotenv import load_dotenv
from openai import OpenAI

load_dotenv()  # Take environment variables from .env.
api_key = os.getenv('OPENAI_API_KEY')

# A bare similarity score in [0, 1], e.g. "0", "0.7", "0.85", "1" or "1.0".
_SCORE_PATTERN = re.compile(r'^(?:0(?:\.\d+)?|1(?:\.0+)?)$')


def get_ballanced_intents(data, max_count=40):
    """Return the rows of *data* whose intent is not over-represented.

    Args:
        data: DataFrame with an ``'intent'`` column.
        max_count: Keep only intents that occur at most this many times.

    Returns:
        The subset of *data* restricted to the retained intents. The shape of
        the result is printed as a side effect.
    """
    counts = data['intent'].value_counts()
    kept_intents = counts[counts <= max_count].index.tolist()
    filtered_data = data[data['intent'].isin(kept_intents)]
    print(f"Filtered data shape: {filtered_data.shape}")
    return filtered_data


def create_prompt(user_text, utterances):
    """Build the chat prompt asking the model to rate similarity.

    Args:
        user_text: The text entered by the user.
        utterances: Iterable of example utterances; they are numbered from 1
            in the order given.

    Returns:
        The prompt string: the user text, the numbered examples, and the
        rating instruction.
    """
    numbered_examples = "".join(
        f"{number}. {utterance}\n"
        for number, utterance in enumerate(utterances, start=1)
    )
    return (
        f"User text: {user_text}\n\nUtterance examples:\n"
        f"{numbered_examples}"
        "\nPlease rate the similarity of the user text to each of the "
        "utterance examples on a scale from 0 to 1."
    )


def _parse_similarity_scores(content):
    """Turn the model's reply into one score string per non-blank line.

    The score is taken as the text after the last ``'- '`` on each line.
    Anything that is not a bare number in [0, 1] becomes ``'0'``.
    """
    scores = []
    for line in content.strip().split('\n'):
        if not line.strip():
            # Blank separator lines would otherwise add spurious '0' entries
            # and shift every following score away from its utterance.
            continue
        candidate = line.split('- ')[-1].strip()
        print(candidate)
        scores.append(candidate if _SCORE_PATTERN.match(candidate) else '0')
    return scores


def get_similarity_scores(prompt, temperature):
    """Ask the OpenAI chat model to rate the prompt's utterance examples.

    Args:
        prompt: Prompt text, as produced by ``create_prompt``.
        temperature: Sampling temperature forwarded to the API.

    Returns:
        A list of score strings (one per non-blank reply line, ``'0'`` for
        lines without a valid score), or ``None`` when ``OPENAI_API_KEY`` is
        not set (a message is written to the Streamlit page in that case).
    """
    load_dotenv()  # Re-read .env so a key added after import is picked up.
    api_key = os.getenv('OPENAI_API_KEY')
    if not api_key:
        st.write("Please set the OPENAI_API_KEY environment variable.")
        return None

    # The key must come from the environment; never embed a secret in source.
    client = OpenAI(api_key=api_key)
    response = client.chat.completions.create(
        model="gpt-4o",
        messages=[
            {
                "role": "system",
                # The line break inside this message is kept from the original.
                "content": "You are a helpful assistant. \nRespond without details in ()",
            },
            {"role": "user", "content": prompt},
        ],
        max_tokens=150,
        n=1,
        # Generation stops when the model emits "8.", so at most seven
        # numbered items are rated.
        stop='8.',
        temperature=temperature,
    )

    # The API may return no text content; treat that as an empty reply.
    content = response.choices[0].message.content or ''
    return _parse_similarity_scores(content)


def get_most_similar_intent(user_text, utterances, intents, temperature=0.5):
    """Pick the intent whose example utterance best matches *user_text*.

    Args:
        user_text: The text entered by the user.
        utterances: Example utterances, aligned by position with *intents*.
        intents: Intent labels, indexable by position.
        temperature: Sampling temperature forwarded to the model.

    Returns:
        A ``(intent, score)`` tuple where *score* is the score string of the
        best match. Ties go to the earliest utterance. When no scores are
        available (missing API key or empty reply) the result is
        ``(None, '0')``.
    """
    prompt = create_prompt(user_text, utterances)
    similarity_scores = get_similarity_scores(prompt, temperature)
    print(similarity_scores)

    # Ignore surplus reply lines that have no corresponding intent.
    usable_scores = (similarity_scores or [])[:len(intents)]
    if not usable_scores:
        return None, '0'

    # Compare numerically; max() keeps the first index on ties.
    max_index = max(
        range(len(usable_scores)),
        key=lambda position: float(usable_scores[position]),
    )
    print(f'max_index: {max_index}')
    return intents[max_index], usable_scores[max_index]