|
import os |
|
from openai import OpenAI |
|
import pandas as pd |
|
import numpy as np |
|
import faiss |
|
import streamlit as st |
|
from dotenv import load_dotenv |
|
from pathlib import Path |
|
from src.E_openAI_embeddings import calculate_openai_similarity, load_openai_embeddings |
|
|
|
|
|
# Page heading rendered at the top of the app.
st.title("Sentence Similarity Finder")

# Create each session-state slot only when it is absent, leaving any value
# that is already stored untouched.
for _slot in ("embeddings", "api_key"):
    if _slot not in st.session_state:
        st.session_state[_slot] = None
|
|
|
# Resolve the OpenAI API key and build the API client, reporting the outcome
# in the page instead of letting an exception escape.
try:
    # The "not found" message below points users at the .env file, so that
    # file has to be read into the environment before the lookup.
    # load_dotenv() leaves variables that are already set untouched.
    load_dotenv()
    api_key = os.getenv('OPENAI_API_KEY')

    # Keep the key (or None when it is missing) in session state.
    st.session_state.api_key = api_key

    if not api_key:
        st.error("OpenAI API key not found. Please set it in the .env file.")
    else:
        # Pass the key to the client when it is constructed rather than
        # patching the attribute afterwards.
        openai = OpenAI(api_key=api_key)
        # Report success only once the client has actually been created.
        st.success("OpenAI API key loaded successfully.")
except Exception as e:
    # Top-level UI boundary: surface any failure to the user.
    st.error(f"An error occurred while loading the OpenAI API: {e}")
|
|
|
|
|
|
|
# When the button is pressed, read the embeddings CSV through
# load_openai_embeddings and keep the result in session state.
if st.button("Get OpenAI embeddings"):
    csv_path = Path("embeddings") / "openai_embeddings.csv"

    if csv_path.exists():
        st.session_state.embeddings = load_openai_embeddings(str(csv_path))
        st.success(f"Embeddings loaded from {csv_path}.")
    else:
        # Nothing to load: tell the user which path was checked.
        st.error(f"Embeddings file not found at {csv_path}.")
|
|
|
|
|
# Sentence typed by the user; it is passed to the similarity helper below.
user_input = st.text_input("Enter a sentence:")

# Number of matches requested from calculate_openai_similarity.
top_n = 5

# Single "Calculate similarity" handler. The script previously declared this
# button twice with the same label and no key, called an undefined
# get_openai_similarity in the first copy, and invoked an undefined stop()
# between the copies; only the guarded version using the imported
# calculate_openai_similarity is kept.
if st.button("Calculate similarity"):
    try:
        if st.session_state.embeddings is None:
            st.warning("Please load embeddings by clicking the 'Get OpenAI embeddings' button.")
        elif not user_input.strip():
            # Reject empty or whitespace-only input.
            st.warning("Please enter a valid sentence for similarity calculation.")
        else:
            # Presumably a sequence of (sentence, score) pairs, since it is
            # sliced and unpacked that way below -- confirm against the
            # helper in src.E_openAI_embeddings.
            top_similar_sentences = calculate_openai_similarity(
                user_input, st.session_state.embeddings, top_n=top_n
            )
            # NOTE(review): looks like leftover debug output (first match
            # only); kept so the page content is unchanged.
            st.write(f'top_similar_sentences: {top_similar_sentences[:1]}')

            if top_similar_sentences:
                st.write(f"**Top {top_n} similar sentences:**")
                for i, (sentence, score) in enumerate(top_similar_sentences, 1):
                    st.write(f"{i}. **Sentence:** {sentence}")
                    st.write(f" **Similarity score:** {score:.4f}")
            else:
                st.info("No similar sentences found.")
    except Exception as e:
        # Top-level UI boundary: report the failure on stdout and in the page.
        print(f"An error occurred during similarity calculation: {e}")
        st.error(f"An error occurred during similarity calculation: {e}")