""" utils.py """ # Standard imports import os from typing import List # Third party imports import numpy as np from openai import OpenAI client = OpenAI(api_key=os.environ.get("OPENAI_API_KEY")) # Maximum tokens for text-embedding-3-large MAX_TOKENS = 8191 # We don't have access to the tokenizer for text-embedding-3-large, and just assume 1 character = 1 token here def get_embeddings( texts: List[str], model: str = "text-embedding-3-large" ) -> List[List[float]]: """ Generate embeddings for a list of texts using OpenAI API synchronously. Args: texts: List of strings to embed. model: OpenAI embedding model to use (default: text-embedding-3-large). Returns: A list of embeddings (each embedding is a list of floats). Raises: Exception: If the OpenAI API call fails. """ # Truncate texts to max token limit truncated_texts = [text[:MAX_TOKENS] for text in texts] # Make the API call response = client.embeddings.create(input=truncated_texts, model=model) # Extract embeddings from response embeddings = np.array([data.embedding for data in response.data]) return embeddings