gabrielchua's picture
update repo
27a346a unverified
"""
utils.py
"""
# Standard imports
import os
from typing import List
# Third party imports
import numpy as np
from openai import OpenAI
# Shared OpenAI client for this module; the API key is read from the
# OPENAI_API_KEY environment variable at import time.
client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))

# Input limit for text-embedding-3-large is 8191 tokens. No tokenizer is used
# here, so the limit is applied as a character count instead (one character
# is assumed to equal one token).
MAX_TOKENS = 8191
def get_embeddings(
    texts: List[str], model: str = "text-embedding-3-large"
) -> np.ndarray:
    """
    Generate embeddings for a list of texts using the OpenAI API synchronously.

    Each text is cut to at most ``MAX_TOKENS`` characters before being sent,
    and all texts are submitted in a single request.

    Args:
        texts: List of strings to embed.
        model: OpenAI embedding model to use (default: text-embedding-3-large).

    Returns:
        A NumPy array built from the embeddings in the API response, one entry
        per item in the response data. When ``texts`` is empty, an empty array
        of shape ``(0, 0)`` is returned and no API request is made.

    Raises:
        Exception: Any error raised by the OpenAI client call is propagated
            unchanged.
    """
    # Character-based truncation stands in for a token-based limit
    truncated_texts = [text[:MAX_TOKENS] for text in texts]

    # Nothing to embed: skip the request instead of sending an empty input list
    if not truncated_texts:
        return np.empty((0, 0))

    # Make the API call (one request for the whole batch)
    response = client.embeddings.create(input=truncated_texts, model=model)

    # Extract embeddings from response
    embeddings = np.array([data.embedding for data in response.data])
    return embeddings