|
""" |
|
utils.py |
|
""" |
|
|
|
|
|
import os |
|
from typing import List |
|
|
|
|
|
import numpy as np |
|
from openai import OpenAI |
|
|
|
# Shared OpenAI client for this module; the API key is looked up in the
# OPENAI_API_KEY environment variable at import time.
client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))

# Per-text length cap used by get_embeddings. NOTE: it is applied there as a
# character slice, so it bounds characters rather than tokens.
MAX_TOKENS = 8191
|
|
|
|
|
def get_embeddings(
    texts: List[str], model: str = "text-embedding-3-large"
) -> np.ndarray:
    """
    Generate embeddings for a list of texts using the OpenAI API synchronously.

    Each text is cut to its first MAX_TOKENS characters before being sent.
    NOTE: the cut is a character slice, not a token count, so it only
    approximates a token limit.

    Args:
        texts: List of strings to embed.
        model: OpenAI embedding model to use (default: text-embedding-3-large).

    Returns:
        A NumPy array built from the embeddings in the response, one row per
        returned embedding. For empty input an empty array of shape (0, 0)
        is returned and no API request is made.

    Raises:
        Exception: If the OpenAI API call fails.
    """
    # Character-based truncation of every input text.
    truncated_texts = [text[:MAX_TOKENS] for text in texts]

    # Nothing to embed: skip the request instead of sending an empty input
    # list, which the embeddings endpoint rejects.
    if not truncated_texts:
        return np.empty((0, 0), dtype=float)

    response = client.embeddings.create(input=truncated_texts, model=model)

    # Stack the per-text embedding vectors into a single array.
    return np.array([data.embedding for data in response.data])
|
|