fix OpenAI Embedding length error (#1972)
Browse files
### What problem does this PR solve?
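Fixes #1958: OpenAI Embedding length error. In `OpenAIEmbed`, both `encode` and `encode_queries` now pass their input through `truncate(..., 8191)` before calling the embeddings API.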
### Type of change
- [x] Bug Fix (non-breaking change which fixes an issue)
---------
Co-authored-by: Zhedong Cen <[email protected]>
rag/llm/embedding_model.py
CHANGED
|
@@ -99,14 +99,14 @@ class OpenAIEmbed(Base):
|
|
| 99 |
self.model_name = model_name
|
| 100 |
|
| 101 |
def encode(self, texts: list, batch_size=32):
|
| 102 |
-
texts = [truncate(t,
|
| 103 |
res = self.client.embeddings.create(input=texts,
|
| 104 |
model=self.model_name)
|
| 105 |
return np.array([d.embedding for d in res.data]
|
| 106 |
), res.usage.total_tokens
|
| 107 |
|
| 108 |
def encode_queries(self, text):
|
| 109 |
-
res = self.client.embeddings.create(input=[truncate(text,
|
| 110 |
model=self.model_name)
|
| 111 |
return np.array(res.data[0].embedding), res.usage.total_tokens
|
| 112 |
|
|
|
|
| 99 |
self.model_name = model_name
|
| 100 |
|
| 101 |
def encode(self, texts: list, batch_size=32):
|
| 102 |
+
texts = [truncate(t, 8191) for t in texts]
|
| 103 |
res = self.client.embeddings.create(input=texts,
|
| 104 |
model=self.model_name)
|
| 105 |
return np.array([d.embedding for d in res.data]
|
| 106 |
), res.usage.total_tokens
|
| 107 |
|
| 108 |
def encode_queries(self, text):
|
| 109 |
+
res = self.client.embeddings.create(input=[truncate(text, 8191)],
|
| 110 |
model=self.model_name)
|
| 111 |
return np.array(res.data[0].embedding), res.usage.total_tokens
|
| 112 |
|