Omar Leonardo Sanchez Granados committed
Commit · 57bca82
Parent(s): 4f95287

Fix/bedrock issues (#2718)
### What problem does this PR solve?

Adding a Bedrock API key for Claude Sonnet was broken. I found that the issue came up when trying to test the LLM configuration: `system` is a required parameter in boto3's Converse call. There were also problems in the Bedrock embedding implementation when trying to encode queries.
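For context, a minimal sketch of the guarded call (the region, model ID, and prompts below are illustrative placeholders, not taken from the PR): botocore validates `system` as a list of `{"text": ...}` blocks, so the configuration test needs to pass one even when no system prompt is configured.

```python
import boto3

# Placeholder region and model ID, for illustration only.
client = boto3.client("bedrock-runtime", region_name="us-east-1")

system = None  # e.g. no system prompt set in the UI

# Passing system=None fails botocore parameter validation
# (it must be a list of {"text": ...} blocks), so guard it:
response = client.converse(
    modelId="anthropic.claude-3-sonnet-20240229-v1:0",
    messages=[{"role": "user", "content": [{"text": "Hello"}]}],
    inferenceConfig={"maxTokens": 256, "temperature": 0.3},
    system=[{"text": system if system else "Answer the user's message."}],
)
print(response["output"]["message"]["content"][0]["text"])
```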
### Type of change
- [X] Bug Fix (non-breaking change which fixes an issue)
- rag/llm/chat_model.py +3 -2
- rag/llm/embedding_model.py +1 -1
rag/llm/chat_model.py CHANGED
```diff
@@ -630,7 +630,7 @@ class BedrockChat(Base):
                 modelId=self.model_name,
                 messages=history,
                 inferenceConfig=gen_conf,
-                system=[{"text": system
+                system=[{"text": (system if system else "Answer the user's message.")}] ,
             )
 
             # Extract and print the response text.
@@ -675,7 +675,8 @@ class BedrockChat(Base):
             streaming_response = self.client.converse_stream(
                 modelId=self.model_name,
                 messages=history,
-                inferenceConfig=gen_conf
+                inferenceConfig=gen_conf,
+                system=[{"text": system if system else ""}],
             )
 
             # Extract and print the streamed response text in real-time.
```
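Both call sites now always send a `system` block. As a hypothetical refactoring (not part of this PR), the shared guard could be expressed as a helper:

```python
from typing import Optional

def system_block(system: Optional[str], fallback: str = "Answer the user's message.") -> list:
    # Bedrock's Converse API expects a list of {"text": ...} blocks;
    # fall back to a default prompt when no system message is configured.
    return [{"text": system if system else fallback}]
```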
rag/llm/embedding_model.py CHANGED
```diff
@@ -443,7 +443,7 @@ class BedrockEmbed(Base):
 
         response = self.client.invoke_model(modelId=self.model_name, body=json.dumps(body))
         model_response = json.loads(response["body"].read())
-        embeddings.extend(
+        embeddings.extend(model_response["embedding"])
 
         return np.array(embeddings), token_count
 
```
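On the embedding side, the fixed line extends `embeddings` with the flat float list the model returns under the `"embedding"` key, so `np.array(embeddings)` yields the 1-D query vector callers expect. A small sketch with made-up values (the payload shape is an assumption based on the code above):

```python
import numpy as np

# Assumed invoke_model payload shape; the numbers are made up.
model_response = {"embedding": [0.1, 0.2, 0.3, 0.4]}

embeddings = []
embeddings.extend(model_response["embedding"])  # flat list of floats, as in the fix

print(np.array(embeddings).shape)  # (4,) -- a single 1-D query vector

# Wrapping the list instead would produce a 2-D matrix:
print(np.array([model_response["embedding"]]).shape)  # (1, 4)
```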