|
|
import os |
|
|
import json |
|
|
|
|
|
# Prompt template sent as the system message. The ``{{schema}}`` placeholder
# is replaced further down with the raw text of ``schema.json``.
SYSTEM_PROMPT = (
    "From this delivery note document, extract the following information by following these instructions.\n"
    "The information is in French. Return the information in JSON format according to the schema below without changing the items name.\n"
    "The details of each piece of information to be extracted are found in the description field of each item.\n"
    "\n"
    "{{schema}}\n"
)

# The schema file is looked up next to this module, not in the current
# working directory.
SCHEMA_PATH = os.path.join(os.path.dirname(__file__), "schema.json")

# Read the schema verbatim (no JSON parsing) at import time.
with open(SCHEMA_PATH, mode="r", encoding="utf-8") as f:
    schema_content = f.read()

# Final system prompt: the template with the schema text inlined.
SYSTEM_PROMPT = SYSTEM_PROMPT.replace("{{schema}}", schema_content)
|
|
|
|
|
def extract_from_bl(client, document_source, model="mistral-medium-2508"):
    """Extract the items of a delivery note through a JSON-mode chat completion.

    Sends the module-level ``SYSTEM_PROMPT`` as the system message and a user
    message made of a short text instruction followed by ``document_source``,
    then decodes the content of the first returned choice as JSON.

    Args:
        client: Object exposing ``chat.complete(model=..., messages=...,
            response_format=..., temperature=...)`` (presumably a Mistral SDK
            client -- confirm against the caller).
        document_source: Content part appended as-is to the user message,
            right after the text instruction. Its shape is not checked here.
        model: Model identifier forwarded to ``client.chat.complete``.
            Defaults to the previously hard-coded ``"mistral-medium-2508"``.

    Returns:
        The ``"items"`` list when the reply decodes to a dict that holds one;
        otherwise the decoded JSON value unchanged (expected to be a list,
        but not validated here).

    Raises:
        ValueError: If the reply decodes to a dict that has no ``"items"``
            entry holding a list. ``json.JSONDecodeError`` (itself a
            ``ValueError``) propagates from ``json.loads`` when the reply is
            not valid JSON.
    """
    messages = [
        {"role": "system", "content": SYSTEM_PROMPT},
        {
            "role": "user",
            "content": [
                {"type": "text", "text": "Extract data from this document"},
                document_source,
            ],
        },
    ]

    chat_response = client.chat.complete(
        model=model,
        messages=messages,
        response_format={"type": "json_object"},
        temperature=0.0,
    )

    res = json.loads(chat_response.choices[0].message.content)

    # Anything that is not a dict is handed back untouched.
    if not isinstance(res, dict):
        return res

    # NOTE(review): presumably JSON-object mode makes the model wrap the list
    # in an object, hence the unwrapping of "items" -- confirm.
    items = res.get("items")
    if isinstance(items, list):
        return items

    raise ValueError(
        f"LLM return a dict instead of a list, and it does not contain an items key:\n{res}"
    )