|
|
import altair as alt |
|
|
import numpy as np |
|
|
import pandas as pd |
|
|
import streamlit as st |
|
|
from dotenv import load_dotenv |
|
|
import os |
|
|
from mistralai import Mistral |
|
|
import json |
|
|
|
|
|
from util import display_pdf, upload_pdf |
|
|
from bon_livraison import extract_from_bl |
|
|
from analyse_fourrage import extract_from_af |
|
|
from document_classifier import classify_document |
|
|
from carousel import carousel |
|
|
import io |
|
|
|
|
|
# Load variables from a local .env file (if present) into the process
# environment, so the Mistral API key can be configured without typing it
# into the sidebar every run.
load_dotenv()


# Optional API key from the environment; may be None, in which case main()
# falls back to asking for the key in the Streamlit sidebar.
MISTRAL_API_KEY = os.environ.get("MISTRAL_API_KEY")
|
|
|
|
|
# Column labels whose product values are lists of plain strings and are
# flattened into one newline-separated cell per product.
_LIST_COLUMNS = ("matières premières", "valeur nutritionelle", "animaux")

# MIME type for .xlsx downloads (shared by both download buttons).
_EXCEL_MIME = 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet'

# Extra columns appended (empty) to the "bons de livraison" export so the
# Excel file always exposes the full expected template.
_LIVRAISON_EXTRA_FIELDS = [
    "Code d'identification de l'aliment",
    "Numéro d'agrément ou d'enregistrement du fournisseur de MP (si disponible)",
    "Numéro d'agrément ou d'enregistrement du fabricant d'aliment",
    "Numéro de lot",
    "Date de durabilité minimale",
    "La teneur en eau",
    "Pays d'origine ou zone géographique",
    "Certification / label",
    "Autres certification / label - champ libre",
    "Poids carbone de l'aliment",
    "Méthode de calcul du poids carbone de l'aliment"
]

# Raw extractor keys -> display column names for forage analyses.
_ANALYSE_RENAME = {
    "denomination": "Dénomination",
    "type_prairie": "Type de prairie",
    "coupe": "Coupe",
    "matiere_seche_pourcentage": "Matière sèche (%)",
    "proteines_brutes_pourcentage_MS": "Protéines brutes (% MS)",
    "cellulose_pourcentage_MS": "Cellulose (% MS)",
    "cendres_brutes_pourcentage_MS": "Cendres brutes (% MS)"
}

# Preferred display order for the analyses table (missing ones are skipped).
_ANALYSE_ORDERED_COLUMNS = [
    "Dénomination", "Type de prairie", "Coupe",
    "Matière sèche (%)", "Protéines brutes (% MS)",
    "Cellulose (% MS)", "Cendres brutes (% MS)"
]

# Extra template columns for the analyses export.
# NOTE(review): trailing spaces in some names are preserved from the original
# source — confirm whether they are intentional in the downstream template.
_ANALYSE_EXTRA_FIELDS = [
    "Nom ou raison sociale du responsable de l’étiquetage",
    "Adresse du responsable de l’étiquetage",
    "Numéro d'agrément du responsable de l'étiquetage ",
    "Numéro de lot",
    "Si vrac - Quantité nette livrée",
    "Si non vrac, pré-emabllé poids fixe - Quantité nette unitaire",
    "Si non vrac, pré-emabllé poids fixe - Nombre d'unité de vente livrées",
    "Mode d’emploi",
    "Produit biologique",
    "La liste des additifs",
    "Code d'identification",
    "Nom commercial",
    "Conditionnement",
    "Conditionnement - précisions",
    "Date de la coupe",
    "Taux de légumineuse",
    "Pays d'origine ou zone géographique",
    "Autres certification / label - champ libre",
    "Affourragement ",
    "Poids carbone de l'aliment",
    "Méthode de calcul du poids carbone de l'aliment"
]


def _format_additifs(additifs):
    """Render a list of additive dicts as one human-readable line each."""
    result = ""
    for item in additifs:
        nom = item.get("nom", "Inconnu")
        identifiant = item.get("identifiant", "N/A")
        quantite = item.get("quantite", "N/A")
        # Keep only the first functional group, capitalized for display.
        groupe = item.get("groupe_fonctionnel", "").split(",")[0].capitalize()
        result += f"{nom} ({groupe}) id:{identifiant} {quantite}\n"
    return result


def _flatten_livraisons(livraisons, columns):
    """Flatten extracted 'bon de livraison' products into table rows.

    Each product dict becomes one row ordered by *columns*: "additifs" and
    the other list-valued fields are joined into a single newline-separated
    cell; missing scalar fields become None.
    """
    rows = []
    for livraison in livraisons:
        for product in livraison:
            row = []
            for column in columns:
                if column == "additifs":
                    # .get() guards against documents without additives
                    # (indexing directly raised KeyError before).
                    row.append(_format_additifs(product.get(column, [])))
                elif column in _LIST_COLUMNS:
                    row.append("".join(item + "\n" for item in product.get(column, [])))
                else:
                    row.append(str(product[column]) if column in product else None)
            rows.append(row)
    return rows


def _add_missing_fields(df, fields):
    """Append each field absent from *df* as an empty (None) column."""
    for field in fields:
        if field not in df.columns:
            df[field] = None
    return df


def _excel_bytes(df, sheet_name):
    """Serialize *df* to an in-memory .xlsx workbook and return its bytes."""
    output = io.BytesIO()
    with pd.ExcelWriter(output, engine='xlsxwriter') as writer:
        df.to_excel(writer, index=False, sheet_name=sheet_name)
    return output.getvalue()


def _extract_document(client, document_source):
    """Classify a document and run the matching extractor.

    Returns (doc_type, response); response is None for unsupported types.
    May raise whatever the Mistral-backed helpers raise.
    """
    doc_type = classify_document(client, document_source)
    if doc_type == "livraison":
        return doc_type, extract_from_bl(client, document_source)
    if doc_type == "analyse":
        return doc_type, extract_from_af(client, document_source)
    return doc_type, None


def _render_header():
    """Display the partner logos and the app tagline."""
    col1, col2, col3 = st.columns([1, 1, 1])
    with col1:
        st.image("img/logo_cniel.jpg", width=50)
    with col2:
        st.image("img/logo_bovalim.png", width=98)
    with col3:
        st.image("https://upload.wikimedia.org/wikipedia/commons/thumb/6/6a/GS1_France_Large_RGB_2014-12-17.png/1200px-GS1_France_Large_RGB_2014-12-17.png", width=84)

    st.markdown("<center><b style=\"font-size: 1.2em;\">Outil pour standardiser les données d'alimentation des bovins</b></center>", unsafe_allow_html=True)


def _render_single_tab(client):
    """Tab 1: upload one PDF, preview it, and show the extracted JSON."""
    uploaded_file = st.file_uploader("Je charge mon document PDF", type=["pdf"])

    document_source = None
    if uploaded_file:
        content = uploaded_file.read()
        display_pdf(content)
        document_source = {
            "type": "document_url",
            "document_url": upload_pdf(client, content, uploaded_file.name)
        }

    if document_source and st.button("J'obtiens mes données sous format JSON"):
        with st.spinner("Extracting JSON content..."):
            try:
                doc_type, response = _extract_document(client, document_source)
                st.write(f"Document type: {doc_type}")
                if response is not None:
                    st.json(response)
                else:
                    # Consistent with the batch tab: report unsupported types
                    # instead of silently showing nothing.
                    st.error("Le document n'est pas supporté.")
            except Exception as e:
                st.error(f"Processing error: {str(e)}")


def _render_batch_tab(client):
    """Tab 2: upload several PDFs, extract them all, and offer Excel exports."""
    uploaded_files = st.file_uploader("Je charge mes documents PDF", type=["pdf"], accept_multiple_files=True)

    if len(uploaded_files) > 0 and st.button("J'obtiens mes données sous format Excel"):
        progress_bar = st.progress(0, text="Extraction en cours...")
        livraisons = []
        analyses = []

        for i, file in enumerate(uploaded_files):
            content = file.read()
            document_source = {
                "type": "document_url",
                "document_url": upload_pdf(client, content, file.name)
            }
            try:
                doc_type, response = _extract_document(client, document_source)
                if doc_type == "livraison":
                    livraisons.append(response)
                elif doc_type == "analyse":
                    analyses.append(response)
                else:
                    st.error("Le document n'est pas supporté.")

                progress = (i + 1) / len(uploaded_files)
                progress_bar.progress(
                    progress,
                    text='Extraction terminée' if progress >= 1.0 else f'{i + 1}/{len(uploaded_files)} fichiers traités'
                )
            except Exception as e:
                st.error(f"Processing error: {str(e)}")

        # Fixed typo in the success message: "extraits at" -> "extraits et".
        st.success(f'{len(livraisons)} bons de livraison extraits et {len(analyses)} analyses fourrages extraites.')

        tab_livraison, tab_analyse = st.tabs(["Bons de livraison", "Analyses fourrages"])
        with tab_livraison:
            _render_livraisons(livraisons)
        with tab_analyse:
            _render_analyses(analyses)


def _render_livraisons(livraisons):
    """Show the flattened 'bons de livraison' table and its Excel download."""
    if len(livraisons) == 0:
        st.info("Aucun bons de livraisons trouvé.")
        return

    # Column order comes from the JSON schema used during extraction.
    schema_path = os.path.join(os.path.dirname(__file__), "schema.json")
    with open(schema_path, "r", encoding="utf-8") as f:
        schema = json.load(f)
    columns = list(schema['items']['properties'])

    with st.expander('json'):
        st.json(livraisons)

    df_livraisons = pd.DataFrame(_flatten_livraisons(livraisons, columns), columns=columns)
    # Round only numeric columns; text columns pass through untouched.
    df_livraisons = df_livraisons.apply(
        lambda col: col.round(2) if pd.api.types.is_numeric_dtype(col) else col
    )
    df_livraisons = _add_missing_fields(df_livraisons, _LIVRAISON_EXTRA_FIELDS)

    st.dataframe(df_livraisons, use_container_width=True)
    st.download_button(
        label="Télécharger le tableau en Excel",
        data=_excel_bytes(df_livraisons, 'BonsLivraisons'),
        file_name='bon_de_livraisons.xlsx',
        mime=_EXCEL_MIME
    )


def _render_analyses(analyses):
    """Show the forage-analysis table and its Excel download."""
    if len(analyses) == 0:
        st.info("Aucune analyse fourrage trouvée.")
        return

    # Extractors may return JSON strings; normalise everything to dicts.
    parsed_analyses = [json.loads(a) if isinstance(a, str) else a for a in analyses]

    df_analyses = pd.DataFrame(parsed_analyses).rename(columns=_ANALYSE_RENAME)
    df_analyses = df_analyses[[c for c in _ANALYSE_ORDERED_COLUMNS if c in df_analyses.columns]]
    df_analyses = df_analyses.round(1)
    df_analyses = _add_missing_fields(df_analyses, _ANALYSE_EXTRA_FIELDS)

    st.dataframe(df_analyses, use_container_width=True)
    st.download_button(
        label="Télécharger le tableau en Excel",
        data=_excel_bytes(df_analyses, 'Analyses'),
        file_name='analyses_fourrages.xlsx',
        mime=_EXCEL_MIME
    )


def main():
    """Run the BOVALIM Streamlit app.

    Lets the user upload one PDF (JSON preview) or several PDFs (Excel
    export) describing cattle feed; documents are classified and parsed
    with the Mistral API.
    """
    st.set_page_config(page_icon="🐮", page_title="BOVALIM")

    # Prefer the key from the environment; otherwise ask in the sidebar.
    if not MISTRAL_API_KEY:
        api_key = st.sidebar.text_input("Mistral API Key", type="password")
    else:
        api_key = MISTRAL_API_KEY

    if not api_key:
        st.warning("Enter API key to continue")
        return

    _render_header()

    # One shared client for both tabs (previously created inside tab 1 and
    # only incidentally visible to tab 2).
    client = Mistral(api_key=api_key)

    tab1, tab2 = st.tabs(["Je veux charger un document", "Je veux charger plusieurs documents"])
    with tab1:
        _render_single_tab(client)
    with tab2:
        _render_batch_tab(client)
|
|
|
|
|
|
|
|
# Standard script guard: launch the app only when this file is executed
# directly (e.g. `streamlit run <file>`), not when imported as a module.
if __name__ == "__main__":


    main()