|
import gradio as gr |
|
import json |
|
from mistralai import Mistral |
|
from pydantic import BaseModel, Field |
|
from datetime import datetime |
|
import base64 |
|
from io import BytesIO |
|
from PIL import Image |
|
import os |
|
|
|
# Structured fields requested from the OCR service for a whole climate
# document. Documented with comments rather than a class docstring so that
# the schema generated from this model is left untouched.
class ClimateData(BaseModel):
    # --- Document identity (required) ---
    document_type: str = Field(
        ...,
        description="Type of document: report, policy, research, assessment, etc.",
    )
    title: str = Field(
        ...,
        description="Document title or main heading",
    )
    organization: str = Field(
        ...,
        description="Publishing organization or agency",
    )
    publication_date: str = Field(
        ...,
        description="Publication or release date",
    )

    # --- Measurements (optional, empty list when absent) ---
    temperature_data: list[str] = Field(
        default=[],
        description="Temperature readings, anomalies, projections",
    )
    precipitation_data: list[str] = Field(
        default=[],
        description="Precipitation measurements and forecasts",
    )
    co2_levels: list[str] = Field(
        default=[],
        description="CO2 concentration data and emissions",
    )
    sea_level_data: list[str] = Field(
        default=[],
        description="Sea level rise measurements",
    )
    extreme_events: list[str] = Field(
        default=[],
        description="Extreme weather events and frequencies",
    )

    # --- Time coverage ---
    year_ranges: list[str] = Field(
        default=[],
        description="Time periods and date ranges covered",
    )
    baseline_periods: list[str] = Field(
        default=[],
        description="Reference or baseline periods used",
    )
    projection_periods: list[str] = Field(
        default=[],
        description="Future projection timeframes",
    )

    # --- Policy content ---
    policy_recommendations: list[str] = Field(
        default=[],
        description="Policy recommendations and actions",
    )
    targets_goals: list[str] = Field(
        default=[],
        description="Climate targets, goals, and commitments",
    )
    mitigation_strategies: list[str] = Field(
        default=[],
        description="Mitigation approaches and strategies",
    )
    adaptation_measures: list[str] = Field(
        default=[],
        description="Adaptation measures and plans",
    )

    # --- Scope ---
    regions_covered: list[str] = Field(
        default=[],
        description="Geographical regions or countries covered",
    )
    sectors_affected: list[str] = Field(
        default=[],
        description="Economic sectors or systems affected",
    )

    # --- Findings ---
    main_conclusions: list[str] = Field(
        default=[],
        description="Primary conclusions and findings",
    )
    risk_assessments: list[str] = Field(
        default=[],
        description="Risk levels and assessments",
    )
    uncertainty_levels: list[str] = Field(
        default=[],
        description="Uncertainty ranges and confidence levels",
    )
|
|
|
# Structured description requested for each chart/figure region found in a
# document. Every field is required. Documented with comments rather than a
# class docstring so that the generated schema is left untouched.
class ChartDescription(BaseModel):
    # What kind of visual it is and what it plots.
    chart_type: str = Field(
        default=...,
        description="Type of visualization: line chart, bar chart, map, table, etc.",
    )
    data_type: str = Field(
        default=...,
        description="Type of data shown: temperature, emissions, policy timeline, etc.",
    )

    # What the visual says.
    trend_description: str = Field(
        default=...,
        description="Description of trends, patterns, and changes",
    )
    key_insights: str = Field(
        default=...,
        description="Important findings and takeaways from the visualization",
    )

    # When and where the plotted data applies.
    time_period: str = Field(
        default=...,
        description="Time period or range covered in the chart",
    )
    geographical_scope: str = Field(
        default=...,
        description="Geographical area or regions shown",
    )
|
|
|
def initialize_client(api_key):
    """Create a Mistral API client for the given key.

    Args:
        api_key: Mistral API key. Surrounding whitespace (for example a
            trailing newline picked up when pasting) is removed before use.

    Returns:
        A ``Mistral`` client constructed with the cleaned key.

    Raises:
        ValueError: If the key is missing, empty, or only whitespace.
    """
    # Only strings can be stripped; any other value is passed through as-is
    # and still rejected below when falsy (e.g. None).
    cleaned_key = api_key.strip() if isinstance(api_key, str) else api_key
    if not cleaned_key:
        raise ValueError("Please provide a valid Mistral API key")
    return Mistral(api_key=cleaned_key)
|
|
|
def _join_page_markdown(pages):
    """Concatenate the ``markdown`` text of each OCR page, skipping empty pages."""
    page_texts = (getattr(page, "markdown", None) for page in pages)
    return "\n\n".join(text for text in page_texts if text)


def _collect_image_annotations(pages):
    """Gather every non-empty ``image_annotation`` found on the pages' images."""
    annotations = []
    for page in pages:
        for image in getattr(page, "images", None) or []:
            annotation = getattr(image, "image_annotation", None)
            if annotation:
                annotations.append(annotation)
    return annotations


def extract_climate_data(api_key, file_path=None, url=None):
    """Run Mistral OCR with structured annotations on a climate document.

    The document is taken from ``file_path`` when given (it is uploaded and a
    signed URL is requested for it), otherwise from ``url``.

    Args:
        api_key: Mistral API key passed to ``initialize_client``.
        file_path: Path of a local document to upload. Takes precedence over
            ``url`` when both are supplied.
        url: URL of a document to process directly.

    Returns:
        On success, a dict with the keys ``success`` (True),
        ``extracted_text``, ``climate_data``, ``chart_descriptions`` and
        ``raw_response`` (``str()`` of the OCR response). On failure, a dict
        with a single ``error`` key; exceptions are never propagated.
    """
    # Reject a call without any document before doing client or network work.
    if not file_path and not url:
        return {"error": "No file or URL provided"}

    try:
        client = initialize_client(api_key)
        from mistralai.extra import response_format_from_pydantic_model

        if file_path:
            # The context manager closes the handle as soon as the upload
            # call returns, instead of leaking it until garbage collection.
            with open(file_path, "rb") as document_file:
                uploaded_file = client.files.upload(
                    file={
                        "file_name": os.path.basename(file_path),
                        "content": document_file,
                    },
                    purpose="ocr",
                )
            signed_url = client.files.get_signed_url(file_id=uploaded_file.id)
            document_url = signed_url.url
        else:
            document_url = url

        response = client.ocr.process(
            model="mistral-ocr-latest",
            document={"type": "document_url", "document_url": document_url},
            bbox_annotation_format=response_format_from_pydantic_model(ChartDescription),
            document_annotation_format=response_format_from_pydantic_model(ClimateData),
            include_image_base64=True,
        )

        # NOTE(review): the OCR response is documented as carrying per-page
        # `markdown` and per-image `image_annotation` rather than top-level
        # `text` / `bbox_annotations`. The top-level attributes are still
        # honoured first; the per-page data is only a fallback. Confirm the
        # attribute names against the installed SDK version.
        pages = getattr(response, "pages", None) or []
        extracted_text = getattr(response, "text", None) or _join_page_markdown(pages)
        bbox_annotations = (
            getattr(response, "bbox_annotations", None)
            or _collect_image_annotations(pages)
        )
        doc_annotations = getattr(response, "document_annotation", {})

        return {
            "success": True,
            "extracted_text": extracted_text,
            "climate_data": doc_annotations,
            "chart_descriptions": bbox_annotations,
            "raw_response": str(response),
        }
    except Exception as e:
        # Boundary for the UI: report any failure as data instead of raising.
        return {"error": f"OCR processing failed: {str(e)}"}
|
|
|
def process_climate_document(api_key, file, url_input):
    """Extract structured climate data from an uploaded document or a URL.

    Args:
        api_key: Mistral API key used to authenticate the OCR request.
        file: Uploaded climate document. Either a filesystem path or an
            upload object exposing the path as ``.name``. Takes precedence
            over ``url_input`` when both are given.
        url_input: URL of a climate document to analyze when no file is
            supplied. May be empty or None.

    Returns:
        The dict produced by ``extract_climate_data`` on success (including
        ``climate_data``, ``chart_descriptions`` and ``extracted_text``), or
        ``{"error": message}`` when no input was given or processing failed.
    """
    if file:
        # Plain paths are used directly; anything else is expected to expose
        # its path as `.name`, as the upload objects this function was
        # written against do. Path-like objects must not go through `.name`,
        # which would only yield their final component.
        if isinstance(file, (str, os.PathLike)):
            document_path = os.fspath(file)
        else:
            document_path = file.name
        result = extract_climate_data(api_key, file_path=document_path)
    else:
        # The URL field may arrive as None, so normalise before stripping.
        document_url = (url_input or "").strip()
        if not document_url:
            return {"error": "Please provide either a file or URL"}
        result = extract_climate_data(api_key, url=document_url)

    if "error" in result:
        # Surface only the message, dropping any other keys.
        return {"error": result["error"]}
    return result
|
|
|
def _image_fallback_result(image_type, placeholder, description):
    """Build the placeholder payload returned when no model analysis is available."""
    return {
        "image_type": image_type,
        "climate_feature": placeholder,
        "location": placeholder,
        # "%d" is the day-of-month directive; "DD" would be emitted literally.
        "date_captured": datetime.now().strftime("%Y-%m-%d"),
        "cloud_density": 0.0,
        "temperature_anomaly": 0.0,
        "description": description,
    }


def analyze_image(api_key, image):
    """Analyze a climate-related image with the Pixtral chat model.

    Args:
        api_key: Mistral API key used to authenticate the chat request.
        image: PIL image to analyze. It is PNG-encoded and sent inline as a
            base64 data URL.

    Returns:
        The JSON value decoded from the model reply, which is asked to
        contain image_type, climate_feature, location, date_captured,
        cloud_density, temperature_anomaly and description. If the reply is
        not valid JSON, a placeholder dict with "unknown" values is returned.
        If anything else fails, a placeholder dict with image_type "error"
        and the failure message in description is returned. Exceptions are
        never propagated.
    """
    try:
        if image is None:
            # Give a readable message instead of an AttributeError on None.
            raise ValueError("No image provided")

        client = initialize_client(api_key)

        # Modes outside this set (e.g. CMYK) cannot be written as PNG, so
        # convert them to RGB before encoding.
        if image.mode not in ("1", "L", "LA", "I", "I;16", "P", "RGB", "RGBA"):
            image = image.convert("RGB")

        png_buffer = BytesIO()
        image.save(png_buffer, format="PNG")
        img_str = base64.b64encode(png_buffer.getvalue()).decode()

        prompt = """Analyze this image and provide a JSON output with the following fields:
        - image_type: Type of image (e.g., satellite, ground, aerial)
        - climate_feature: Primary climate feature observed (e.g., cloud_cover, precipitation)
        - location: Estimated or general location (e.g., Pacific Ocean, Sahara Desert)
        - date_captured: Current date in YYYY-MM-DD format
        - cloud_density: Estimated cloud density (0.0 to 1.0) if applicable
        - temperature_anomaly: Estimated temperature anomaly in Celsius (e.g., 1.2)
        - description: Brief description of the image content
        """

        response = client.chat.complete(
            model="pixtral-large-latest",
            messages=[
                {
                    "role": "user",
                    "content": [
                        {"type": "text", "text": prompt},
                        {"type": "image_url", "image_url": f"data:image/png;base64,{img_str}"},
                    ],
                }
            ],
        )
        response_text = response.choices[0].message.content

        # The reply may be wrapped in Markdown code fences; drop them before
        # decoding.
        cleaned_text = response_text.replace("```json", "").replace("```", "").strip()
        try:
            return json.loads(cleaned_text)
        except json.JSONDecodeError:
            return _image_fallback_result(
                "unknown", "unknown", "Error parsing model output."
            )
    except Exception as e:
        # Boundary for the UI: report any failure as data instead of raising.
        return _image_fallback_result(
            "error", "none", f"Error processing image: {str(e)}"
        )
|
|
|
# Build the Gradio interface: one shared API-key field plus two tabs, one
# wired to process_climate_document and one to analyze_image.
# NOTE(review): the nesting below is reconstructed from the order of the
# calls; confirm it matches the intended layout.
with gr.Blocks(title="Climate Data and Image Analyzer") as demo:
    gr.Markdown("# Climate Data and Image Analysis Tool\nAnalyze climate documents or images using Mistral OCR and Pixtral models")
    # The key is passed as the first input to both click handlers below.
    api_key_input = gr.Textbox(
        label="Mistral API Key",
        placeholder="Enter your Mistral API key here",
        type="password"
    )
    with gr.Tabs():
        # --- Tab 1: document OCR and structured extraction ---
        with gr.Tab(label="Document Analysis"):
            gr.Markdown("## Document Analysis\nExtract data from climate reports, policies, or research papers")
            with gr.Row():
                # Left column: inputs (file upload or URL) and the trigger.
                with gr.Column():
                    file_input = gr.File(
                        label="Upload Climate Document",
                        file_types=[".pdf", ".png", ".jpg", ".jpeg", ".docx", ".pptx"]
                    )
                    url_input = gr.Textbox(
                        label="Or Enter Document URL",
                        placeholder="https://example.com/climate-policy.pdf"
                    )
                    process_btn = gr.Button("Analyze Document", variant="primary")
                # Right column: JSON view of the handler's return value.
                with gr.Column():
                    doc_output = gr.JSON(label="Document Analysis Results")
            # Input order matches process_climate_document(api_key, file, url_input).
            process_btn.click(
                fn=process_climate_document,
                inputs=[api_key_input, file_input, url_input],
                outputs=doc_output
            )
            # Each example row is [file, url]; the file slot is left empty so
            # only the URL field is filled in.
            gr.Examples(
                examples=[
                    [None, "https://static.pib.gov.in/WriteReadData/specificdocs/documents/2021/dec/doc202112101.pdf"],
                    [None, "https://www.ipcc.ch/site/assets/uploads/2018/02/WG1AR5_Chapter02_FINAL.pdf"],
                    [None, "https://unfccc.int/sites/default/files/resource/parisagreement_publication.pdf"]
                ],
                inputs=[file_input, url_input]
            )
        # --- Tab 2: single-image analysis ---
        with gr.Tab(label="Image Analysis"):
            gr.Markdown("## Image Analysis\nAnalyze climate-related images for features like cloud cover or temperature anomalies")
            # type="pil" so the handler receives an object it can call .save() on.
            image_input = gr.Image(type="pil", label="Upload Image")
            image_btn = gr.Button("Analyze Image", variant="primary")
            image_output = gr.JSON(label="Image Analysis Result")
            # Input order matches analyze_image(api_key, image).
            image_btn.click(
                fn=analyze_image,
                inputs=[api_key_input, image_input],
                outputs=image_output
            )
|
|
|
# Start the app only when this file is executed as a script, not on import.
if __name__ == "__main__":
    # mcp_server=True is forwarded to Gradio's launch(); presumably this also
    # serves the app's handler functions over MCP — confirm against the
    # installed Gradio version.
    demo.launch(mcp_server=True)