import base64
import json
import os
from io import BytesIO
from typing import Optional, Dict, Any

# --- New Dependency ---
# The google-genai library (the SDK that provides `from google import genai`)
# is required. Please install it using: pip install google-genai
try:
    from google import genai
    from google.genai.types import RawReferenceImage
    from google.genai import types
except ImportError:
    print("google-genai library not found. Please install it using: pip install google-genai")
    exit()

# Pillow is required for image format conversion and normalization.
# Please install it using: pip install Pillow
try:
    from PIL import Image
except ImportError:
    print("Pillow library not found. Please install it using: pip install Pillow")
    exit()

# --- Configuration ---
# It is recommended to set these as environment variables for security.
# For ImgBB: used to upload the final image and get a public URL.
IMGBB_API_KEY = os.getenv("IMGBB_API_KEY")
# For Google AI: your API key for accessing the Gemini model.
GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")

# Configure the Gemini client library. `client` is always defined (None when
# no key is configured) so that referencing it can never raise NameError.
client = genai.Client(api_key=GEMINI_API_KEY) if GEMINI_API_KEY else None


def upload_to_imgbb(image_path: str, file_name: str) -> Optional[str]:
    """
    Uploads the image at image_path to ImgBB.

    Args:
        image_path: The local path of the image file to upload.
        file_name: The file name reported to ImgBB for the upload.

    Returns:
        The public URL of the uploaded image, or None if the API key is not
        set, the file cannot be read, the request fails, or the response
        does not contain a URL.
    """
    if not IMGBB_API_KEY:
        print("Warning: IMGBB_API_KEY not set, skipping upload to ImgBB.")
        return None

    # requests is now only used for ImgBB uploads, so we import it here.
    import requests

    print(f"Uploading {file_name} to ImgBB...")
    try:
        with open(image_path, 'rb') as f:
            files = {"image": (file_name, f.read())}
        resp = requests.post(
            "https://api.imgbb.com/1/upload",
            params={"key": IMGBB_API_KEY},
            files=files,
            timeout=30
        )
        resp.raise_for_status()
        payload = resp.json()
        # "data" may be missing or null in an error payload; treat both as empty.
        data = payload.get("data") or {}
        url = data.get("url")
        if url:
            print(f"Successfully uploaded to ImgBB: {url}")
            return url
        print(f"Error: ImgBB API response missing 'url'. Response: {payload}")
        return None
    except requests.exceptions.RequestException as e:
        print(f"Error: ImgBB upload failed: {e}")
        return None
    except (OSError, ValueError) as e:
        # OSError: the local file could not be read.
        # ValueError: the response body was not valid JSON (older versions of
        # requests raise a plain ValueError here).
        print(f"Error: ImgBB upload failed: {e}")
        return None


def _save_image_bytes(image_bytes: bytes, save_path: str) -> bool:
    """
    Saves raw image bytes to a file.

    Args:
        image_bytes: The raw bytes of the image data.
        save_path: The local path to save the image.

    Returns:
        True if saving was successful, False otherwise.
    """
    print(f"Saving generated image to: {save_path}")
    try:
        with open(save_path, 'wb') as f:
            f.write(image_bytes)
        print("Image successfully saved.")
        return True
    except IOError as e:
        print(f"Error saving image file: {e}")
        return False


def _load_image_as_jpeg_bytes(image_path: str) -> bytes:
    """
    Opens the image at image_path and re-encodes it as JPEG bytes.

    Modes other than RGB and L (for example RGBA or palette images) are
    converted to RGB first, because the JPEG encoder rejects them; any alpha
    channel is discarded by that conversion.

    Raises:
        FileNotFoundError: If image_path does not exist.
        Exception: Whatever Pillow raises when the file is not a readable image.
    """
    # The context manager closes the underlying file handle once encoding is done.
    with Image.open(image_path) as img:
        encodable = img if img.mode in ("RGB", "L") else img.convert("RGB")
        buffer = BytesIO()
        encodable.save(buffer, format='JPEG')
    return buffer.getvalue()


def _extract_image_bytes(candidate: Any) -> Optional[bytes]:
    """Returns the data of the first non-empty inline part of a candidate, or None."""
    content = getattr(candidate, "content", None)
    parts = getattr(content, "parts", None) or []
    for part in parts:
        inline_data = getattr(part, "inline_data", None)
        data = getattr(inline_data, "data", None)
        if data:
            return data
    return None


def generate_image(
    prompt_text: str,
    image_path: str,
    download_path: Optional[str] = None
) -> Optional[Dict[str, Any]]:
    """
    Sends a request to the Gemini API using the Python SDK to modify an image,
    optionally saves the result, and uploads it to ImgBB.

    Args:
        prompt_text: The instructional text for image modification.
        image_path: The file path to the input image (any common format).
        download_path: If provided, the path to save the generated image.
            The ImgBB upload only happens when the image was saved here.

    Returns:
        A dictionary containing a simplified API response and the ImgBB URL
        (None when nothing was uploaded), or None on error.
    """
    if not GEMINI_API_KEY:
        print("Error: GEMINI_API_KEY environment variable not set.")
        return None

    try:
        # --- Image Loading ---
        # Decoding and JPEG encoding happen here so that a bad input image is
        # reported as an image problem rather than as an API failure.
        print(f"Processing image: {image_path}")
        img_bytes = _load_image_as_jpeg_bytes(image_path)
    except FileNotFoundError:
        print(f"Error: Image file not found at {image_path}")
        return None
    except Exception as e:
        print(f"Error processing image file. Ensure it's a valid image. Details: {e}")
        return None

    # Top-level boundary: any unexpected failure below is reported and turned
    # into a None return, which is what callers of this function rely on.
    try:
        # --- API Call via Python SDK ---
        print("Initializing Gemini model...")
        print("Sending request to Gemini API via Python SDK...")
        response = client.models.generate_content(
            model='gemini-2.0-flash-preview-image-generation',
            contents=[
                types.Part.from_text(text=prompt_text),
                types.Part.from_bytes(data=img_bytes, mime_type='image/jpeg'),
            ],
            config=types.GenerateContentConfig(
                response_modalities=["TEXT", "IMAGE"]
            )
        )

        # --- Process Gemini API Response ---
        candidates = getattr(response, "candidates", None)
        if not candidates:
            feedback = getattr(response, "prompt_feedback", None)
            print(f"Error: Gemini API returned no candidates. Prompt feedback: {feedback}")
            return None
        candidate = candidates[0]

        generated_image_bytes = _extract_image_bytes(candidate)
        if generated_image_bytes:
            # Report the size only; never dump raw image bytes to stdout.
            print(f"Received generated image ({len(generated_image_bytes)} bytes).")
        else:
            print("Warning: Gemini response contained no image data.")

        # --- Download & Upload Logic ---
        imgbb_url = None
        if download_path and generated_image_bytes:
            if _save_image_bytes(generated_image_bytes, download_path):
                # If save is successful, upload the saved file to ImgBB
                file_name = os.path.basename(download_path)
                imgbb_url = upload_to_imgbb(download_path, file_name)

        # finish_reason is normally an enum; tolerate None or a plain value.
        finish_reason = getattr(candidate, "finish_reason", None)
        if finish_reason is None:
            finish_reason_name = None
        else:
            finish_reason_name = getattr(finish_reason, "name", str(finish_reason))

        # Prepare a final dictionary similar to the original script's output
        return {
            "api_response": {
                "candidates": [{
                    "finish_reason": finish_reason_name
                }]
            },
            "imgbb_url": imgbb_url
        }
    except Exception as e:
        print(f"Gemini API request failed: {e}")
        return None