# eve / gemini_flash_lib / image_generator.py
# Chandima Prabhath
# gemini flash image edit
# 3380d86
import base64
import json
import os
from io import BytesIO
from typing import Optional, Dict, Any
# --- Gemini SDK dependency ---
# `from google import genai` is provided by the Google Gen AI SDK, which is
# published on PyPI as "google-genai". The older "google-generativeai" package
# exposes `google.generativeai` instead and does not satisfy these imports.
# Install with: pip install google-genai
try:
    from google import genai
    from google.genai.types import RawReferenceImage
    from google.genai import types
except ImportError as exc:
    # Abort with the message on stderr and a non-zero exit status. The bare
    # `exit()` helper is injected by the `site` module for interactive use and
    # is not guaranteed to exist, so SystemExit is raised directly.
    raise SystemExit(
        "google-genai library not found. "
        "Please install it using: pip install google-genai"
    ) from exc
# --- Pillow dependency ---
# Pillow is required for image format conversion and normalization.
# Install with: pip install Pillow
try:
    from PIL import Image
except ImportError as exc:
    # Abort with the message on stderr and a non-zero exit status. The bare
    # `exit()` helper is injected by the `site` module for interactive use and
    # is not guaranteed to exist, so SystemExit is raised directly.
    raise SystemExit(
        "Pillow library not found. Please install it using: pip install Pillow"
    ) from exc
# --- Configuration ---
# Both keys are read from environment variables so that secrets stay out of
# the source code.
# ImgBB key: used to upload the final image and obtain a public URL.
IMGBB_API_KEY = os.getenv("IMGBB_API_KEY")
# Google AI key: used to authenticate against the Gemini API.
GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")
# Shared Gemini client. The name is always bound so that referencing or
# importing `client` never raises NameError; it is None when no API key is
# configured.
client = genai.Client(api_key=GEMINI_API_KEY) if GEMINI_API_KEY else None
def upload_to_imgbb(image_path: str, file_name: str) -> Optional[str]:
    """
    Upload the image stored at ``image_path`` to ImgBB.

    Args:
        image_path: Local path of the image file to upload.
        file_name: File name sent to ImgBB in the multipart form.

    Returns:
        The public URL reported by ImgBB, or None when the API key is not
        configured, the file cannot be read, the HTTP request fails, or the
        response does not contain a URL.
    """
    if not IMGBB_API_KEY:
        print("Warning: IMGBB_API_KEY not set, skipping upload to ImgBB.")
        return None

    print(f"Uploading {file_name} to ImgBB...")

    # Read the file up front: a missing or unreadable file is a local problem
    # and must yield None (as documented) instead of escaping as an OSError.
    try:
        with open(image_path, 'rb') as f:
            image_data = f.read()
    except OSError as e:
        print(f"Error: could not read image file {image_path}: {e}")
        return None

    # requests is only needed for ImgBB uploads, so it is imported lazily.
    import requests

    try:
        resp = requests.post(
            "https://api.imgbb.com/1/upload",
            params={"key": IMGBB_API_KEY},
            files={"image": (file_name, image_data)},
            timeout=30,
        )
        resp.raise_for_status()
        payload = resp.json()
    except requests.exceptions.RequestException as e:
        # The key travels in the query string and requests includes the full
        # URL in HTTP error messages, so redact it before printing.
        safe_message = str(e).replace(IMGBB_API_KEY, "***")
        print(f"Error: ImgBB upload failed: {safe_message}")
        return None
    except ValueError as e:
        # Some requests versions raise a plain ValueError for a non-JSON body.
        print(f"Error: ImgBB returned a non-JSON response: {e}")
        return None

    # Tolerate `"data": null` and non-object bodies instead of raising
    # AttributeError on `.get`.
    data = payload.get("data") if isinstance(payload, dict) else None
    url = data.get("url") if isinstance(data, dict) else None
    if url:
        print(f"Successfully uploaded to ImgBB: {url}")
        return url

    print(f"Error: ImgBB API response missing 'url'. Response: {payload}")
    return None
def _save_image_bytes(image_bytes: bytes, save_path: str) -> bool:
"""
Saves raw image bytes to a file.
Args:
image_bytes: The raw bytes of the image data.
save_path: The local path to save the image.
Returns:
True if saving was successful, False otherwise.
"""
print(f"Saving generated image to: {save_path}")
try:
with open(save_path, 'wb') as f:
f.write(image_bytes)
print("Image successfully saved.")
return True
except IOError as e:
print(f"Error saving image file: {e}")
return False
def _load_image_as_jpeg_bytes(image_path: str) -> bytes:
    """Open the image at ``image_path`` and return it re-encoded as JPEG bytes."""
    # The context manager closes the underlying file handle once encoding is done.
    with Image.open(image_path) as source:
        # JPEG has no alpha channel or palette, and Pillow refuses to write
        # modes such as RGBA, LA or P as JPEG, so normalize to RGB first.
        # Note: transparency is discarded by this conversion.
        if source.mode in ("RGB", "L"):
            encodable = source
        else:
            encodable = source.convert("RGB")
        buffer = BytesIO()
        encodable.save(buffer, format='JPEG')
    return buffer.getvalue()


def _extract_inline_image_bytes(response: Any) -> Optional[bytes]:
    """Return the payload of the first inline-data part of the first candidate, or None."""
    candidates = getattr(response, "candidates", None) or []
    if not candidates:
        return None
    content = getattr(candidates[0], "content", None)
    # `parts` may be missing or None (e.g. for a blocked response); treat as empty.
    for part in getattr(content, "parts", None) or []:
        inline = getattr(part, "inline_data", None)
        data = getattr(inline, "data", None)
        if data:
            return data
    return None


def generate_image(
    prompt_text: str,
    image_path: str,
    download_path: Optional[str] = None,
    *,
    model: str = 'gemini-2.0-flash-preview-image-generation',
) -> Optional[Dict[str, Any]]:
    """
    Send an image plus an instruction to the Gemini API via the Python SDK,
    optionally save the generated image, and upload the saved file to ImgBB.

    Args:
        prompt_text: The instructional text for image modification.
        image_path: The file path to the input image. The image is re-encoded
            as JPEG (converted to RGB when necessary) before being sent.
        download_path: If provided, the path to save the generated image.
            The ImgBB upload only happens when the image was saved here.
        model: Identifier of the Gemini model to call (keyword-only).

    Returns:
        A dictionary of the form
        ``{"api_response": {"candidates": [{"finish_reason": ...}]},
        "imgbb_url": ...}``, or None when the API key is missing, the input
        image cannot be read or encoded, the API call fails, or the response
        contains no candidates. ``imgbb_url`` is None when nothing was
        uploaded.
    """
    if not GEMINI_API_KEY:
        print("Error: GEMINI_API_KEY environment variable not set.")
        return None

    # --- Image loading and normalization ---
    print(f"Processing image: {image_path}")
    try:
        img_bytes = _load_image_as_jpeg_bytes(image_path)
    except FileNotFoundError:
        print(f"Error: Image file not found at {image_path}")
        return None
    except Exception as e:
        print(f"Error processing image file. Ensure it's a valid image. Details: {e}")
        return None

    # Broad catch is deliberate: this is the boundary to the SDK and the
    # function's contract is to report failures by returning None.
    try:
        # --- API call via Python SDK ---
        print("Sending request to Gemini API via Python SDK...")
        response = client.models.generate_content(
            model=model,
            contents=[
                types.Part.from_text(text=prompt_text),
                types.Part.from_bytes(data=img_bytes, mime_type='image/jpeg'),
            ],
            config=types.GenerateContentConfig(
                response_modalities=["TEXT", "IMAGE"]
            ),
        )

        # --- Process Gemini API response ---
        candidates = getattr(response, "candidates", None) or []
        if not candidates:
            print("Error: Gemini API response contained no candidates.")
            return None

        generated_image_bytes = _extract_inline_image_bytes(response)
        if generated_image_bytes is None:
            print("Warning: Gemini API response did not contain an image.")

        # --- Save & upload ---
        imgbb_url = None
        if download_path and generated_image_bytes:
            if _save_image_bytes(generated_image_bytes, download_path):
                # Upload the file that was just written, under its base name.
                file_name = os.path.basename(download_path)
                imgbb_url = upload_to_imgbb(download_path, file_name)

        # finish_reason may be unset; fall back to the raw value instead of
        # failing after the image has already been saved and uploaded.
        finish_reason = getattr(candidates[0], "finish_reason", None)
        finish_reason_name = getattr(finish_reason, "name", finish_reason)

        return {
            "api_response": {
                "candidates": [{
                    "finish_reason": finish_reason_name
                }]
            },
            "imgbb_url": imgbb_url
        }
    except Exception as e:
        print(f"Gemini API request failed: {e}")
        return None