File size: 6,288 Bytes
3380d86
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
import base64
import json
import os
from io import BytesIO
from typing import Optional, Dict, Any

# --- Third-party dependencies ---
# The `google.genai` package imported below is distributed on PyPI as
# `google-genai` (the Google Gen AI SDK), not `google-generativeai`.
# Please install it using: pip install google-genai
try:
    from google import genai
    from google.genai.types import RawReferenceImage
    from google.genai import types
except ImportError:
    print("google-genai library not found. Please install it using: pip install google-genai")
    # Exit with a non-zero status so callers and shells can see the failure;
    # SystemExit is used directly because the `exit` helper comes from `site`
    # and is not guaranteed to exist in every interpreter configuration.
    raise SystemExit(1)

# Pillow is required for image format conversion and normalization.
# Please install it using: pip install Pillow
try:
    from PIL import Image
except ImportError:
    print("Pillow library not found. Please install it using: pip install Pillow")
    raise SystemExit(1)

# --- Configuration ---
# Secrets are read from environment variables so they never have to be
# written into the source file.
# For ImgBB: used to upload the final image and get a public URL.
IMGBB_API_KEY = os.getenv("IMGBB_API_KEY")
# For Google AI: your API key for accessing the Gemini model.
GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")

# Shared Gemini client. The name is always bound (to None when no API key is
# configured) so that referencing it without a key cannot raise NameError.
client = genai.Client(api_key=GEMINI_API_KEY) if GEMINI_API_KEY else None

def upload_to_imgbb(image_path: str, file_name: str) -> Optional[str]:
    """
    Uploads the image at image_path to ImgBB.

    Args:
        image_path: Local path of the image file to upload.
        file_name: File name sent to ImgBB in the multipart form.

    Returns:
        The public URL reported by ImgBB, or None when the API key is not
        set, the file cannot be read, the HTTP request fails, or the
        response does not contain a URL.
    """
    if not IMGBB_API_KEY:
        print("Warning: IMGBB_API_KEY not set, skipping upload to ImgBB.")
        return None

    # requests is only needed for ImgBB uploads, so it is imported lazily here.
    import requests
    print(f"Uploading {file_name} to ImgBB...")

    # Read the file first so a missing or unreadable file is reported as a
    # failed upload (None) instead of escaping as an uncaught OSError.
    try:
        with open(image_path, 'rb') as f:
            files = {"image": (file_name, f.read())}
    except OSError as e:
        print(f"Error: ImgBB upload failed: {e}")
        return None

    try:
        resp = requests.post(
            "https://api.imgbb.com/1/upload",
            params={"key": IMGBB_API_KEY},
            files=files,
            timeout=30
        )
        resp.raise_for_status()
        # Parse the body once; ValueError covers a non-JSON body on requests
        # versions whose JSON error is not a RequestException subclass.
        payload = resp.json()
    except (requests.exceptions.RequestException, ValueError) as e:
        print(f"Error: ImgBB upload failed: {e}")
        return None

    # Guard each level so an unexpected payload shape yields None, not a crash.
    data = payload.get("data") if isinstance(payload, dict) else None
    url = data.get("url") if isinstance(data, dict) else None
    if url:
        print(f"Successfully uploaded to ImgBB: {url}")
        return url

    print(f"Error: ImgBB API response missing 'url'. Response: {payload}")
    return None

def _save_image_bytes(image_bytes: bytes, save_path: str) -> bool:
    """
    Write an in-memory image to disk.

    The bytes are written verbatim in binary mode; no decoding or format
    conversion takes place.

    Args:
        image_bytes: Encoded image data to persist.
        save_path: Destination file path; an existing file is overwritten.

    Returns:
        True when the file was written, False when an I/O error occurred
        (the error is printed rather than raised).
    """
    print(f"Saving generated image to: {save_path}")
    succeeded = False
    try:
        with open(save_path, 'wb') as out_file:
            out_file.write(image_bytes)
        print("Image successfully saved.")
        succeeded = True
    except OSError as write_error:  # IOError is an alias of OSError in Python 3
        print(f"Error saving image file: {write_error}")
    return succeeded


def _load_image_as_jpeg_bytes(image_path: str) -> bytes:
    """Open the image at image_path and return it re-encoded as JPEG bytes."""
    # The context manager closes the underlying file handle once encoding is done.
    with Image.open(image_path) as source:
        # JPEG cannot store alpha or palette data, so modes such as RGBA, LA
        # and P (common for PNG/GIF/WebP input) are converted to RGB first.
        if source.mode in ("RGB", "L"):
            encodable = source
        else:
            encodable = source.convert("RGB")
        buffer = BytesIO()
        encodable.save(buffer, format='JPEG')
    return buffer.getvalue()


def _first_inline_image_bytes(candidate: Any) -> Optional[bytes]:
    """Return the first non-empty inline data payload in candidate, or None."""
    content = getattr(candidate, "content", None)
    # content and parts may both be missing on a candidate without output.
    for part in getattr(content, "parts", None) or []:
        payload = getattr(getattr(part, "inline_data", None), "data", None)
        if payload:
            return payload
    return None


def generate_image(
    prompt_text: str,
    image_path: str,
    download_path: Optional[str] = None
) -> Optional[Dict[str, Any]]:
    """
    Sends a request to the Gemini API using the Python SDK to modify an image,
    optionally saves the result, and uploads it to ImgBB.

    The input image is re-encoded as JPEG before being sent. The generated
    image is only saved, and only uploaded to ImgBB, when download_path is
    provided and the response contains image data.

    Args:
        prompt_text: The instructional text for image modification.
        image_path: The file path to the input image (any common format).
        download_path: If provided, the path to save the generated image.

    Returns:
        A dictionary of the form
        {"api_response": {"candidates": [{"finish_reason": ...}]},
         "imgbb_url": ...}, where finish_reason is the enum member name (or
        None if the API did not report one) and imgbb_url is None when
        nothing was uploaded. Returns None when the API key is missing, the
        input image cannot be read, the response has no candidates, or any
        other error occurs.
    """
    if not GEMINI_API_KEY:
        print("Error: GEMINI_API_KEY environment variable not set.")
        return None

    # --- Image Loading ---
    # Decoding and JPEG encoding happen here so that a corrupt or unsupported
    # image is reported as an image problem rather than as an API failure.
    print(f"Processing image: {image_path}")
    try:
        img_bytes = _load_image_as_jpeg_bytes(image_path)
    except FileNotFoundError:
        print(f"Error: Image file not found at {image_path}")
        return None
    except Exception as e:
        print(f"Error processing image file. Ensure it's a valid image. Details: {e}")
        return None

    # Top-level boundary: any failure below is reported and turned into None.
    try:
        # --- API Call via Python SDK ---
        print("Initializing Gemini model...")
        print("Sending request to Gemini API via Python SDK...")
        response = client.models.generate_content(
            model='gemini-2.0-flash-preview-image-generation',
            contents=[
                types.Part.from_text(text=prompt_text),
                types.Part.from_bytes(data=img_bytes, mime_type='image/jpeg'),
            ],
            config=types.GenerateContentConfig(
                response_modalities=["TEXT", "IMAGE"]
            )
        )

        # --- Process Gemini API Response ---
        # The candidate list can be missing or empty; check it explicitly
        # instead of relying on an IndexError/TypeError.
        candidates = getattr(response, "candidates", None) or []
        if not candidates:
            print("Error: Gemini API response contained no candidates.")
            return None
        candidate = candidates[0]

        generated_image_bytes = _first_inline_image_bytes(candidate)
        if generated_image_bytes is None:
            print("Warning: Gemini API response did not contain image data.")

        imgbb_url = None
        # --- Download & Upload Logic ---
        if download_path and generated_image_bytes:
            if _save_image_bytes(generated_image_bytes, download_path):
                # If save is successful, upload the saved file to ImgBB
                file_name = os.path.basename(download_path)
                imgbb_url = upload_to_imgbb(download_path, file_name)

        # finish_reason may be unset; do not lose the result because of it.
        finish_reason = getattr(candidate, "finish_reason", None)
        finish_reason_name = finish_reason.name if finish_reason is not None else None

        return {
            "api_response": {
                "candidates": [{
                    "finish_reason": finish_reason_name
                }]
            },
            "imgbb_url": imgbb_url
        }

    except Exception as e:
        print(f"Gemini API request failed: {e}")
        return None