"""Gradio app that sends an uploaded image plus a text prompt to Gemini.

The prompt is translated to English, the image is uploaded through the
Google GenAI client, and the first image returned by the model is shown in
a gallery. If the model answers with text only, that text is shown instead.
"""

import base64
import json
import mimetypes
import os
import tempfile
import time
import uuid
from io import BytesIO

import gradio as gr
from google import genai
from google.genai import types
from PIL import Image, ImageDraw, ImageFont
from translatepy import Translator


def save_binary_file(file_name: str, data: bytes) -> None:
    """Write ``data`` to ``file_name`` in binary mode, overwriting it."""
    with open(file_name, "wb") as f:
        f.write(data)


def translate_to_english(text: str) -> str:
    """Translate ``text`` to English, falling back to the input on failure.

    Translation is best-effort: any error raised by the translator is
    printed and the original text is returned unchanged so that generation
    can still proceed.
    """
    # Nothing to translate; avoid a pointless translator round-trip.
    if not text or not text.strip():
        return text
    try:
        translator = Translator()
        result = translator.translate(text, destination_language="en")
        return result.result
    except Exception as e:  # deliberate best-effort fallback
        print(f"Translation error: {e}")
        return text


def generate(text, file_name, api_key, model="gemini-2.0-flash-exp"):
    """Upload an image and stream a Gemini response for it.

    Args:
        text: Prompt sent alongside the image.
        file_name: Path of the image file to upload.
        api_key: Gemini API key. When empty or blank, the
            ``GEMINI_API_KEY`` environment variable is used instead.
        model: Name of the model to call.

    Returns:
        A ``(image_data, text_response)`` tuple. ``image_data`` is the
        inline data of the first image part found in the stream, or
        ``None`` if no image was returned. ``text_response`` is the text
        collected from the chunks seen before the image (or from the whole
        stream when there is no image), one chunk per line.
    """
    resolved_key = (api_key or "").strip() or os.environ.get("GEMINI_API_KEY")
    client = genai.Client(api_key=resolved_key)

    uploaded = client.files.upload(file=file_name)
    contents = [
        types.Content(
            role="user",
            parts=[
                types.Part.from_uri(
                    file_uri=uploaded.uri,
                    mime_type=uploaded.mime_type,
                ),
                types.Part.from_text(text=text),
            ],
        ),
    ]
    generate_content_config = types.GenerateContentConfig(
        temperature=1,
        top_p=0.95,
        top_k=40,
        max_output_tokens=8192,
        response_modalities=["image", "text"],
        response_mime_type="text/plain",
    )

    text_chunks = []
    image_data = None
    for chunk in client.models.generate_content_stream(
        model=model,
        contents=contents,
        config=generate_content_config,
    ):
        if (
            not chunk.candidates
            or not chunk.candidates[0].content
            or not chunk.candidates[0].content.parts
        ):
            continue

        # Look at every part, not just the first one: the image may follow
        # a text part inside the same chunk.
        parts = chunk.candidates[0].content.parts
        inline = next((p.inline_data for p in parts if p.inline_data), None)
        if inline is not None:
            image_data = inline.data
            break

        # chunk.text can be None for chunks that carry no text; skip those
        # instead of failing on ``None + "\n"``.
        if chunk.text:
            text_chunks.append(chunk.text + "\n")

    return image_data, "".join(text_chunks)


def process_image_and_prompt(composite_pil, prompt, gemini_api_key):
    """Gradio click handler: run one image-edit request against Gemini.

    Args:
        composite_pil: The uploaded image as a PIL image (``None`` when the
            user has not uploaded anything).
        prompt: User prompt in any language; translated to English first.
        gemini_api_key: Optional API key typed by the user.

    Returns:
        ``([image], "")`` when the model produced an image, otherwise
        ``(None, text_response)`` with the text the model returned.

    Raises:
        gr.Error: If no image was uploaded or if any step fails.
    """
    if composite_pil is None:
        # Fail with a readable message instead of an AttributeError text.
        raise gr.Error("Please upload an image first.", duration=5)

    file_name = None
    try:
        # Translate prompt to English
        translated_prompt = translate_to_english(prompt)
        print(f"Original prompt: {prompt}, Translated prompt: {translated_prompt}")

        # Write the input image to a temporary PNG file for upload.
        # delete=False so the file survives the ``with`` and can be reopened
        # by path; it is removed in the ``finally`` below.
        with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as tmp:
            file_name = tmp.name
            composite_pil.save(tmp, format="PNG")

        image_data, text_response = generate(
            text=translated_prompt,
            file_name=file_name,
            api_key=gemini_api_key,
            model="gemini-2.0-flash-exp",
        )

        if not image_data:
            return None, text_response

        # Convert the binary image data to PNG format
        img = Image.open(BytesIO(image_data))
        if img.mode == "RGBA":
            img = img.convert("RGB")

        # Save to BytesIO as PNG
        output_buffer = BytesIO()
        img.save(output_buffer, format="PNG")
        output_buffer.seek(0)

        # Create PIL Image from buffer
        result_img = Image.open(output_buffer)
        return [result_img], ""
    except Exception as e:
        raise gr.Error(f"Error: {str(e)}", duration=5) from e
    finally:
        # Best-effort cleanup so each request does not leak a temp file.
        if file_name is not None:
            try:
                os.remove(file_name)
            except OSError:
                pass


with gr.Blocks(css_paths="style.css") as demo:
    gr.HTML(
        """ """
    )

    with gr.Accordion("⚠️ API Configuration ⚠️", open=False, elem_classes="config-accordion"):
        gr.Markdown("""
        - **Issue:** ❗ Sometimes the model returns text instead of an image.
        ### 🔧 Steps to Address:
        1. **🛠️ Duplicate the Repository**
           - Create a separate copy for modifications.
        2. **🔑 Use Your Own Gemini API Key**
           - You **must** configure your own Gemini key for generation!
        """)

    with gr.Accordion("📌 Usage Instructions", open=False, elem_classes="instructions-accordion"):
        gr.Markdown("""
        ### 📌 Usage
        - Upload an image (any format will be converted to PNG)
        - Enter a prompt (will be automatically translated to English)
        - Output will always be in PNG format
        - If text is returned instead of an image, it will appear in the text output
        - ❌ **Do not use NSFW images!**
        """)

    with gr.Row(elem_classes="main-content"):
        with gr.Column(elem_classes="input-column"):
            image_input = gr.Image(
                type="pil",
                label="Upload Image (will be converted to PNG)",
                image_mode="RGBA",
                elem_id="image-input",
                elem_classes="upload-box"
            )
            gemini_api_key = gr.Textbox(
                lines=1,
                placeholder="Enter Gemini API Key (optional)",
                label="Gemini API Key (optional)",
                elem_classes="api-key-input"
            )
            prompt_input = gr.Textbox(
                lines=2,
                placeholder="Enter prompt here...",
                label="Prompt",
                elem_classes="prompt-input"
            )
            submit_btn = gr.Button("Generate", elem_classes="generate-btn")

        with gr.Column(elem_classes="output-column"):
            output_gallery = gr.Gallery(
                label="Generated Outputs (PNG)",
                elem_classes="output-gallery",
                format="png"  # Force Gradio to use PNG format
            )
            output_text = gr.Textbox(
                label="Gemini Output",
                placeholder="Text response will appear here if no image is generated.",
                elem_classes="output-text"
            )

    submit_btn.click(
        fn=process_image_and_prompt,
        inputs=[image_input, prompt_input, gemini_api_key],
        outputs=[output_gallery, output_text],
    )

demo.queue(max_size=50).launch()