marahmerah committed on
Commit
fbf45e9
·
verified ·
1 Parent(s): 16e9f54

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +55 -61
app.py CHANGED
@@ -8,6 +8,7 @@ import gradio as gr
8
  import base64
9
  import mimetypes
10
  from translatepy import Translator
 
11
 
12
  from google import genai
13
  from google.genai import types
@@ -23,14 +24,13 @@ def translate_to_english(text):
23
  return result.result
24
  except Exception as e:
25
  print(f"Translation error: {e}")
26
- return text # Return original text if translation fails
27
 
28
  def generate(text, file_name, api_key, model="gemini-2.0-flash-exp"):
29
- # Initialize client using provided api_key (or fallback to env variable)
30
  client = genai.Client(api_key=(api_key.strip() if api_key and api_key.strip() != ""
31
- else os.environ.get("GEMINI_API_KEY")))
32
 
33
- files = [ client.files.upload(file=file_name) ]
34
 
35
  contents = [
36
  types.Content(
@@ -54,43 +54,23 @@ def generate(text, file_name, api_key, model="gemini-2.0-flash-exp"):
54
  )
55
 
56
  text_response = ""
57
- image_path = None
58
- # Create a temporary file to potentially store image data.
59
- with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as tmp:
60
- temp_path = tmp.name
61
- for chunk in client.models.generate_content_stream(
62
- model=model,
63
- contents=contents,
64
- config=generate_content_config,
65
- ):
66
- if not chunk.candidates or not chunk.candidates[0].content or not chunk.candidates[0].content.parts:
67
- continue
68
- candidate = chunk.candidates[0].content.parts[0]
69
- # Check for inline image data
70
- if candidate.inline_data:
71
- save_binary_file(temp_path, candidate.inline_data.data)
72
- print(f"File of mime type {candidate.inline_data.mime_type} saved to: {temp_path} and prompt input: {text}")
73
- image_path = temp_path
74
- # If an image is found, we assume that is the desired output.
75
- break
76
- else:
77
- # Accumulate text response if no inline_data is present.
78
- text_response += chunk.text + "\n"
79
 
80
  del files
81
- return image_path, text_response
82
-
83
- def convert_to_png(image_path):
84
- """Convert any image format to PNG and return the path"""
85
- with Image.open(image_path) as img:
86
- # Convert RGBA to RGB if needed
87
- if img.mode == 'RGBA':
88
- img = img.convert('RGB')
89
- # Create a new temp file with PNG extension
90
- with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as tmp:
91
- output_path = tmp.name
92
- img.save(output_path, format="PNG", quality=95)
93
- return output_path
94
 
95
  def process_image_and_prompt(composite_pil, prompt, gemini_api_key):
96
  try:
@@ -98,32 +78,43 @@ def process_image_and_prompt(composite_pil, prompt, gemini_api_key):
98
  translated_prompt = translate_to_english(prompt)
99
  print(f"Original prompt: {prompt}, Translated prompt: {translated_prompt}")
100
 
101
- # Save the composite image to a temporary file as PNG
 
 
 
 
 
102
  with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as tmp:
103
- composite_path = tmp.name
104
- composite_pil.save(composite_path, format="PNG")
105
 
106
- file_name = composite_path
107
- input_text = translated_prompt
108
- model = "gemini-2.0-flash-exp"
109
-
110
- image_path, text_response = generate(text=input_text, file_name=file_name, api_key=gemini_api_key, model=model)
 
111
 
112
- if image_path:
113
- # Convert the output image to PNG format
114
- png_path = convert_to_png(image_path)
115
- result_img = Image.open(png_path)
116
- return [result_img], "" # Return image in gallery and empty text output.
 
 
 
 
 
 
 
 
 
117
  else:
118
- # Return no image and the text response.
119
  return None, text_response
120
  except Exception as e:
121
- raise gr.Error(f"Error Getting {e}", duration=5)
122
-
123
 
124
- # Build a Blocks-based interface with a custom HTML header and CSS
125
- with gr.Blocks(css_paths="style.css",) as demo:
126
- # Custom HTML header with proper class for styling
127
  gr.HTML(
128
  """
129
  <div class="header-container">
@@ -185,14 +176,17 @@ with gr.Blocks(css_paths="style.css",) as demo:
185
  submit_btn = gr.Button("Generate", elem_classes="generate-btn")
186
 
187
  with gr.Column(elem_classes="output-column"):
188
- output_gallery = gr.Gallery(label="Generated Outputs (PNG)", elem_classes="output-gallery")
 
 
 
 
189
  output_text = gr.Textbox(
190
  label="Gemini Output",
191
  placeholder="Text response will appear here if no image is generated.",
192
  elem_classes="output-text"
193
  )
194
 
195
- # Set up the interaction with two outputs.
196
  submit_btn.click(
197
  fn=process_image_and_prompt,
198
  inputs=[image_input, prompt_input, gemini_api_key],
@@ -214,7 +208,7 @@ with gr.Blocks(css_paths="style.css",) as demo:
214
 
215
  gr.Examples(
216
  examples=examples,
217
- inputs=[image_input, prompt_input,],
218
  elem_id="examples-grid"
219
  )
220
 
 
8
  import base64
9
  import mimetypes
10
  from translatepy import Translator
11
+ from io import BytesIO
12
 
13
  from google import genai
14
  from google.genai import types
 
24
  return result.result
25
  except Exception as e:
26
  print(f"Translation error: {e}")
27
+ return text
28
 
29
  def generate(text, file_name, api_key, model="gemini-2.0-flash-exp"):
 
30
  client = genai.Client(api_key=(api_key.strip() if api_key and api_key.strip() != ""
31
+ else os.environ.get("GEMINI_API_KEY")))
32
 
33
+ files = [client.files.upload(file=file_name)]
34
 
35
  contents = [
36
  types.Content(
 
54
  )
55
 
56
  text_response = ""
57
+ image_data = None
58
+ for chunk in client.models.generate_content_stream(
59
+ model=model,
60
+ contents=contents,
61
+ config=generate_content_config,
62
+ ):
63
+ if not chunk.candidates or not chunk.candidates[0].content or not chunk.candidates[0].content.parts:
64
+ continue
65
+ candidate = chunk.candidates[0].content.parts[0]
66
+ if candidate.inline_data:
67
+ image_data = candidate.inline_data.data
68
+ break
69
+ else:
70
+ text_response += chunk.text + "\n"
 
 
 
 
 
 
 
 
71
 
72
  del files
73
+ return image_data, text_response
 
 
 
 
 
 
 
 
 
 
 
 
74
 
75
  def process_image_and_prompt(composite_pil, prompt, gemini_api_key):
76
  try:
 
78
  translated_prompt = translate_to_english(prompt)
79
  print(f"Original prompt: {prompt}, Translated prompt: {translated_prompt}")
80
 
81
+ # Save the composite image to memory as PNG
82
+ img_byte_arr = BytesIO()
83
+ composite_pil.save(img_byte_arr, format='PNG')
84
+ img_byte_arr.seek(0)
85
+
86
+ # Create a temporary PNG file
87
  with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as tmp:
88
+ tmp.write(img_byte_arr.getvalue())
89
+ file_name = tmp.name
90
 
91
+ image_data, text_response = generate(
92
+ text=translated_prompt,
93
+ file_name=file_name,
94
+ api_key=gemini_api_key,
95
+ model="gemini-2.0-flash-exp"
96
+ )
97
 
98
+ if image_data:
99
+ # Convert the binary image data to PNG format
100
+ img = Image.open(BytesIO(image_data))
101
+ if img.mode == 'RGBA':
102
+ img = img.convert('RGB')
103
+
104
+ # Save to BytesIO as PNG
105
+ output_buffer = BytesIO()
106
+ img.save(output_buffer, format="PNG")
107
+ output_buffer.seek(0)
108
+
109
+ # Create PIL Image from buffer
110
+ result_img = Image.open(output_buffer)
111
+ return [result_img], ""
112
  else:
 
113
  return None, text_response
114
  except Exception as e:
115
+ raise gr.Error(f"Error: {str(e)}", duration=5)
 
116
 
117
+ with gr.Blocks(css_paths="style.css") as demo:
 
 
118
  gr.HTML(
119
  """
120
  <div class="header-container">
 
176
  submit_btn = gr.Button("Generate", elem_classes="generate-btn")
177
 
178
  with gr.Column(elem_classes="output-column"):
179
+ output_gallery = gr.Gallery(
180
+ label="Generated Outputs (PNG)",
181
+ elem_classes="output-gallery",
182
+ format="png" # Force Gradio to use PNG format
183
+ )
184
  output_text = gr.Textbox(
185
  label="Gemini Output",
186
  placeholder="Text response will appear here if no image is generated.",
187
  elem_classes="output-text"
188
  )
189
 
 
190
  submit_btn.click(
191
  fn=process_image_and_prompt,
192
  inputs=[image_input, prompt_input, gemini_api_key],
 
208
 
209
  gr.Examples(
210
  examples=examples,
211
+ inputs=[image_input, prompt_input],
212
  elem_id="examples-grid"
213
  )
214