MultiMedTulu

Runtime error

App Files Files Community

Tonic committed on Nov 18, 2023

Commit

415223e

1 Parent(s): 41cbd00

Update app.py

Browse files

Files changed (1) hide show

app.py +30 -34

app.py CHANGED Viewed

@@ -144,36 +144,32 @@ def resize_image(image, max_size):
-def process_image(image_input, text_input):
-    """Post an image and a text prompt to the remote /app/otter HTTP endpoint.
-
-    The image is passed through resize_image (size cap 1024) and then
-    encode_image before being placed in the JSON payload next to the prompt.
-
-    Returns:
-        The "result" field of the JSON reply when the server answers with
-        HTTP 200; otherwise an error string holding the status code and the
-        response body.
-    """
-    # Resize the image if needed (1024 is the adjustable size cap), then
-    # encode it (to base64, per the helper's name and original comment).
-    resized = resize_image(image_input, 1024)
-    encoded = encode_image(resized)
-    # Serialize the request payload up front.
-    request_body = json.dumps(
-        {
-            "content": [
-                {"prompt": text_input, "image": encoded},
-            ],
-            "token": "sk-OtterHD",  # Replace with your actual token
-        }
-    )
-    # Make the HTTP request
-    response = requests.post(
-        "https://ensures-picture-choices-labels.trycloudflare.com/app/otter",
-        headers={"Content-Type": "application/json"},
-        data=request_body,
-    )
-    # Guard clause: anything other than 200 is reported as an error string.
-    if response.status_code != 200:
-        return f"Error: {response.status_code}, {response.text}"
-    return response.json()["result"]
 def query_vectara(text):
@@ -336,7 +332,7 @@ def process_and_query(input_language=None, audio_input=None, image_input=None, t
         # Process text input
         if text_input is not None:
-            combined_text = "the user asks the following to his health adviser: " + text_input
         # Process audio input
         if audio_input is not None:
@@ -344,15 +340,15 @@ def process_and_query(input_language=None, audio_input=None, image_input=None, t
             print("Audio Text:", audio_text)  # Debug print
             combined_text += "\n" + audio_text
-        # Check if only an image is provided without text
-        if image_input is not None and not combined_text.strip():
-            return "Error: Please provide text input along with the image.", "No hallucination evaluation"
         # Process image input
         if image_input is not None:
-            image_text = process_image(image_input, combined_text)
             print("Image Text:", image_text)  # Debug print
             combined_text += "\n" + image_text
         # Use the text to query Vectara
         vectara_response_json = query_vectara(combined_text)

+def process_image(image_input):
+    """Send an image to the remote Gradio server and return its prediction.
+
+    Args:
+        image_input: Either a string, which is forwarded unchanged as a file
+            path or URL, or an object with a ``save(path)`` method (assumed
+            to be a PIL Image), which is written to a temporary PNG first.
+
+    Returns:
+        Whatever ``client.predict`` returns for the request.
+
+    Raises:
+        Any exception raised while saving the image or calling the server is
+        propagated; the temporary file is removed either way.
+    """
+    # Initialize the Gradio client with the URL of the Gradio server
+    client = Client("https://adept-fuyu-8b-demo.hf.space/--replicas/pqjvl/")
+    # Only non-string inputs need a temporary file (and later cleanup).
+    needs_temp_file = not isinstance(image_input, str)
+    if needs_temp_file:
+        # Reserve a unique path, then leave the ``with`` block so the handle
+        # is closed before the image is written: saving by name while the
+        # handle is still open fails on Windows.
+        with tempfile.NamedTemporaryFile(delete=False, suffix=".png") as tmp_file:
+            image_path = tmp_file.name
+    else:
+        # Assuming it's a file path or a URL
+        image_path = image_input
+    try:
+        if needs_temp_file:
+            image_input.save(image_path)
+        # Call the predict method of the client
+        return client.predict(
+            image_path,  # File path or URL of the image
+            True,        # Additional boolean parameter expected by the server
+            fn_index=2,  # Function index if the server has multiple functions
+        )
+    finally:
+        # Remove the temporary file even when saving or the request fails,
+        # so errors do not leak files on disk.
+        if needs_temp_file:
+            os.remove(image_path)
 def query_vectara(text):
         # Process text input
         if text_input is not None:
+            combined_text = "The user asks the following to his health adviser: " + text_input
         # Process audio input
         if audio_input is not None:
             print("Audio Text:", audio_text)  # Debug print
             combined_text += "\n" + audio_text
         # Process image input
         if image_input is not None:
+            image_text = process_image(image_input)  # Call process_image with only the image input
             print("Image Text:", image_text)  # Debug print
             combined_text += "\n" + image_text
+        # Check if combined text is empty
+        if not combined_text.strip():
+            return "Error: Please provide some input (text, audio, or image).", "No hallucination evaluation"
         # Use the text to query Vectara
         vectara_response_json = query_vectara(combined_text)