# Source: Hugging Face Space "muhammadnasar/ai_image_desc_and_audio" (status: Sleeping).
# File size: 2,301 bytes. Commits shown in the page's blame gutter: 57dbf9f, 1edd2f3.

import streamlit as st
from openai import OpenAI
from dotenv import load_dotenv
import os
import tempfile

# Load settings from a .env file (if any) before the API key is read below.
load_dotenv()

st.title("Image Description and Audio Generation")

# Module-level OpenAI client, keyed by the OPENAI_API_KEY environment variable.
client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))

# Define function to process image description and generate audio
def process_image_and_generate_audio(image_url):
    """Describe the image at ``image_url`` and synthesize speech for the text.

    Sends the URL to the ``gpt-4o`` chat model together with a fixed prompt,
    then passes the returned description to the ``tts-1`` speech endpoint.

    Args:
        image_url: URL of the image; forwarded to the model unchanged.

    Returns:
        ``(content, audio_response)`` on success, where ``content`` is the
        description text and ``audio_response`` is whatever
        ``client.audio.speech.create`` returned. ``(None, None)`` when a
        request fails or the model returns no text; in both cases the
        problem is reported to the user through ``st.error``.
    """
    try:
        response = client.chat.completions.create(
            model="gpt-4o",
            messages=[
                {
                    "role": "user",
                    "content": [
                        {"type": "text", "text": "Explain every single thing about this image"},
                        {
                            "type": "image_url",
                            "image_url": {"url": image_url},
                        },
                    ],
                }
            ],
            max_tokens=300,
        )

        # Get content from response
        content = response.choices[0].message.content

        # The message content may be null or blank; sending that to the
        # speech endpoint would only surface an opaque API error, so stop
        # here with a message that names the actual problem.
        if not content or not content.strip():
            st.error("An error occurred: the model returned no description for this image.")
            return None, None

        # Generate audio from content
        audio_response = client.audio.speech.create(
            model="tts-1",
            voice="alloy",
            input=content,
        )

        return content, audio_response
    except Exception as e:
        # Deliberately broad: this is the UI boundary, so any failure from
        # either request is shown to the user instead of crashing the app.
        st.error(f"An error occurred: {str(e)}")
        return None, None

def _audio_response_to_bytes(audio_response):
    """Return the audio held by ``audio_response`` as raw bytes.

    The response is written to a temporary ``.mp3`` file through its
    ``stream_to_file`` method and read back. The temporary file is always
    removed afterwards, including when writing or reading fails.

    Args:
        audio_response: Object exposing ``stream_to_file(path)``.

    Returns:
        The bytes that were written to the temporary file.
    """
    tmp = tempfile.NamedTemporaryFile(suffix=".mp3", delete=False)
    # Close our own handle first: the file is reopened by name below, which
    # is not possible on every platform while this handle is still open.
    tmp.close()
    try:
        audio_response.stream_to_file(tmp.name)
        with open(tmp.name, "rb") as audio_file:
            return audio_file.read()
    finally:
        # delete=False means nothing else cleans this file up.
        os.remove(tmp.name)


# Streamlit UI
def main():
    """Render the URL input and, on button click, show description and audio.

    Reads an image URL from a text input. When the button is pressed, the
    URL is passed to ``process_image_and_generate_audio``; on success the
    description is written to the page and the audio is embedded in an
    MP3 player.
    """
    # Image URL input; surrounding whitespace is dropped so a blank-looking
    # entry is treated as missing instead of being sent to the API.
    image_url = (st.text_input("Enter Image URL") or "").strip()

    if not st.button("Generate Description and Audio"):
        return

    if not image_url:
        st.warning("Please enter an image URL.")
        return

    st.info("Processing image and generating audio...")

    # Generate content and audio
    content, audio_response = process_image_and_generate_audio(image_url)
    if content is None or audio_response is None:
        # The failure has already been reported by the call above.
        return

    # Materialize the audio before rendering anything, as the original did.
    audio_bytes = _audio_response_to_bytes(audio_response)

    # Display content
    st.markdown("**Description:**")
    st.write(content)

    # Display the audio
    st.audio(audio_bytes, format="audio/mp3")

# Run the UI only when this file is executed as the main module, not on import.
if __name__ == "__main__":
    main()