|
import streamlit as st |
|
from openai import OpenAI |
|
from dotenv import load_dotenv |
|
import os |
|
import tempfile |
|
|
|
# Load variables from a local .env file (if present) into the process
# environment so OPENAI_API_KEY can be looked up when the client is built.
load_dotenv()

st.title("Image Description and Audio Generation")

# Module-level OpenAI client, configured from the OPENAI_API_KEY variable.
client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
|
|
|
|
|
def process_image_and_generate_audio(image_url):
    """Describe the image at *image_url* and synthesize the description as speech.

    The URL is sent to the ``gpt-4o`` chat model together with a fixed prompt;
    the text that comes back is then passed to the ``tts-1`` speech endpoint
    using the ``alloy`` voice.

    Args:
        image_url: URL of the image, forwarded to the API unchanged.

    Returns:
        ``(content, audio_response)`` on success, where ``content`` is the
        description text and ``audio_response`` is whatever
        ``client.audio.speech.create`` returned. ``(None, None)`` on any
        failure, after the problem has been reported through ``st.error``.
    """
    try:
        response = client.chat.completions.create(
            model="gpt-4o",
            messages=[
                {
                    "role": "user",
                    "content": [
                        {"type": "text", "text": "Explain every single thing about this image"},
                        {
                            "type": "image_url",
                            "image_url": {"url": image_url},
                        },
                    ],
                }
            ],
            max_tokens=300,
        )

        content = response.choices[0].message.content

        # The message content can be missing or empty; sending that to the
        # speech endpoint would only produce an opaque API error, so report
        # it explicitly instead.
        if not content:
            st.error("An error occurred: the model returned no description for this image.")
            return None, None

        audio_response = client.audio.speech.create(
            model="tts-1",
            voice="alloy",
            input=content,
        )

        return content, audio_response
    except Exception as e:
        # Deliberately broad: this is the UI boundary, so any API or network
        # failure is shown to the user rather than crashing the app.
        st.error(f"An error occurred: {str(e)}")
        return None, None
|
|
|
|
|
def main():
    """Render the page: URL input, trigger button, description and audio player.

    When the button is pressed with a non-empty URL, the image is processed
    via ``process_image_and_generate_audio`` and, on success, the description
    text and an MP3 player for the generated speech are displayed. Nothing is
    displayed beyond the error already reported by that helper when it fails.
    """
    image_url = st.text_input("Enter Image URL")

    if not st.button("Generate Description and Audio"):
        return

    if not image_url:
        st.warning("Please enter an image URL.")
        return

    st.info("Processing image and generating audio...")

    content, audio_response = process_image_and_generate_audio(image_url)
    if content is None or audio_response is None:
        return

    # Write the audio into a throwaway directory so the MP3 is removed as soon
    # as its bytes are in memory; a NamedTemporaryFile(delete=False) would
    # leave one orphaned file behind per click.
    with tempfile.TemporaryDirectory() as tmp_dir:
        audio_path = os.path.join(tmp_dir, "speech.mp3")
        audio_response.stream_to_file(audio_path)
        # Context manager guarantees the read handle is closed.
        with open(audio_path, "rb") as audio_file:
            audio_bytes = audio_file.read()

    st.markdown("**Description:**")
    st.write(content)

    st.audio(audio_bytes, format="audio/mp3")
|
|
|
# Build the page only when this file is run as a script, not when imported.
if __name__ == "__main__":
    main()
|
|