Spaces:

muhammadnasar
/

ai_image_desc_and_audio

Sleeping

App Files Files Community

ai_image_desc_and_audio / app.py

muhammadnasar

Update app.py

1edd2f3 verified about 1 year ago

raw

history blame contribute delete

2.3 kB

	import streamlit as st
	from openai import OpenAI
	from dotenv import load_dotenv
	import os
	import tempfile

	# Load variables from a local .env file into the process environment
	# before anything below reads them.
	load_dotenv()

	st.title("Image Description and Audio Generation")

	# Module-wide OpenAI client, keyed from the OPENAI_API_KEY variable.
	client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))

	# Define function to process image description and generate audio
	def process_image_and_generate_audio(image_url):
	    """Describe the image at ``image_url`` and turn the description into speech.

	    The URL is sent to the ``gpt-4o`` chat model together with a request to
	    explain the image; the returned text is then passed to the ``tts-1``
	    speech model using the ``alloy`` voice.

	    Args:
	        image_url: URL of the image, forwarded unchanged to the chat model.

	    Returns:
	        A ``(content, audio_response)`` tuple holding the description text and
	        the object returned by ``client.audio.speech.create``. If anything
	        fails, or the model returns no text, an error is shown with
	        ``st.error`` and ``(None, None)`` is returned instead.
	    """
	    try:
	        response = client.chat.completions.create(
	            model="gpt-4o",
	            messages=[
	                {
	                    "role": "user",
	                    "content": [
	                        {
	                            "type": "text",
	                            "text": "Explain every single thing about this image",
	                        },
	                        {
	                            "type": "image_url",
	                            "image_url": {"url": image_url},
	                        },
	                    ],
	                }
	            ],
	            max_tokens=300,
	        )

	        content = response.choices[0].message.content

	        # The message content is optional in the API response. Stop here with
	        # a clear message rather than sending None or "" to the speech
	        # endpoint and surfacing an unrelated validation error.
	        if not content:
	            st.error("An error occurred: the model returned no description.")
	            return None, None

	        audio_response = client.audio.speech.create(
	            model="tts-1",
	            voice="alloy",
	            input=content,
	        )
	    except Exception as e:
	        # UI boundary: report the failure on the page instead of crashing
	        # the Streamlit script run.
	        st.error(f"An error occurred: {e}")
	        return None, None

	    return content, audio_response

	# Streamlit UI
	def main():
	    """Render the page: URL input, trigger button, description and audio player.

	    Nothing happens until the button is pressed. With an empty (or
	    whitespace-only) URL a warning is shown. Otherwise the URL is handed to
	    ``process_image_and_generate_audio`` and, when that succeeds, the
	    description text and an MP3 player are displayed.
	    """
	    # Surrounding whitespace from a pasted URL is dropped so that a blank-only
	    # entry is treated as "no URL" instead of being sent to the API.
	    image_url = st.text_input("Enter Image URL").strip()

	    if not st.button("Generate Description and Audio"):
	        return

	    if not image_url:
	        st.warning("Please enter an image URL.")
	        return

	    st.info("Processing image and generating audio...")

	    content, audio_response = process_image_and_generate_audio(image_url)
	    if content is None or audio_response is None:
	        # The helper has already reported the failure on the page.
	        return

	    # Reserve a temporary path, then close the handle before the audio is
	    # written by name: a file that is still open cannot be reopened by name
	    # on every platform.
	    with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as tmp:
	        audio_path = tmp.name

	    try:
	        audio_response.stream_to_file(audio_path)
	        with open(audio_path, "rb") as audio_file:
	            audio_bytes = audio_file.read()
	    finally:
	        # delete=False leaves removal to us; without this every click would
	        # leave another MP3 behind in the temp directory.
	        os.remove(audio_path)

	    st.markdown("Description:")
	    st.write(content)

	    st.audio(audio_bytes, format="audio/mp3")

	# Build the UI only when this file is executed as a script, not when it is
	# imported as a module.
	if __name__ == "__main__":
	    main()