Spaces:
Sleeping
Sleeping
from smolagents import Tool, DuckDuckGoSearchTool, PythonInterpreterTool, VisitWebpageTool, WikipediaSearchTool | |
from openai import OpenAI | |
import whisper | |
import base64 | |
import os | |
class read_file(Tool):
    """Agent tool that returns the text content of a file on disk."""

    name = "read_file"
    description = "Read a file and return the content."
    inputs = {
        "file_path": {
            "type": "string",
            "description": "The path to the file to read.",
        }
    }
    output_type = "string"

    def forward(self, file_path: str) -> str:
        """Read the content of a file and return it as a string.

        Args:
            file_path: Path of the file to read.

        Returns:
            The file's text, or a message starting with
            "Error reading file:" when the file cannot be opened or decoded.
        """
        try:
            # Pin the encoding so the result does not depend on the host's
            # locale default (which differs between platforms).
            with open(file_path, "r", encoding="utf-8") as file:
                return file.read()
        except Exception as e:
            # Tool boundary: the failure is reported as text instead of
            # being raised to the caller.
            return f"Error reading file: {str(e)}"
class transcribe_audio(Tool):
    """Agent tool that transcribes an audio file with a Whisper model."""

    name = "transcribe_audio"
    description = "Transcribe an audio file and return the text."
    inputs = {
        "audio_path": {
            "type": "string",
            "description": "The path to the audio file to transcribe.",
        }
    }
    output_type = "string"

    def forward(self, audio_path: str) -> str:
        """Transcribe the audio file at ``audio_path``.

        Args:
            audio_path: Path of the audio file to transcribe.

        Returns:
            The transcribed text, or a message starting with
            "Error transcribing audio:" when loading the model or
            transcribing fails.
        """
        try:
            # Model loading stays inside the try block so a failed load is
            # reported as an error string, exactly like a failed transcription.
            model = self._get_model()
            result = model.transcribe(audio_path)
            return result["text"]
        except Exception as e:
            return f"Error transcribing audio: {str(e)}"

    def _get_model(self):
        """Return the Whisper "small" model, loading it on first use only."""
        model = getattr(self, "_whisper_model", None)
        if model is None:
            # Loading is expensive; keep the model on the instance so
            # repeated calls to forward() reuse it instead of reloading.
            model = whisper.load_model("small")
            self._whisper_model = model
        return model
# File extensions whose registered image MIME subtype differs from the
# extension text itself (e.g. ".jpg" files are "image/jpeg", not "image/jpg").
_MIME_SUBTYPE_BY_EXTENSION = {
    "jpg": "jpeg",
    "jpe": "jpeg",
    "jfif": "jpeg",
    "tif": "tiff",
    "svg": "svg+xml",
}


def get_data_uri(image_path: str, base64_image: str):
    """Build a ``data:`` URI for an already base64-encoded image.

    The MIME type is derived from the extension of ``image_path``
    (case-insensitively). Extensions with a different registered subtype
    are translated; any other extension is used as the subtype unchanged.

    Args:
        image_path: Path of the image; only its extension is inspected.
        base64_image: The image bytes, already base64-encoded as text.

    Returns:
        A string of the form ``data:image/<subtype>;base64,<payload>``.
    """
    extension = os.path.splitext(image_path)[1].lower().lstrip(".")
    subtype = _MIME_SUBTYPE_BY_EXTENSION.get(extension, extension)
    return f"data:image/{subtype};base64,{base64_image}"
class describe_image(Tool):
    """Agent tool that asks a vision-capable chat model to describe an image."""

    name = "describe_image"
    description = "Describe an image and return the description."
    inputs = {
        "image_path": {
            "type": "string",
            "description": "The path to the image file to describe.",
        }
    }
    output_type = "string"

    def forward(self, image_path: str) -> str:
        """Send the image to the chat-completions endpoint and return its description.

        The client is configured from the ``OPENROUTER_API_KEY`` and
        ``OPENROUTER_BASE_URL`` environment variables.

        Args:
            image_path: Path of the image file to describe.

        Returns:
            The model's description, or a message starting with
            "Error describing image:" when reading the file or the request
            fails.

        Raises:
            ValueError: If ``OPENROUTER_API_KEY`` is not set.
        """
        api_key = os.getenv("OPENROUTER_API_KEY")
        if not api_key:
            # The message names the variable that is actually read above.
            raise ValueError(
                "OpenRouter API key not provided: "
                "OPENROUTER_API_KEY environment variable not set"
            )
        base_url = os.getenv("OPENROUTER_BASE_URL")
        client = OpenAI(api_key=api_key, base_url=base_url)
        try:
            with open(image_path, 'rb') as image_file:
                base64_image = base64.b64encode(image_file.read()).decode('utf-8')
            # The image is sent inline as a data URI rather than a remote URL.
            data_uri = get_data_uri(image_path, base64_image)
            response = client.chat.completions.create(
                # NOTE(review): if the base URL points at OpenRouter, the model
                # id may need a provider prefix — confirm against the deployment.
                model="gpt-4o",
                messages=[
                    {
                        "role": "user",
                        "content": [
                            {"type": "text", "text": "Describe this image in detail. Include information about the main subject, setting, colors, and any notable elements."},
                            {
                                "type": "image_url",
                                "image_url": {"url": data_uri}
                            }
                        ]
                    }
                ],
                max_tokens=500
            )
            # The message content may be absent; keep the declared string output.
            return response.choices[0].message.content or ""
        except Exception as e:
            return f"Error describing image: {str(e)}"
def return_tools() -> list[Tool]:
    """Build the toolset handed to the agent.

    Returns:
        Newly created instances of the three tools defined in this file
        (file reading, audio transcription, image description) followed by
        the four tools imported from smolagents, in that order.
    """
    tool_classes = (
        read_file,
        transcribe_audio,
        describe_image,
        DuckDuckGoSearchTool,
        PythonInterpreterTool,
        VisitWebpageTool,
        WikipediaSearchTool,
    )
    return [tool_cls() for tool_cls in tool_classes]