import requests
from bs4 import BeautifulSoup
import re
from datetime import datetime
import pytz
from PIL import Image
from io import BytesIO
import tempfile
import os
import speech_recognition as sr
from ddgs import DDGS
from youtube_transcript_api import YouTubeTranscriptApi, TranscriptsDisabled, NoTranscriptFound
from transformers import BlipProcessor, BlipForConditionalGeneration
import torch
# Lazy import pydub to avoid ffmpeg warning when not needed
# from pydub import AudioSegment
# handlers for tools
# 1. get_weather
def get_weather(latitude: str, longitude: str) -> str:
    """Get current temperature for a given location using latitude and longitude coordinates."""
    api = f"https://api.open-meteo.com/v1/forecast?latitude={latitude}&longitude={longitude}&current=temperature_2m,wind_speed_10m&hourly=temperature_2m,relative_humidity_2m,wind_speed_10m"
    response = requests.get(api, timeout=30)
    return str(response.json()["current"]["temperature_2m"])
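# Example call (illustrative only; the coordinates below are arbitrary sample values,
# not anything used elsewhere in this Space):
#   get_weather("48.85", "2.35")  # -> current temperature near Paris, as a string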
# 2. web_search
def web_search(query: str, max_results: int = 5) -> str:
"""Enhanced web search with better error handling"""
try:
results_text = []
with DDGS() as ddgs:
# Add more specific search parameters
results = ddgs.text(
query,
max_results=max_results,
region='us-en',
safesearch='off'
)
for i, result in enumerate(results):
title = result.get("title", "")
snippet = result.get("body", "")
link = result.get("href", "")
# Clean up the text
title = title.strip() if title else ""
snippet = snippet.strip() if snippet else ""
if title or snippet:
results_text.append(f"{i+1}. {title}\n🔗 {link}\n📝 {snippet}\n")
if results_text:
return "\n".join(results_text)
else:
# Try alternative search approach
return f"No results found for: {query}. Try rephrasing your search query."
except Exception as e:
return f"Search error: {str(e)}. Please try a different search query."
# 3. get_time_in_location
def get_current_time(timezone: str = "Asia/Kolkata") -> str:
"""Get current time in specified timezone."""
try:
# Convert timezone string to pytz timezone object
tz = pytz.timezone(timezone)
current_time = datetime.now(tz)
# Format the time nicely
formatted_time = current_time.strftime("%Y-%m-%d %H:%M:%S %Z")
return f"Current time in {timezone}: {formatted_time}"
except Exception as e:
return f"Error getting time: {str(e)}"
def get_time_in_location(location: str) -> str:
"""Get current time for a specific location."""
# Map common locations to timezones
location_timezones = {
"gujarat": "Asia/Kolkata",
"india": "Asia/Kolkata",
"mumbai": "Asia/Kolkata",
"delhi": "Asia/Kolkata",
"paris": "Europe/Paris",
"london": "Europe/London",
"new york": "America/New_York",
"tokyo": "Asia/Tokyo",
"sydney": "Australia/Sydney",
"utc": "UTC"
}
location_lower = location.lower()
if location_lower in location_timezones:
timezone = location_timezones[location_lower]
return get_current_time(timezone)
else:
return f"I don't have timezone information for '{location}'. Try: Gujarat, India, Mumbai, Delhi, Paris, London, New York, Tokyo, Sydney, or UTC."
# 4. analyze_image
def analyze_image(image_url: str, question: str = None) -> str:
"""Analyze an image and optionally answer a visual question about it."""
try:
# Download the image
headers = {"User-Agent": "Mozilla/5.0"}
response = requests.get(image_url, headers=headers, timeout=30)
response.raise_for_status()
# Open image
image = Image.open(BytesIO(response.content)).convert('RGB')
# Load BLIP model for visual QA or captioning
processor = BlipProcessor.from_pretrained("Salesforce/blip-vqa-base")
model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-vqa-base")
device = "cuda" if torch.cuda.is_available() else "cpu"
model.to(device)
if question:
inputs = processor(image, question, return_tensors="pt").to(device)
out = model.generate(**inputs)
answer = processor.decode(out[0], skip_special_tokens=True)
return f"❓ Question: {question}\n🧠 Answer: {answer}"
else:
# If no question, generate a caption
processor_caption = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
model_caption = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-base")
model_caption.to(device)
inputs = processor_caption(image, return_tensors="pt").to(device)
out = model_caption.generate(**inputs)
caption = processor_caption.decode(out[0], skip_special_tokens=True)
return f"🖼️ Image Caption: {caption}"
except Exception as e:
return f"Error analyzing image: {e}"
# 5. analyze_video
def analyze_video(video_url: str) -> str:
"""Analyze YouTube video metadata and attempt to fetch subtitles/transcript."""
try:
# Validate YouTube URL and extract video ID
video_id_match = re.search(r'(?:youtube\.com\/(?:watch\?v=|shorts\/)|youtu\.be\/)([^&\n?#\/]+)', video_url)
if not video_id_match:
return "Invalid YouTube URL format. Please provide a valid YouTube or Shorts URL."
video_id = video_id_match.group(1)
headers = {
"User-Agent": "Mozilla/5.0"
}
response = requests.get(video_url, headers=headers, timeout=20)
response.raise_for_status()
soup = BeautifulSoup(response.text, 'html.parser')
# Extract title
title = None
for selector in ['meta[property="og:title"]', 'meta[name="title"]', 'title']:
el = soup.select_one(selector)
if el:
title = el.get('content') or el.text
break
title_text = title.strip() if title else "Unknown title"
# Extract description
description = None
for selector in ['meta[property="og:description"]', 'meta[name="description"]']:
el = soup.select_one(selector)
if el:
description = el.get('content') or el.text
break
desc_text = description.strip() if description else "No description available"
# Extract channel
channel = None
for selector in ['link[itemprop="name"]', 'meta[itemprop="channelId"]', 'a[href*="/@"]']:
el = soup.select_one(selector)
if el:
channel = el.get('content') or el.text or el.get('title')
break
channel_text = channel.strip() if channel else "Unknown channel"
# Attempt to fetch transcript using YouTubeTranscriptApi
transcript_text = "Transcript not available."
try:
transcript = YouTubeTranscriptApi.get_transcript(video_id)
transcript_text = "\n".join([f"{line['text']}" for line in transcript[:5]]) + "\n..."
except (TranscriptsDisabled, NoTranscriptFound):
transcript_text = "No transcript available for this video."
except Exception as e:
transcript_text = f"Transcript fetch failed: {e}"
# Final analysis summary
result = f"📺 Video Analysis\n"
result += f"Title: {title_text}\n"
result += f"Channel: {channel_text}\n"
result += f"Description: {desc_text[:300]}...\n"
result += f"Transcript Preview:\n{transcript_text}"
return result
except Exception as e:
return f"Error analyzing video: {e}"
# 6. transcribe_audio
def transcribe_audio(audio_url: str) -> str:
"""Transcribe audio content from an audio file URL using speech recognition."""
try:
# Download the audio file
headers = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36"}
response = requests.get(audio_url, headers=headers, timeout=30)
response.raise_for_status()
# Create a temporary file to store the audio
with tempfile.NamedTemporaryFile(delete=False, suffix='.mp3') as temp_audio:
temp_audio.write(response.content)
temp_audio_path = temp_audio.name
try:
# Try to convert audio to WAV format (speech_recognition works better with WAV)
try:
# Lazy import pydub only when needed
from pydub import AudioSegment
audio = AudioSegment.from_file(temp_audio_path)
# Create temporary WAV file
with tempfile.NamedTemporaryFile(delete=False, suffix='.wav') as temp_wav:
audio.export(temp_wav.name, format="wav")
wav_path = temp_wav.name
except Exception as e:
# If conversion fails, try using the original file directly
print(f"Audio conversion warning: {e}")
wav_path = temp_audio_path
# Initialize speech recognizer
recognizer = sr.Recognizer()
# Load the audio file
with sr.AudioFile(wav_path) as source:
# Adjust for ambient noise
recognizer.adjust_for_ambient_noise(source, duration=0.5)
# Record the audio
audio_data = recognizer.record(source)
# Try to recognize speech
try:
# Use Google Speech Recognition (free, requires internet)
text = recognizer.recognize_google(audio_data)
return f"Audio Transcription:\n{text}"
except sr.UnknownValueError:
return "Audio Transcription: Could not understand the audio. The speech may be unclear, too quiet, or in a language not supported."
except sr.RequestError as e:
return f"Audio Transcription: Error with speech recognition service: {str(e)}"
finally:
# Clean up temporary files
try:
os.unlink(temp_audio_path)
if wav_path != temp_audio_path: # Only delete wav file if it's different
os.unlink(wav_path)
except:
pass
except ImportError:
return "Audio Transcription: Speech recognition library not available. Please install 'SpeechRecognition' package."
except Exception as e:
return f"Audio Transcription Error: {str(e)}" |