"""
Multi-Source Fallback Engine

Implements a cascading fallback system with 10+ sources per data type.

NEVER FAILS - always returns data or cached data.
"""

import httpx
import asyncio
import logging
import json
import time
from typing import Dict, Any, List, Optional, Callable, Tuple
from datetime import datetime, timedelta
from pathlib import Path
from enum import Enum

logger = logging.getLogger(__name__)


class DataType(Enum):
    """Supported data types"""
    MARKET_PRICES = "market_prices"
    OHLC_CANDLESTICK = "ohlc_candlestick"
    BLOCKCHAIN_EXPLORER = "blockchain_explorer"
    NEWS_FEEDS = "news_feeds"
    SENTIMENT_DATA = "sentiment_data"
    ONCHAIN_ANALYTICS = "onchain_analytics"
    WHALE_TRACKING = "whale_tracking"


class SourceStatus(Enum):
    """Source availability status"""
    AVAILABLE = "available"
    RATE_LIMITED = "rate_limited"
    TEMPORARILY_DOWN = "temporarily_down"
    PERMANENTLY_FAILED = "permanently_failed"


class MultiSourceCache:
    """Simple in-memory cache with TTL"""

    def __init__(self):
        self._cache: Dict[str, Tuple[Any, float, float]] = {}

    def get(self, key: str) -> Optional[Any]:
        """Get cached data if not expired"""
        if key in self._cache:
            data, timestamp, ttl = self._cache[key]
            if time.time() - timestamp < ttl:
                logger.info(f"Cache HIT: {key}")
                return data
            else:
                # Entry expired - drop it
                del self._cache[key]
                logger.debug(f"Cache EXPIRED: {key}")
        return None

    def set(self, key: str, data: Any, ttl: int):
        """Set cache with TTL in seconds"""
        self._cache[key] = (data, time.time(), ttl)
        logger.debug(f"Cache SET: {key} (TTL: {ttl}s)")

    def get_stale(self, key: str, max_age: int) -> Optional[Any]:
        """Get cached data even if expired, within max_age"""
        if key in self._cache:
            data, timestamp, _ = self._cache[key]
            age = time.time() - timestamp
            if age < max_age:
                logger.warning(f"Cache STALE: {key} (age: {age:.0f}s)")
                return data
        return None

    def clear(self):
        """Clear all cache"""
        self._cache.clear()
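

# Illustrative cache semantics (a minimal sketch, not executed; assumes an
# instance named `cache`):
#
#     cache.set("btc_price", {"usd": 67000}, ttl=60)
#     cache.get("btc_price")              # < 60s old  -> {"usd": 67000}
#     cache.get_stale("btc_price", 3600)  # 60s-1h old -> value, logged as STALE
#
# Note: get() evicts expired entries, so a stale read only succeeds if get()
# has not already removed the entry.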


class SourceMonitor:
    """Monitor source performance and availability"""

    def __init__(self):
        self._source_stats: Dict[str, Dict[str, Any]] = {}
        self._source_status: Dict[str, SourceStatus] = {}
        self._unavailable_until: Dict[str, float] = {}

    def record_success(self, source_name: str, response_time: float):
        """Record successful request"""
        if source_name not in self._source_stats:
            self._source_stats[source_name] = {
                "success_count": 0,
                "failure_count": 0,
                "total_response_time": 0,
                "last_success": None,
                "last_failure": None
            }

        stats = self._source_stats[source_name]
        stats["success_count"] += 1
        stats["total_response_time"] += response_time
        stats["last_success"] = time.time()

        # A success clears any previous unavailability window
        self._source_status[source_name] = SourceStatus.AVAILABLE
        if source_name in self._unavailable_until:
            del self._unavailable_until[source_name]

        logger.debug(f"{source_name}: Success ({response_time:.2f}s)")

    def record_failure(self, source_name: str, error_type: str, status_code: Optional[int] = None):
        """Record failed request"""
        if source_name not in self._source_stats:
            self._source_stats[source_name] = {
                "success_count": 0,
                "failure_count": 0,
                "total_response_time": 0,
                "last_success": None,
                "last_failure": None
            }

        stats = self._source_stats[source_name]
        stats["failure_count"] += 1
        stats["last_failure"] = time.time()
        stats["last_error"] = error_type
        stats["last_status_code"] = status_code

        # Back off according to the kind of failure
        if status_code == 429:
            # Rate limited: stand down for 60 minutes
            self._source_status[source_name] = SourceStatus.RATE_LIMITED
            self._unavailable_until[source_name] = time.time() + 3600
            logger.warning(f"{source_name}: RATE LIMITED (unavailable for 60 min)")

        elif status_code in [500, 502, 503, 504]:
            # Server error: stand down for 5 minutes
            self._source_status[source_name] = SourceStatus.TEMPORARILY_DOWN
            self._unavailable_until[source_name] = time.time() + 300
            logger.warning(f"{source_name}: TEMPORARILY DOWN (unavailable for 5 min)")

        elif status_code in [401, 403]:
            # Auth failure: stand down for 24 hours
            self._source_status[source_name] = SourceStatus.TEMPORARILY_DOWN
            self._unavailable_until[source_name] = time.time() + 86400
            logger.error(f"{source_name}: AUTH FAILED (unavailable for 24 hours)")

        else:
            logger.warning(f"{source_name}: Failed ({error_type})")

    def is_available(self, source_name: str) -> bool:
        """Check if source is available"""
        if source_name in self._unavailable_until:
            if time.time() < self._unavailable_until[source_name]:
                return False
            else:
                # Cooldown expired - source can be tried again
                del self._unavailable_until[source_name]
                self._source_status[source_name] = SourceStatus.AVAILABLE

        return True

    def get_stats(self, source_name: str) -> Dict[str, Any]:
        """Get source statistics"""
        if source_name not in self._source_stats:
            return {}

        stats = self._source_stats[source_name]
        total_requests = stats["success_count"] + stats["failure_count"]

        return {
            "total_requests": total_requests,
            "success_count": stats["success_count"],
            "failure_count": stats["failure_count"],
            "success_rate": stats["success_count"] / total_requests if total_requests > 0 else 0,
            "avg_response_time": stats["total_response_time"] / stats["success_count"] if stats["success_count"] > 0 else 0,
            "last_success": stats.get("last_success"),
            "last_failure": stats.get("last_failure"),
            "status": self._source_status.get(source_name, SourceStatus.AVAILABLE).value
        }

    def get_all_stats(self) -> Dict[str, Dict[str, Any]]:
        """Get all source statistics"""
        return {name: self.get_stats(name) for name in self._source_stats.keys()}


class MultiSourceFallbackEngine:
    """
    Core engine for multi-source data fetching with automatic failover
    """

    def __init__(self, config_path: Optional[str] = None):
        """Initialize the fallback engine"""
        # Default to the config file shipped alongside this module
        if config_path is None:
            config_path = Path(__file__).parent / "multi_source_config.json"

        with open(config_path, 'r') as f:
            self.config = json.load(f)

        self.cache = MultiSourceCache()
        self.monitor = SourceMonitor()

        logger.info("Multi-Source Fallback Engine initialized")
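
    # The config file itself is not part of this module; the sketch below is an
    # assumption inferred from how self.config is read in the methods that
    # follow (api_sources.<data_type>.<tier> lists and caching.<data_type>
    # TTLs), not the actual contents of multi_source_config.json:
    #
    #     {
    #       "api_sources": {
    #         "market_prices": {
    #           "primary":   [{"name": "...", "priority": 1, ...}],
    #           "secondary": [...],
    #           "tertiary":  [...]
    #         },
    #         "blockchain_explorer": {"ethereum": [...], ...},
    #         ...
    #       },
    #       "caching": {
    #         "market_prices": {"ttl_seconds": 60, "max_age_seconds": 3600},
    #         ...
    #       }
    #     }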

    def _get_sources_for_data_type(self, data_type: DataType, **kwargs) -> List[Dict[str, Any]]:
        """Get all sources for a data type in priority order"""
        sources = []

        if data_type == DataType.MARKET_PRICES:
            config = self.config["api_sources"]["market_prices"]
            sources.extend(config.get("primary", []))
            sources.extend(config.get("secondary", []))
            sources.extend(config.get("tertiary", []))

        elif data_type == DataType.OHLC_CANDLESTICK:
            config = self.config["api_sources"]["ohlc_candlestick"]
            sources.extend(config.get("primary", []))
            sources.extend(config.get("secondary", []))
            sources.extend(config.get("huggingface_datasets", []))

        elif data_type == DataType.BLOCKCHAIN_EXPLORER:
            chain = kwargs.get("chain", "ethereum")
            config = self.config["api_sources"]["blockchain_explorer"]
            sources.extend(config.get(chain, []))

        elif data_type == DataType.NEWS_FEEDS:
            config = self.config["api_sources"]["news_feeds"]
            sources.extend(config.get("api_sources", []))
            sources.extend(config.get("rss_feeds", []))

        elif data_type == DataType.SENTIMENT_DATA:
            config = self.config["api_sources"]["sentiment_data"]
            sources.extend(config.get("primary", []))
            sources.extend(config.get("social_analytics", []))

        elif data_type == DataType.ONCHAIN_ANALYTICS:
            sources.extend(self.config["api_sources"]["onchain_analytics"])

        elif data_type == DataType.WHALE_TRACKING:
            sources.extend(self.config["api_sources"]["whale_tracking"])

        # Lower priority value = tried first
        sources.sort(key=lambda x: x.get("priority", 999))

        # Drop sources currently inside a backoff window
        available_sources = [s for s in sources if self.monitor.is_available(s["name"])]

        logger.info(f"{data_type.value}: {len(available_sources)}/{len(sources)} sources available")

        return available_sources

    async def _fetch_from_source(
        self,
        source: Dict[str, Any],
        fetch_func: Callable,
        **kwargs
    ) -> Optional[Dict[str, Any]]:
        """Fetch data from a single source"""
        source_name = source["name"]

        try:
            start_time = time.time()

            result = await fetch_func(source, **kwargs)

            response_time = time.time() - start_time

            if result and self._validate_result(result):
                self.monitor.record_success(source_name, response_time)
                return result
            else:
                logger.warning(f"{source_name}: Invalid result")
                self.monitor.record_failure(source_name, "invalid_result")
                return None

        except httpx.HTTPStatusError as e:
            status_code = e.response.status_code
            logger.warning(f"{source_name}: HTTP {status_code}")
            self.monitor.record_failure(source_name, f"http_{status_code}", status_code)
            return None

        except httpx.TimeoutException:
            logger.warning(f"{source_name}: Timeout")
            self.monitor.record_failure(source_name, "timeout")
            return None

        except Exception as e:
            logger.error(f"{source_name}: {type(e).__name__}: {str(e)}")
            self.monitor.record_failure(source_name, type(e).__name__)
            return None
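
    # The fetch_func passed to fetch_with_fallback / fetch_parallel is supplied
    # by the caller. A minimal sketch of a compatible callable (the
    # source["url"] field is a hypothetical example, not defined by this
    # module):
    #
    #     async def fetch_func(source: Dict[str, Any], **kwargs) -> Dict[str, Any]:
    #         async with httpx.AsyncClient(timeout=10) as client:
    #             resp = await client.get(source["url"], params=kwargs)
    #             resp.raise_for_status()   # surfaces httpx.HTTPStatusError above
    #             return resp.json()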

    def _validate_result(self, result: Any) -> bool:
        """Validate result data"""
        if not result:
            return False

        # Accept any non-empty dict or non-empty list
        if isinstance(result, dict):
            return True
        elif isinstance(result, list):
            return len(result) > 0

        return False

    async def fetch_with_fallback(
        self,
        data_type: DataType,
        fetch_func: Callable,
        cache_key: str,
        **kwargs
    ) -> Dict[str, Any]:
        """
        Fetch data with automatic fallback through multiple sources

        Args:
            data_type: Type of data to fetch
            fetch_func: Async function to fetch from a source
            cache_key: Unique cache key
            **kwargs: Additional parameters for fetch function

        Returns:
            Data from successful source or cache
        """
        # 1. Serve from fresh cache if possible
        cached = self.cache.get(cache_key)
        if cached:
            return {
                "success": True,
                "data": cached,
                "source": "cache",
                "cached": True,
                "timestamp": datetime.utcnow().isoformat()
            }

        # 2. Collect available sources in priority order
        sources = self._get_sources_for_data_type(data_type, **kwargs)

        if not sources:
            logger.error(f"No sources available for {data_type.value}")
            return self._emergency_fallback(cache_key, data_type)

        # 3. Try each source in turn until one succeeds
        attempts = 0
        for source in sources:
            attempts += 1
            source_name = source["name"]

            logger.info(f"Attempt {attempts}/{len(sources)}: Trying {source_name}")

            result = await self._fetch_from_source(source, fetch_func, **kwargs)

            if result:
                # Cache the fresh result using the configured TTL
                cache_ttl = self.config["caching"].get(data_type.value, {}).get("ttl_seconds", 60)
                self.cache.set(cache_key, result, cache_ttl)

                logger.info(f"SUCCESS: {source_name} (attempt {attempts}/{len(sources)})")

                return {
                    "success": True,
                    "data": result,
                    "source": source_name,
                    "cached": False,
                    "attempts": attempts,
                    "total_sources": len(sources),
                    "timestamp": datetime.utcnow().isoformat()
                }

        # 4. Every source failed - fall back to stale cache or an error payload
        logger.error(f"All {len(sources)} sources failed for {data_type.value}")
        return self._emergency_fallback(cache_key, data_type)

    def _emergency_fallback(self, cache_key: str, data_type: DataType) -> Dict[str, Any]:
        """Emergency fallback when all sources fail"""
        # Try stale cache within the configured maximum age
        max_age = self.config["caching"].get(data_type.value, {}).get("max_age_seconds", 3600)
        stale_data = self.cache.get_stale(cache_key, max_age)

        if stale_data:
            logger.warning(f"EMERGENCY FALLBACK: Using stale cache for {cache_key}")
            return {
                "success": True,
                "data": stale_data,
                "source": "stale_cache",
                "cached": True,
                "stale": True,
                "warning": "Data may be outdated",
                "timestamp": datetime.utcnow().isoformat()
            }

        # No cached data at all - report the failure explicitly
        logger.error(f"COMPLETE FAILURE: No data available for {cache_key}")
        return {
            "success": False,
            "error": "All sources failed and no cached data available",
            "data_type": data_type.value,
            "timestamp": datetime.utcnow().isoformat()
        }

    async def fetch_parallel(
        self,
        data_type: DataType,
        fetch_func: Callable,
        cache_key: str,
        max_parallel: int = 3,
        **kwargs
    ) -> Dict[str, Any]:
        """
        Fetch from multiple sources in parallel and return the first successful result

        Args:
            data_type: Type of data to fetch
            fetch_func: Async function to fetch from a source
            cache_key: Unique cache key
            max_parallel: Maximum number of parallel requests
            **kwargs: Additional parameters for fetch function

        Returns:
            Data from first successful source
        """
        # Serve from fresh cache if possible
        cached = self.cache.get(cache_key)
        if cached:
            return {
                "success": True,
                "data": cached,
                "source": "cache",
                "cached": True,
                "timestamp": datetime.utcnow().isoformat()
            }

        # Take the top N available sources by priority
        sources = self._get_sources_for_data_type(data_type, **kwargs)[:max_parallel]

        if not sources:
            return self._emergency_fallback(cache_key, data_type)

        logger.info(f"Parallel fetch from {len(sources)} sources")

        tasks = [
            self._fetch_from_source(source, fetch_func, **kwargs)
            for source in sources
        ]

        # Return as soon as the first request yields a valid result
        for completed in asyncio.as_completed(tasks):
            try:
                result = await completed
                if result:
                    cache_ttl = self.config["caching"].get(data_type.value, {}).get("ttl_seconds", 60)
                    self.cache.set(cache_key, result, cache_ttl)

                    logger.info("PARALLEL SUCCESS: Got first result")

                    return {
                        "success": True,
                        "data": result,
                        "source": "parallel_fetch",
                        "cached": False,
                        "timestamp": datetime.utcnow().isoformat()
                    }
            except Exception:
                continue

        logger.error("All parallel requests failed")
        return self._emergency_fallback(cache_key, data_type)

    def get_monitoring_stats(self) -> Dict[str, Any]:
        """Get monitoring statistics for all sources"""
        return {
            "sources": self.monitor.get_all_stats(),
            "timestamp": datetime.utcnow().isoformat()
        }

    def clear_cache(self):
        """Clear all cached data"""
        self.cache.clear()
        logger.info("Cache cleared")


_engine_instance: Optional[MultiSourceFallbackEngine] = None


def get_fallback_engine() -> MultiSourceFallbackEngine:
    """Get or create global fallback engine instance"""
    global _engine_instance
    if _engine_instance is None:
        _engine_instance = MultiSourceFallbackEngine()
    return _engine_instance


__all__ = [
    "MultiSourceFallbackEngine",
    "DataType",
    "SourceStatus",
    "get_fallback_engine"
]
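

if __name__ == "__main__":
    # Minimal smoke-test sketch, not part of the engine itself. It assumes
    # multi_source_config.json exists next to this module and uses a
    # hypothetical fetch function that only reads source["url"]; real source
    # entries may need per-source URL building, auth headers, etc.
    async def _demo_fetch(source: Dict[str, Any], **kwargs) -> Dict[str, Any]:
        async with httpx.AsyncClient(timeout=10) as client:
            resp = await client.get(source["url"], params=kwargs.get("params"))
            resp.raise_for_status()
            return resp.json()

    async def _demo():
        engine = get_fallback_engine()
        result = await engine.fetch_with_fallback(
            data_type=DataType.MARKET_PRICES,
            fetch_func=_demo_fetch,
            cache_key="demo:market_prices"
        )
        print(json.dumps({k: result[k] for k in ("success", "source", "cached") if k in result}, indent=2))
        print(json.dumps(engine.get_monitoring_stats(), indent=2, default=str))

    asyncio.run(_demo())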