""" |
|
|
News Aggregator - Uses ALL Free News Resources |
|
|
Maximizes usage of all available free crypto news sources |
|
|
""" |
|
|
|
|
|
import httpx |
|
|
import logging |
|
|
import feedparser |
|
|
import asyncio |
|
|
from typing import Dict, Any, List, Optional |
|
|
from datetime import datetime |
|
|
from fastapi import HTTPException |
|
|
|
|
|
logger = logging.getLogger(__name__) |
|
|
|
|
|
|
|
|
class NewsAggregator:
    """
    Aggregates news from all free sources:
    - CryptoPanic
    - CoinStats
    - CoinTelegraph RSS
    - CoinDesk RSS
    - Decrypt RSS
    - Bitcoin Magazine RSS
    - CryptoSlate RSS
    """

    def __init__(self):
        self.timeout = 10.0
        # All free news sources queried by get_news(); results from every
        # provider are merged and deduplicated there.
        self.providers = {
            "cryptopanic": {
                "base_url": "https://cryptopanic.com/api/v1",
                "type": "api",
                "priority": 1,
                "free": True
            },
            "coinstats": {
                "base_url": "https://api.coinstats.app/public/v1",
                "type": "api",
                "priority": 2,
                "free": True
            },
            "cointelegraph_rss": {
                "base_url": "https://cointelegraph.com/rss",
                "type": "rss",
                "priority": 3,
                "free": True
            },
            "coindesk_rss": {
                "base_url": "https://www.coindesk.com/arc/outboundfeeds/rss/",
                "type": "rss",
                "priority": 4,
                "free": True
            },
            "decrypt_rss": {
                "base_url": "https://decrypt.co/feed",
                "type": "rss",
                "priority": 5,
                "free": True
            },
            "bitcoinmagazine_rss": {
                "base_url": "https://bitcoinmagazine.com/.rss/full/",
                "type": "rss",
                "priority": 6,
                "free": True
            },
            "cryptoslate": {
                "base_url": "https://cryptoslate.com/feed/",
                "type": "rss",
                "priority": 7,
                "free": True
            }
        }

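    # Illustrative only: another feed could be registered in self.providers
    # using the same entry shape. The URL below is an assumption, not a source
    # this module queries today.
    #
    #     "theblock_rss": {
    #         "base_url": "https://www.theblock.co/rss.xml",  # hypothetical feed URL
    #         "type": "rss",
    #         "priority": 8,
    #         "free": True
    #     }
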
    async def get_news(
        self,
        symbol: Optional[str] = None,
        limit: int = 20
    ) -> List[Dict[str, Any]]:
        """
        Get news from all available free providers; providers that fail are skipped.
        """
        all_news = []

        # Query every provider concurrently; exceptions are captured per task so
        # one failing source does not abort the others.
        tasks = []
        for provider_name, provider_info in self.providers.items():
            task = self._fetch_from_provider(provider_name, provider_info, symbol, limit)
            tasks.append(task)

        results = await asyncio.gather(*tasks, return_exceptions=True)

        for provider_name, result in zip(self.providers.keys(), results):
            if isinstance(result, Exception):
                logger.warning(f"⚠️ {provider_name.upper()} failed: {result}")
                continue

            if result:
                all_news.extend(result)
                logger.info(f"✅ {provider_name.upper()}: Fetched {len(result)} articles")

        if not all_news:
            raise HTTPException(
                status_code=503,
                detail="All news providers failed"
            )

        # Newest articles first
        all_news.sort(key=lambda x: x.get("timestamp", 0), reverse=True)

        # Deduplicate by title (case-insensitive), keeping the newest copy
        seen_titles = set()
        unique_news = []
        for article in all_news:
            title_lower = article.get("title", "").lower()
            if title_lower not in seen_titles:
                seen_titles.add(title_lower)
                unique_news.append(article)

        return unique_news[:limit]

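    # Each article returned by get_news() is a flat dict normalized across
    # providers; a representative (illustrative) entry looks like:
    #
    #     {
    #         "title": "...",
    #         "summary": "...",
    #         "url": "https://...",
    #         "source": "CoinDesk",
    #         "published_at": "2024-01-01T00:00:00Z",
    #         "timestamp": 1704067200000,  # milliseconds since the Unix epoch
    #         "provider": "coindesk_rss",
    #     }
    #
    # Some providers add extra keys ("sentiment" for CryptoPanic, "image_url"
    # for CoinStats).
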
    async def _fetch_from_provider(
        self,
        provider_name: str,
        provider_info: Dict[str, Any],
        symbol: Optional[str],
        limit: int
    ) -> List[Dict[str, Any]]:
        """Fetch news from a specific provider; returns an empty list on failure"""
        try:
            if provider_info["type"] == "api":
                if provider_name == "cryptopanic":
                    return await self._get_news_cryptopanic(symbol, limit)
                elif provider_name == "coinstats":
                    return await self._get_news_coinstats(limit)

            elif provider_info["type"] == "rss":
                return await self._get_news_rss(
                    provider_name,
                    provider_info["base_url"],
                    limit
                )

            return []

        except Exception as e:
            logger.warning(f"⚠️ {provider_name} failed: {e}")
            return []

    async def _get_news_cryptopanic(self, symbol: Optional[str], limit: int) -> List[Dict[str, Any]]:
        """Get news from the CryptoPanic public feed"""
        async with httpx.AsyncClient(timeout=self.timeout) as client:
            params = {"public": "true"}
            if symbol:
                params["currencies"] = symbol.upper()

            response = await client.get(
                f"{self.providers['cryptopanic']['base_url']}/posts/",
                params=params
            )
            response.raise_for_status()
            data = response.json()

            news = []
            for post in data.get("results", [])[:limit]:
                news.append({
                    "title": post.get("title", ""),
                    "summary": post.get("title", ""),
                    "url": post.get("url", ""),
                    "source": post.get("source", {}).get("title", "CryptoPanic"),
                    "published_at": post.get("published_at", ""),
                    "timestamp": self._parse_timestamp(post.get("published_at", "")),
                    # Net community sentiment: positive votes minus negative votes
                    "sentiment": post.get("votes", {}).get("positive", 0) - post.get("votes", {}).get("negative", 0),
                    "provider": "cryptopanic"
                })

            return news

    async def _get_news_coinstats(self, limit: int) -> List[Dict[str, Any]]:
        """Get news from CoinStats"""
        async with httpx.AsyncClient(timeout=self.timeout) as client:
            response = await client.get(
                f"{self.providers['coinstats']['base_url']}/news"
            )
            response.raise_for_status()
            data = response.json()

            news = []
            for article in data.get("news", [])[:limit]:
                news.append({
                    "title": article.get("title", ""),
                    "summary": article.get("description", ""),
                    "url": article.get("link", ""),
                    "source": article.get("source", "CoinStats"),
                    "published_at": article.get("feedDate", ""),
                    "timestamp": article.get("feedDate", 0) * 1000 if article.get("feedDate") else 0,
                    "image_url": article.get("imgURL", ""),
                    "provider": "coinstats"
                })

            return news

    async def _get_news_rss(self, provider_name: str, rss_url: str, limit: int) -> List[Dict[str, Any]]:
        """Get news from an RSS feed"""
        async with httpx.AsyncClient(timeout=self.timeout) as client:
            response = await client.get(rss_url)
            response.raise_for_status()

            # feedparser is synchronous, but parsing an already-fetched feed is
            # fast enough to do inline here.
            feed = feedparser.parse(response.text)

            news = []
            for entry in feed.entries[:limit]:
                news.append({
                    "title": entry.get("title", ""),
                    "summary": entry.get("summary", "") or entry.get("description", ""),
                    "url": entry.get("link", ""),
                    "source": provider_name.replace("_rss", "").title(),
                    "published_at": entry.get("published", ""),
                    "timestamp": self._parse_timestamp(entry.get("published", "")),
                    "provider": provider_name
                })

            return news

    def _parse_timestamp(self, date_str: str) -> int:
        """Parse ISO 8601 or RFC 2822 date strings to a Unix timestamp in milliseconds"""
        if not date_str:
            return int(datetime.now(timezone.utc).timestamp() * 1000)

        # ISO 8601 (e.g. CryptoPanic's published_at)
        try:
            dt = datetime.fromisoformat(date_str.replace("Z", "+00:00"))
            return int(dt.timestamp() * 1000)
        except (ValueError, TypeError):
            pass

        # RFC 2822 (typical RSS pubDate format)
        try:
            from email.utils import parsedate_to_datetime
            dt = parsedate_to_datetime(date_str)
            return int(dt.timestamp() * 1000)
        except (ValueError, TypeError):
            pass

        # Fall back to the current time if the date cannot be parsed
        return int(datetime.now(timezone.utc).timestamp() * 1000)

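    # For reference, both formats accepted above resolve to the same millisecond
    # value; e.g. "2024-01-01T00:00:00Z" (ISO 8601) and
    # "Mon, 01 Jan 2024 00:00:00 GMT" (RFC 2822) both parse to 1704067200000.
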
    async def get_latest_news(self, limit: int = 10) -> List[Dict[str, Any]]:
        """Get the latest news from all sources"""
        return await self.get_news(symbol=None, limit=limit)

    async def get_symbol_news(self, symbol: str, limit: int = 10) -> List[Dict[str, Any]]:
        """Get news for a specific symbol"""
        return await self.get_news(symbol=symbol, limit=limit)


# Shared module-level instance
news_aggregator = NewsAggregator()

__all__ = ["NewsAggregator", "news_aggregator"]
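
# --- Usage sketch (illustrative) ---
# A minimal way to exercise the aggregator outside of FastAPI. The "BTC" symbol
# is just an arbitrary example, and this block is not part of the module's
# public API.
if __name__ == "__main__":
    async def _demo() -> None:
        latest = await news_aggregator.get_latest_news(limit=5)
        for article in latest:
            print(f"[{article['provider']}] {article['source']}: {article['title']}")

        btc_news = await news_aggregator.get_symbol_news("BTC", limit=5)
        print(f"Fetched {len(btc_news)} BTC-related articles")

    asyncio.run(_demo())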