Datasourceforcryptocurrency-2 / backend /services /huggingface_inference_client.py
Your Name
feat: UI improvements and error suppression - Enhanced dashboard and market pages with improved header buttons, logo, and currency symbol display - Stopped animated ticker - Removed pie chart legends - Added error suppressor for external service errors (SSE, Permissions-Policy warnings) - Improved header button prominence and icon appearance - Enhanced logo with glow effects and better design - Fixed currency symbol visibility in market tables
8b7b267
#!/usr/bin/env python3
"""
Hugging Face Inference API Client - REAL DATA ONLY
Uses real Hugging Face models for sentiment analysis
NO MOCK DATA - All predictions from real HF models
"""
import httpx
import logging
import os
from typing import Dict, Any, Optional
from datetime import datetime
from fastapi import HTTPException
logger = logging.getLogger(__name__)
class HuggingFaceInferenceClient:
"""
Real Hugging Face Inference API Client
Primary source for real sentiment analysis using NLP models
"""
def __init__(self):
# Strip whitespace from token to avoid "Illegal header value" errors
self.api_token = (os.getenv("HF_API_TOKEN") or os.getenv("HF_TOKEN") or "").strip()
self.base_url = "https://router.huggingface.co/models"
self.timeout = 30.0 # HF models can take time to load
# Real sentiment analysis models
self.models = {
"sentiment_crypto": "cardiffnlp/twitter-roberta-base-sentiment-latest",
"sentiment_financial": "ProsusAI/finbert",
"sentiment_twitter": "finiteautomata/bertweet-base-sentiment-analysis",
"sentiment_general": "nlptown/bert-base-multilingual-uncased-sentiment"
}
self.headers = {
"Content-Type": "application/json"
}
if self.api_token:
self.headers["Authorization"] = f"Bearer {self.api_token}"
def _normalize_sentiment_label(self, label: str, score: float) -> tuple[str, str]:
"""
Normalize different model label formats to standard format
Returns:
(normalized_label, sentiment_text)
"""
label_upper = label.upper()
# Map various label formats
if label_upper in ["POSITIVE", "LABEL_2", "5 STARS", "POS"]:
return ("POSITIVE", "positive")
elif label_upper in ["NEGATIVE", "LABEL_0", "1 STAR", "NEG"]:
return ("NEGATIVE", "negative")
elif label_upper in ["NEUTRAL", "LABEL_1", "3 STARS", "NEU"]:
return ("NEUTRAL", "neutral")
# For star ratings (1-5 stars)
if "STAR" in label_upper:
if "4" in label or "5" in label:
return ("POSITIVE", "positive")
elif "1" in label or "2" in label:
return ("NEGATIVE", "negative")
else:
return ("NEUTRAL", "neutral")
# Default: use score to determine sentiment
if score > 0.6:
return ("POSITIVE", "positive")
elif score < 0.4:
return ("NEGATIVE", "negative")
else:
return ("NEUTRAL", "neutral")
async def analyze_sentiment(
self,
text: str,
model_key: str = "sentiment_crypto"
) -> Dict[str, Any]:
"""
Analyze REAL sentiment using Hugging Face models
Args:
text: Text to analyze
model_key: Model to use (sentiment_crypto, sentiment_financial, etc.)
Returns:
Real sentiment analysis results
"""
try:
# Get model name
model_name = self.models.get(model_key, self.models["sentiment_crypto"])
# Validate input
if not text or len(text.strip()) == 0:
raise HTTPException(
status_code=400,
detail="Missing or invalid text in request body"
)
# Truncate text if too long (max 512 tokens ~ 2000 chars)
if len(text) > 2000:
text = text[:2000]
async with httpx.AsyncClient(timeout=self.timeout) as client:
response = await client.post(
f"{self.base_url}/{model_name}",
headers=self.headers,
json={"inputs": text}
)
# Handle model loading state
if response.status_code == 503:
# Model is loading
try:
error_data = response.json()
estimated_time = error_data.get("estimated_time", 20)
logger.warning(
f"⏳ HuggingFace model {model_name} is loading "
f"(estimated: {estimated_time}s)"
)
return {
"error": "Model is currently loading",
"estimated_time": estimated_time,
"model": model_name,
"timestamp": int(datetime.utcnow().timestamp() * 1000)
}
except:
return {
"error": "Model is currently loading",
"estimated_time": 20,
"model": model_name,
"timestamp": int(datetime.utcnow().timestamp() * 1000)
}
response.raise_for_status()
data = response.json()
# Parse model response
# HF returns: [[{"label": "POSITIVE", "score": 0.95}, ...]]
if isinstance(data, list) and len(data) > 0:
# Get first (or highest score) prediction
if isinstance(data[0], list):
predictions = data[0]
else:
predictions = data
# Get prediction with highest score
best_prediction = max(predictions, key=lambda x: x.get("score", 0))
raw_label = best_prediction.get("label", "NEUTRAL")
raw_score = best_prediction.get("score", 0.5)
# Normalize label
normalized_label, sentiment_text = self._normalize_sentiment_label(
raw_label,
raw_score
)
result = {
"label": normalized_label,
"score": raw_score,
"sentiment": sentiment_text,
"confidence": raw_score,
"text": text[:100] + ("..." if len(text) > 100 else ""),
"model": model_name,
"source": "huggingface",
"timestamp": int(datetime.utcnow().timestamp() * 1000)
}
logger.info(
f"✅ HuggingFace: Sentiment analysis completed "
f"({normalized_label}, confidence: {raw_score:.2f})"
)
return result
else:
# Unexpected response format
logger.error(f"❌ HuggingFace: Unexpected response format: {data}")
raise HTTPException(
status_code=500,
detail="Unexpected response format from model"
)
except httpx.HTTPStatusError as e:
if e.response.status_code == 503:
# Model loading - already handled above
return {
"error": "Model is currently loading",
"estimated_time": 20,
"timestamp": int(datetime.utcnow().timestamp() * 1000)
}
elif e.response.status_code == 400:
logger.error(f"❌ HuggingFace: Bad request: {e}")
raise HTTPException(
status_code=400,
detail="Invalid text or parameters"
)
elif e.response.status_code in (404, 410):
# Endpoint moved or model not available on old host; provide safe fallback
logger.warning("⚠ HuggingFace endpoint returned 404/410; using keyword fallback")
# Simple keyword-based sentiment fallback
text_lower = (text or "").lower()
pos_kw = ["bull", "up", "gain", "profit", "surge", "rally", "strong"]
neg_kw = ["bear", "down", "loss", "drop", "dump", "sell", "weak"]
pos_score = sum(k in text_lower for k in pos_kw)
neg_score = sum(k in text_lower for k in neg_kw)
if pos_score > neg_score:
label, sentiment = ("POSITIVE", "positive")
score = 0.7
elif neg_score > pos_score:
label, sentiment = ("NEGATIVE", "negative")
score = 0.7
else:
label, sentiment = ("NEUTRAL", "neutral")
score = 0.5
return {
"label": label,
"score": score,
"sentiment": sentiment,
"confidence": score,
"text": text[:100] + ("..." if len(text) > 100 else ""),
"model": "fallback-keywords",
"source": "fallback",
"timestamp": int(datetime.utcnow().timestamp() * 1000)
}
else:
logger.error(f"❌ HuggingFace API HTTP error: {e}")
raise HTTPException(
status_code=503,
detail=f"HuggingFace API temporarily unavailable: {str(e)}"
)
except httpx.HTTPError as e:
logger.error(f"❌ HuggingFace API HTTP error: {e}")
raise HTTPException(
status_code=503,
detail=f"HuggingFace API temporarily unavailable: {str(e)}"
)
except HTTPException:
raise
except Exception as e:
logger.error(f"❌ HuggingFace sentiment analysis failed: {e}")
raise HTTPException(
status_code=500,
detail=f"Failed to analyze sentiment: {str(e)}"
)
# Global instance
hf_inference_client = HuggingFaceInferenceClient()
__all__ = ["HuggingFaceInferenceClient", "hf_inference_client"]