import json
import os
import datetime
import threading
from collections import defaultdict
from typing import Any, Dict, Optional

from fastapi import Request

class UsageTracker:
    def __init__(self, data_file="usage_data.json"):
        self.data_file = data_file
        self.lock = threading.Lock()
        self.data = self._load_data()
        self._schedule_save()

    def _load_data(self) -> Dict[str, Any]:
        """
        Loads usage data from the JSON file, ensuring data integrity.
        Handles cases where the file is corrupted or in an old format.
        """
        if os.path.exists(self.data_file):
            try:
                with open(self.data_file, 'r') as f:
                    data = json.load(f)
                # Check whether the data is already in the expected new format.
                if isinstance(data, dict) and 'requests' in data and 'models' in data and 'api_endpoints' in data:
                    # json.load yields plain dicts; re-wrap the aggregates in
                    # defaultdicts so lookups for new models/endpoints succeed.
                    fresh = self._initialize_empty_data()
                    fresh['requests'] = data['requests']
                    fresh['models'].update(data['models'])
                    fresh['api_endpoints'].update(data['api_endpoints'])
                    return fresh
                # If the data is in an older, simpler format, convert it.
                elif isinstance(data, dict) and 'total_requests' in data:  # Heuristic for the old format
                    return self._convert_old_format(data)
            except (json.JSONDecodeError, TypeError) as e:
                print(f"Warning: Could not decode JSON from {self.data_file} ({e}). Starting fresh.")
        return self._initialize_empty_data()

    def _initialize_empty_data(self) -> Dict[str, Any]:
        """
        Initializes a new, empty data structure for usage tracking.
        The structure holds a list of raw requests plus dictionaries of
        aggregated per-model and per-endpoint statistics.
        """
        return {
            'requests': [],
            'models': defaultdict(lambda: {'total_requests': 0, 'first_used': None, 'last_used': None}),
            'api_endpoints': defaultdict(lambda: {'total_requests': 0, 'first_used': None, 'last_used': None})
        }

    def _convert_old_format(self, old_data: Dict[str, Any]) -> Dict[str, Any]:
        """
        Converts data from the old format to the new detailed format.
        This avoids data loss on updates: it iterates through old
        'requests' (if any) and re-records them into the new structure.
        """
        print("Converting old usage data format to new format.")
        new_data = self._initialize_empty_data()
        # Preserve existing requests if they follow a basic structure.
        if 'requests' in old_data and isinstance(old_data['requests'], list):
            for req in old_data['requests']:
                # Extract the relevant fields from the old request entry.
                timestamp_str = req.get('timestamp')
                model_name = req.get('model', 'unknown_model')
                endpoint_name = req.get('endpoint', 'unknown_endpoint')
                ip_address = req.get('ip_address', 'N/A')
                user_agent = req.get('user_agent', 'N/A')
                # Ensure the timestamp is valid and parseable.
                try:
                    timestamp = datetime.datetime.fromisoformat(timestamp_str) if timestamp_str else datetime.datetime.now(datetime.timezone.utc)
                except ValueError:
                    timestamp = datetime.datetime.now(datetime.timezone.utc)  # Fallback for malformed timestamps
                if timestamp.tzinfo is None:
                    # Old entries may be naive; assume UTC so later comparisons
                    # never mix naive and aware datetimes (a TypeError).
                    timestamp = timestamp.replace(tzinfo=datetime.timezone.utc)
                new_data['requests'].append({
                    'timestamp': timestamp.isoformat(),
                    'model': model_name,
                    'endpoint': endpoint_name,
                    'ip_address': ip_address,
                    'user_agent': user_agent,
                })
                # Update the aggregated model and endpoint stats so that even
                # old data contributes to the new summary.
                for stats in (new_data['models'][model_name], new_data['api_endpoints'][endpoint_name]):
                    stats['total_requests'] += 1
                    if not stats['first_used'] or timestamp < datetime.datetime.fromisoformat(stats['first_used']):
                        stats['first_used'] = timestamp.isoformat()
                    if not stats['last_used'] or timestamp > datetime.datetime.fromisoformat(stats['last_used']):
                        stats['last_used'] = timestamp.isoformat()
        print("Data conversion complete.")
        return new_data

    def save_data(self):
        """Persists the current usage data to the JSON file."""
        with self.lock:
            try:
                # Convert defaultdicts to regular dicts for JSON serialization.
                serializable_data = {
                    'requests': self.data['requests'],
                    'models': dict(self.data['models']),
                    'api_endpoints': dict(self.data['api_endpoints'])
                }
                with open(self.data_file, 'w') as f:
                    json.dump(serializable_data, f, indent=4)
            except IOError as e:
                print(f"Error saving usage data to {self.data_file}: {e}")

    def _schedule_save(self):
        """Saves the data now and schedules another save in 60 seconds."""
        self.save_data()
        # A daemon timer will not keep the process alive at shutdown; if a
        # final save on exit matters, also register save_data with atexit.
        timer = threading.Timer(60.0, self._schedule_save)
        timer.daemon = True
        timer.start()

    def record_request(self, request: Optional[Request] = None, model: str = "unknown", endpoint: str = "unknown"):
        """
        Records a single API request with detailed information.
        Updates both the raw request list and the aggregated statistics.
        """
        with self.lock:
            now = datetime.datetime.now(datetime.timezone.utc)
            ip_address = request.client.host if request and request.client else "N/A"
            user_agent = request.headers.get("user-agent", "N/A") if request else "N/A"
            # Append to the raw requests list.
            self.data['requests'].append({
                'timestamp': now.isoformat(),
                'model': model,
                'endpoint': endpoint,
                'ip_address': ip_address,
                'user_agent': user_agent,
            })
            # Update model-specific stats.
            model_stats = self.data['models'][model]
            model_stats['total_requests'] += 1
            if model_stats['first_used'] is None or now < datetime.datetime.fromisoformat(model_stats['first_used']):
                model_stats['first_used'] = now.isoformat()
            if model_stats['last_used'] is None or now > datetime.datetime.fromisoformat(model_stats['last_used']):
                model_stats['last_used'] = now.isoformat()
            # Update endpoint-specific stats.
            endpoint_stats = self.data['api_endpoints'][endpoint]
            endpoint_stats['total_requests'] += 1
            if endpoint_stats['first_used'] is None or now < datetime.datetime.fromisoformat(endpoint_stats['first_used']):
                endpoint_stats['first_used'] = now.isoformat()
            if endpoint_stats['last_used'] is None or now > datetime.datetime.fromisoformat(endpoint_stats['last_used']):
                endpoint_stats['last_used'] = now.isoformat()

    def get_usage_summary(self, days: int = 7) -> Dict[str, Any]:
        """
        Generates a comprehensive summary of usage for the given number of days.
        Includes total requests, per-model and per-endpoint usage, daily usage,
        and unique IP counts.
        """
        with self.lock:
            summary = {
                'total_requests': 0,
                'model_usage': defaultdict(int),     # Requests per model for the period
                'endpoint_usage': defaultdict(int),  # Requests per endpoint for the period
                'daily_usage': defaultdict(lambda: {'requests': 0, 'unique_ips': set()}),  # Daily stats
                'unique_ips_total': set(),           # Unique IPs across all requests
                'recent_requests': []
            }
            # All-time aggregates for the model and endpoint charts. These come
            # from 'self.data' and cover all time; the 'model_usage' and
            # 'endpoint_usage' counters above cover only the given 'days'.
            all_time_model_data = {
                model: {
                    'total_requests': stats['total_requests'],
                    'first_used': stats['first_used'],
                    'last_used': stats['last_used']
                } for model, stats in self.data['models'].items()
            }
            all_time_endpoint_data = {
                endpoint: {
                    'total_requests': stats['total_requests'],
                    'first_used': stats['first_used'],
                    'last_used': stats['last_used']
                } for endpoint, stats in self.data['api_endpoints'].items()
            }
            cutoff_date = datetime.datetime.now(datetime.timezone.utc) - datetime.timedelta(days=days)
            # Iterate backwards (newest first) so the first matches within the
            # period become the 'recent_requests' sample.
            for req in reversed(self.data['requests']):
                req_time = datetime.datetime.fromisoformat(req['timestamp'])
                if req_time.tzinfo is None:
                    # Guard against naive timestamps from older data files.
                    req_time = req_time.replace(tzinfo=datetime.timezone.utc)
                # Total requests and unique IPs are always all-time figures.
                summary['total_requests'] += 1
                summary['unique_ips_total'].add(req['ip_address'])
                if req_time >= cutoff_date:
                    date_str = req_time.strftime("%Y-%m-%d")
                    # Aggregate data for charts and tables within the period.
                    summary['model_usage'][req['model']] += 1
                    summary['endpoint_usage'][req['endpoint']] += 1
                    summary['daily_usage'][date_str]['requests'] += 1
                    summary['daily_usage'][date_str]['unique_ips'].add(req['ip_address'])
                    # Keep up to 20 of the most recent requests.
                    if len(summary['recent_requests']) < 20:
                        summary['recent_requests'].append(req)
            # Convert each day's unique-IP set into a count.
            for date_str, daily_stats in summary['daily_usage'].items():
                daily_stats['unique_ips_count'] = len(daily_stats['unique_ips'])
                del daily_stats['unique_ips']  # Remove the set before returning
            # Sort daily usage by date.
            summary['daily_usage'] = dict(sorted(summary['daily_usage'].items()))
            # Convert defaultdicts to regular dicts for the final summary.
            summary['model_usage_period'] = dict(summary['model_usage'])
            summary['endpoint_usage_period'] = dict(summary['endpoint_usage'])
            summary['daily_usage_period'] = dict(summary['daily_usage'])
            # Add the all-time data.
            summary['all_time_model_usage'] = all_time_model_data
            summary['all_time_endpoint_usage'] = all_time_endpoint_data
            summary['unique_ips_total_count'] = len(summary['unique_ips_total'])
            del summary['unique_ips_total']  # No need to return the whole set
            # Drop the intermediate defaultdicts from the returned structure.
            del summary['model_usage']
            del summary['endpoint_usage']
            del summary['daily_usage']
            return summary
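
# --- Usage sketch ---
# A minimal, illustrative example of exercising the tracker directly, not part
# of the module's public surface. The file name, model, and endpoint values
# below are placeholders chosen for the demo.
if __name__ == "__main__":
    tracker = UsageTracker(data_file="usage_demo.json")
    # Without a FastAPI Request object, ip_address and user_agent fall back to "N/A".
    tracker.record_request(model="demo-model", endpoint="/v1/chat/completions")
    tracker.record_request(model="demo-model", endpoint="/v1/models")
    # The summary is plain JSON-serializable data (sets and defaultdicts removed).
    print(json.dumps(tracker.get_usage_summary(days=7), indent=2))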