|
from typing import Dict, List, Tuple, Optional |
|
import json |
|
import sys |
|
import glob |
|
from pathlib import Path |
|
from collections import defaultdict |
|
|
|
|
|
def get_latest_log() -> str:
    """Find the most recently modified API-usage log in the current directory.

    Only regular files whose names match ``api_usage_*.json`` are considered;
    a directory that happens to match the pattern is ignored.

    Returns:
        str: Path (relative to the current directory) of the matching file
        with the newest modification time.

    Raises:
        SystemExit: With exit status 1 if no matching file is found in the
            current directory.
    """
    log_pattern = "api_usage_*.json"
    # Path.glob yields directories as well as files; keep regular files only
    # so the returned path can actually be opened as a log.
    logs = [path for path in Path(".").glob(log_pattern) if path.is_file()]
    if not logs:
        print(f"No files matching pattern '{log_pattern}' found in current directory")
        sys.exit(1)
    return str(max(logs, key=lambda path: path.stat().st_mtime))
|
|
|
|
|
def _entry_has_image(entry: Dict) -> bool:
    """Return True if any message in the entry has an ``image_url`` content item."""
    messages = entry.get("input", {}).get("messages", [])
    for msg in messages:
        content = msg.get("content", [])
        # Only list-valued content is inspected for image parts; any other
        # content type (e.g. a plain string) is treated as having no image.
        if not isinstance(content, list):
            continue
        if any(isinstance(item, dict) and item.get("type") == "image_url" for item in content):
            return True
    return False


def _question_preview(entry: Dict, limit: int = 100) -> str:
    """Return the entry's question text, truncated to ``limit`` characters.

    The ``...`` marker is appended only when text was actually cut off, so a
    short or empty question is reported verbatim.
    """
    question = entry.get("input", {}).get("question_data", {}).get("question", "")
    if len(question) > limit:
        return question[:limit] + "..."
    return question


def analyze_log_file(filename: str) -> Tuple[List[Dict], List[Dict], Dict[str, List[str]]]:
    """Analyze a log file for entries missing images and errors.

    The file is read line by line as UTF-8. Only lines whose stripped text
    starts with ``{`` are parsed as JSON entries; everything else (blank
    lines, ``HTTP Request:`` traces, other text) is skipped. Entries lacking
    a truthy ``case_id`` or ``question_id`` are ignored.

    Args:
        filename: Path to the log file to analyze

    Returns:
        Tuple containing:
        - List of entries with no images, each a dict with ``case_id``,
          ``question_id`` and a ``question`` preview (at most 100 characters,
          followed by ``...`` when truncated)
        - List of skipped/error entries, each a dict with ``case_id``,
          ``question_id``, ``reason`` and ``status``
        - Dict of processing errors by type (``json_decode`` for unparsable
          lines, ``other`` for any other per-line failure), mapping to
          messages that include the 1-based line number

    Raises:
        SystemExit: If file cannot be found or read
    """
    no_images: List[Dict] = []
    skipped: List[Dict] = []
    errors: Dict[str, List[str]] = defaultdict(list)

    try:
        # Explicit encoding: the platform default is locale-dependent.
        with open(filename, "r", encoding="utf-8") as f:
            for line_num, line in enumerate(f, 1):
                stripped = line.strip()
                # Anything that is not a JSON object line is not a log entry.
                if not stripped.startswith("{"):
                    continue

                try:
                    entry = json.loads(stripped)
                    case_id = entry.get("case_id")
                    question_id = entry.get("question_id")

                    # Entries that cannot be tied to a case and question are ignored.
                    if not case_id or not question_id:
                        continue

                    status = entry.get("status")
                    if status in ("skipped", "error"):
                        skipped.append(
                            {
                                "case_id": case_id,
                                "question_id": question_id,
                                "reason": entry.get("reason"),
                                "status": status,
                            }
                        )
                        continue

                    if not _entry_has_image(entry):
                        no_images.append(
                            {
                                "case_id": case_id,
                                "question_id": question_id,
                                "question": _question_preview(entry),
                            }
                        )
                except json.JSONDecodeError:
                    errors["json_decode"].append(f"Line {line_num}: Invalid JSON")
                except Exception as e:
                    # A malformed entry must not abort the scan; record it and move on.
                    errors["other"].append(f"Line {line_num}: Error processing entry: {e}")
    except FileNotFoundError:
        print(f"Error: Could not find log file: {filename}")
        sys.exit(1)
    except Exception as e:
        # Per-line failures are handled above, so this only sees open/read/decode errors.
        print(f"Error reading file {filename}: {e}")
        sys.exit(1)

    return no_images, skipped, errors
|
|
|
|
|
def print_results(
    filename: str, no_images: List[Dict], skipped: List[Dict], errors: Dict[str, List[str]]
) -> None:
    """Print analysis results.

    Writes three sections to stdout: entries without images, skipped/error
    entries, and (only when at least one message exists) processing errors
    grouped by type. Both totals are printed even when their lists are empty.

    Args:
        filename: Name of the analyzed log file
        no_images: List of entries with no images; each needs ``case_id``,
            ``question_id`` and ``question`` keys
        skipped: List of skipped/error entries; each needs ``case_id``,
            ``question_id`` and ``status`` keys, ``reason`` is optional
        errors: Dict of processing errors by type
    """
    print(f"\nAnalyzing log file: {filename}")

    print("\n=== Questions with No Images ===")
    for entry in no_images:
        print(f"\nCase ID: {entry['case_id']}")
        print(f"Question ID: {entry['question_id']}")
        print(f"Question Preview: {entry['question']}")
    print(f"\nTotal questions without images: {len(no_images)}")

    print("\n=== Skipped/Error Questions ===")
    for entry in skipped:
        print(f"\nCase ID: {entry['case_id']}")
        print(f"Question ID: {entry['question_id']}")
        print(f"Status: {entry['status']}")
        # The "reason" key may be present but None, so a .get() default alone
        # would never apply; fall back on any empty value.
        print(f"Reason: {entry.get('reason') or 'unknown'}")
    print(f"\nTotal skipped/error questions: {len(skipped)}")

    # Skip the whole section when every error type has an empty message list.
    if any(errors.values()):
        print("\n=== Processing Errors ===")
        for error_type, messages in errors.items():
            if messages:
                print(f"\n{error_type}:")
                for msg in messages:
                    print(f" {msg}")
|
|
|
|
|
def main() -> None:
    """Run the log validation script from the command line.

    The first command-line argument, when given, is used as the log file
    path; otherwise the most recently modified log in the current directory
    is selected. The file is analyzed and the findings are printed.
    """
    cli_args = sys.argv[1:]
    log_file = cli_args[0] if cli_args else get_latest_log()

    analysis = analyze_log_file(log_file)
    # analysis is the (no_images, skipped, errors) triple, in print_results' order.
    print_results(log_file, *analysis)


if __name__ == "__main__":
    main()
|
|