#!/usr/bin/env python3
"""
Simple Test Script for Scraping and Rating System
=================================================

This script tests the basic functionality of the scraping and rating system.
Run this to verify that all components are working correctly.
"""

import asyncio
import functools
import importlib
import json  # noqa: F401  (kept: part of the script's original import set)
import os
import sys
from datetime import datetime

# The search path must be extended BEFORE the project imports below, otherwise
# it has no effect on them. ``app.services`` is resolved from the directory
# that contains ``app/`` (this script's directory); the ``app`` directory
# itself is also added, as the original script did.
_PROJECT_ROOT = os.path.dirname(os.path.abspath(__file__))
for _path in (_PROJECT_ROOT, os.path.join(_PROJECT_ROOT, 'app')):
    if _path not in sys.path:
        sys.path.append(_path)

from app.services.rating_service import RatingService  # noqa: E402
from app.services.scraping_service import (  # noqa: E402
    ScrapingService,
    ScrapingStrategy,
)


async def test_scraping_service():
    """Exercise the scraping service: start a job, read status and statistics.

    Returns:
        bool: True when every call completed without raising, False otherwise.
    """
    print("🧪 Testing Scraping Service...")

    try:
        # Initialize scraping service
        scraping_service = ScrapingService(db_path=":memory:")
        print("✅ Scraping service initialized")

        # Test URL processing
        test_urls = [
            "https://example.com/test1",
            "https://example.com/test2",
        ]

        # Start a scraping job
        job_id = await scraping_service.start_scraping_job(
            urls=test_urls,
            strategy=ScrapingStrategy.GENERAL,
            max_depth=1,
            delay=0.1,
        )
        print(f"✅ Started scraping job: {job_id}")

        # Get job status
        status = await scraping_service.get_job_status(job_id)
        print(f"✅ Job status: {status['status']}")

        # Get statistics
        stats = await scraping_service.get_scraping_statistics()
        print(f"✅ Statistics: {stats}")

        return True

    except Exception as e:
        # Test boundary: report any failure instead of aborting the whole run.
        print(f"❌ Scraping service test failed: {e}")
        return False


async def test_rating_service():
    """Exercise the rating service: rate one item and fetch the summary.

    Returns:
        bool: True when every call completed without raising, False otherwise.
    """
    print("\n🧪 Testing Rating Service...")

    try:
        # Initialize rating service
        rating_service = RatingService(db_path=":memory:")
        print("✅ Rating service initialized")

        # Create test item data
        test_item = {
            'id': 'test_item_001',
            'url': 'https://example.com/legal-doc',
            'title': 'Test Legal Document',
            'content': 'This is a test legal document with proper structure and legal terms including contract and agreement.',
            'metadata': {'content_type': 'text/html'},
            'timestamp': datetime.now().isoformat(),
            'source_url': 'https://example.com/legal-doc',
            'domain': 'example.com',
            'word_count': 20,
            'language': 'english',
            'strategy_used': 'legal_documents',
        }

        # Rate the item
        result = await rating_service.rate_item(test_item)
        print(
            f"✅ Rated item: {result.rating_level.value} ({result.overall_score:.3f})")
        print(f" Criteria scores: {result.criteria_scores}")

        # Get rating summary
        summary = await rating_service.get_rating_summary()
        print(f"✅ Rating summary: {summary['total_rated']} items rated")

        return True

    except Exception as e:
        # Test boundary: report any failure instead of aborting the whole run.
        print(f"❌ Rating service test failed: {e}")
        return False


async def test_integration():
    """Store a mock scraped item, then rate the same item.

    Returns:
        bool: True when both steps completed without raising, False otherwise.
    """
    print("\n🧪 Testing Integration...")

    try:
        # Initialize both services
        scraping_service = ScrapingService(db_path=":memory:")
        rating_service = RatingService(db_path=":memory:")
        print("✅ Services initialized")

        # Create mock scraped item
        mock_item = {
            'id': 'integration_test_001',
            'url': 'https://court.gov.ir/document',
            'title': 'Legal Court Document',
            'content': 'This is a legal court document with proper legal structure and terminology.',
            'metadata': {'content_type': 'text/html', 'job_id': 'test_job'},
            'timestamp': datetime.now().isoformat(),
            'source_url': 'https://court.gov.ir/document',
            'domain': 'court.gov.ir',
            'word_count': 50,
            'language': 'english',
            'strategy_used': 'legal_documents',
        }

        # Simulate scraping process. NOTE: this calls a private method of the
        # scraping service directly to bypass real network access.
        await scraping_service._store_scraped_item(mock_item)
        print("✅ Mock item stored")

        # Rate the item
        rating_result = await rating_service.rate_item(mock_item)
        print(
            f"✅ Integration test completed: {rating_result.rating_level.value}")

        return True

    except Exception as e:
        # Test boundary: report any failure instead of aborting the whole run.
        print(f"❌ Integration test failed: {e}")
        return False


async def test_api_endpoints():
    """Probe the HTTP API on localhost:8000 (requires a running server).

    The test is skipped (and counted as passed) when the ``requests`` package
    is not installed or when the server cannot be reached. A non-200 response
    only produces a warning.

    Returns:
        bool: False only when an unexpected error occurs, True otherwise.
    """
    print("\n🧪 Testing API Endpoints...")

    # Import outside the main ``try``: the ``except`` clause below refers to
    # ``requests.exceptions``, which would itself fail with UnboundLocalError
    # if the import had raised inside that ``try``.
    try:
        import requests
    except ImportError:
        print("⚠️ requests not installed - skipping API tests")
        return True

    base_url = "http://localhost:8000"

    # (path, success message, label used in the warning message)
    endpoints = [
        ("/api/health", "Health endpoint working", "Health endpoint"),
        ("/api/scrape/statistics", "Scraping statistics endpoint working",
         "Scraping statistics"),
        ("/api/rating/summary", "Rating summary endpoint working",
         "Rating summary"),
    ]

    loop = asyncio.get_running_loop()

    try:
        for path, ok_message, warn_label in endpoints:
            # requests is synchronous; run it in the default executor so the
            # other tests gathered alongside this one are not stalled.
            response = await loop.run_in_executor(
                None,
                functools.partial(
                    requests.get, f"{base_url}{path}", timeout=5),
            )
            if response.status_code == 200:
                print(f"✅ {ok_message}")
            else:
                print(f"⚠️ {warn_label} returned {response.status_code}")

        return True

    except requests.exceptions.ConnectionError:
        print("⚠️ Server not running - skipping API tests")
        return True
    except Exception as e:
        # Test boundary: report any failure instead of aborting the whole run.
        print(f"❌ API test failed: {e}")
        return False


def test_dependencies():
    """Check that every required package can be imported.

    Prints one status line per package and, when something is missing, a
    ``pip install`` hint for the packages that can be installed with pip.

    Returns:
        bool: True when all packages import, False when any is missing.
    """
    print("🧪 Testing Dependencies...")

    required_packages = [
        'fastapi',
        'aiohttp',
        'bs4',
        'numpy',
        'pydantic',
        'sqlite3',
    ]
    # Import name -> PyPI distribution name, where the two differ.
    pip_names = {'bs4': 'beautifulsoup4'}
    # sqlite3 ships with Python itself and cannot be installed through pip.
    not_pip_installable = {'sqlite3'}

    missing_packages = []

    for package in required_packages:
        try:
            importlib.import_module(package)
        except ImportError:
            print(f"❌ {package} - MISSING")
            missing_packages.append(package)
        else:
            print(f"✅ {package}")

    if not missing_packages:
        return True

    print(f"\n⚠️ Missing packages: {', '.join(missing_packages)}")
    installable = [
        pip_names.get(pkg, pkg)
        for pkg in missing_packages
        if pkg not in not_pip_installable
    ]
    if installable:
        print("Install with: pip install " + " ".join(installable))
    return False


async def main():
    """Run the dependency check, then all tests concurrently, and summarize.

    Returns:
        bool: True when every test passed; False when the dependency check
        failed or any test failed or raised.
    """
    print("🚀 Starting Scraping and Rating System Tests")
    print("=" * 50)

    # Test dependencies first
    if not test_dependencies():
        print("\n❌ Dependency test failed. Please install missing packages.")
        return False

    # Run all tests
    tests = [
        test_scraping_service(),
        test_rating_service(),
        test_integration(),
        test_api_endpoints(),
    ]

    # return_exceptions=True keeps one crashing test from cancelling the rest;
    # the exception object is reported in the summary instead.
    results = await asyncio.gather(*tests, return_exceptions=True)

    # Print summary
    print("\n" + "=" * 50)
    print("📊 Test Results Summary")
    print("=" * 50)

    test_names = [
        "Scraping Service",
        "Rating Service",
        "Integration",
        "API Endpoints",
    ]

    passed = 0
    total = len(results)

    for name, result in zip(test_names, results):
        if isinstance(result, Exception):
            print(f"❌ {name}: Failed with exception - {result}")
        elif result:
            print(f"✅ {name}: Passed")
            passed += 1
        else:
            print(f"❌ {name}: Failed")

    print(f"\n🎯 Overall: {passed}/{total} tests passed")

    if passed == total:
        print("🎉 All tests passed! The system is working correctly.")
    else:
        print("⚠️ Some tests failed. Please check the errors above.")

    return passed == total


if __name__ == "__main__":
    try:
        success = asyncio.run(main())
        sys.exit(0 if success else 1)
    except KeyboardInterrupt:
        print("\n⏹️ Tests interrupted by user")
        sys.exit(1)
    except Exception as e:
        print(f"\n💥 Unexpected error: {e}")
        sys.exit(1)