Spaces:
Paused
Paused
| #!/usr/bin/env python3 | |
| """ | |
| Security Check Script for Legal Dashboard OCR | |
| ============================================ | |
| This script checks for hardcoded secrets, tokens, and API keys in the codebase. | |
| Based on security best practices from GitGuardian and Hugging Face documentation. | |
| """ | |
| import os | |
| import re | |
| import sys | |
| from pathlib import Path | |
| def check_for_hardcoded_secrets(): | |
| """Check for hardcoded secrets in the codebase""" | |
| print("π Security Check - Looking for hardcoded secrets...") | |
| # Patterns to look for | |
| secret_patterns = [ | |
| r'hf_[a-zA-Z0-9]{20,}', # Hugging Face tokens | |
| r'sk-[a-zA-Z0-9]{20,}', # OpenAI API keys | |
| r'pk_[a-zA-Z0-9]{20,}', # Stripe public keys | |
| r'sk_[a-zA-Z0-9]{20,}', # Stripe secret keys | |
| r'AKIA[0-9A-Z]{16}', # AWS access keys | |
| r'[0-9a-zA-Z/+]{40}', # AWS secret keys | |
| r'ghp_[a-zA-Z0-9]{36}', # GitHub personal access tokens | |
| r'gho_[a-zA-Z0-9]{36}', # GitHub OAuth tokens | |
| r'ghu_[a-zA-Z0-9]{36}', # GitHub user-to-server tokens | |
| r'ghs_[a-zA-Z0-9]{36}', # GitHub server-to-server tokens | |
| r'ghr_[a-zA-Z0-9]{36}', # GitHub refresh tokens | |
| ] | |
| # Files to check | |
| files_to_check = [ | |
| "app/services/ocr_service.py", | |
| "app/services/ai_service.py", | |
| "app/services/database_service.py", | |
| "app/main.py", | |
| "huggingface_space/app.py", | |
| "requirements.txt", | |
| "README.md" | |
| ] | |
| found_secrets = [] | |
| for file_path in files_to_check: | |
| if os.path.exists(file_path): | |
| try: | |
| with open(file_path, 'r', encoding='utf-8') as f: | |
| content = f.read() | |
| for pattern in secret_patterns: | |
| matches = re.findall(pattern, content) | |
| if matches: | |
| found_secrets.append({ | |
| 'file': file_path, | |
| 'pattern': pattern, | |
| 'matches': matches | |
| }) | |
| except Exception as e: | |
| print(f"β οΈ Error reading {file_path}: {e}") | |
| return found_secrets | |
| def check_environment_variables(): | |
| """Check if environment variables are properly used""" | |
| print("\nπ Checking environment variable usage...") | |
| env_vars_to_check = [ | |
| "HF_TOKEN", | |
| "OPENAI_API_KEY", | |
| "DATABASE_URL", | |
| "SECRET_KEY" | |
| ] | |
| proper_usage = True | |
| for var in env_vars_to_check: | |
| if os.getenv(var): | |
| print(f"β {var} is set in environment") | |
| else: | |
| print( | |
| f"β οΈ {var} not found in environment (this is OK for development)") | |
| return proper_usage | |
| def check_gitignore(): | |
| """Check if sensitive files are properly ignored""" | |
| print("\nπ Checking .gitignore for sensitive files...") | |
| sensitive_files = [ | |
| ".env", | |
| "*.key", | |
| "*.pem", | |
| "secrets.json", | |
| "config.json" | |
| ] | |
| gitignore_content = "" | |
| if os.path.exists(".gitignore"): | |
| with open(".gitignore", 'r') as f: | |
| gitignore_content = f.read() | |
| missing_entries = [] | |
| for file_pattern in sensitive_files: | |
| if file_pattern not in gitignore_content: | |
| missing_entries.append(file_pattern) | |
| if missing_entries: | |
| print(f"β οΈ Missing from .gitignore: {missing_entries}") | |
| return False | |
| else: | |
| print("β .gitignore properly configured") | |
| return True | |
| def generate_security_report(found_secrets): | |
| """Generate security report""" | |
| print("\nπ Security Check Report") | |
| print("=" * 50) | |
| if found_secrets: | |
| print("β HARDCODED SECRETS FOUND:") | |
| for secret in found_secrets: | |
| print(f" File: {secret['file']}") | |
| print(f" Pattern: {secret['pattern']}") | |
| print(f" Matches: {len(secret['matches'])} found") | |
| print(" ---") | |
| return False | |
| else: | |
| print("β No hardcoded secrets found!") | |
| return True | |
| def provide_remediation_advice(): | |
| """Provide advice for fixing security issues""" | |
| print("\nπ§ Security Remediation Advice") | |
| print("=" * 40) | |
| print("1. **Remove Hardcoded Tokens**:") | |
| print(" - Replace hardcoded tokens with environment variables") | |
| print(" - Use os.getenv() to read from environment") | |
| print(" - Set tokens in Hugging Face Space settings") | |
| print("\n2. **Environment Variables**:") | |
| print(" - Set HF_TOKEN in your Space settings") | |
| print(" - Use .env files for local development") | |
| print(" - Never commit .env files to version control") | |
| print("\n3. **Git Security**:") | |
| print(" - Add sensitive files to .gitignore") | |
| print(" - Use git-secrets for pre-commit hooks") | |
| print(" - Regularly audit your repository") | |
| print("\n4. **Hugging Face Best Practices**:") | |
| print(" - Use Space secrets for sensitive data") | |
| print(" - Keep tokens private and rotate regularly") | |
| print(" - Monitor token usage and permissions") | |
| def main(): | |
| """Main security check function""" | |
| print("π Legal Dashboard OCR - Security Check") | |
| print("=" * 50) | |
| # Check for hardcoded secrets | |
| found_secrets = check_for_hardcoded_secrets() | |
| # Check environment variables | |
| env_ok = check_environment_variables() | |
| # Check gitignore | |
| gitignore_ok = check_gitignore() | |
| # Generate report | |
| secrets_ok = generate_security_report(found_secrets) | |
| # Final result | |
| print("\n" + "=" * 50) | |
| if secrets_ok and env_ok and gitignore_ok: | |
| print("π Security check passed!") | |
| print("β No hardcoded secrets found") | |
| print("β Environment variables properly configured") | |
| print("β Git security measures in place") | |
| return 0 | |
| else: | |
| print("β οΈ Security issues found!") | |
| provide_remediation_advice() | |
| return 1 | |
| if __name__ == "__main__": | |
| sys.exit(main()) | |