Spaces:
Paused
Paused
#!/usr/bin/env python3
"""
Encoding Fix Script for Legal Dashboard OCR
==========================================
This script fixes Unicode encoding issues that can occur on Windows systems.
Based on solutions from: https://docs.appseed.us/content/how-to-fix/unicodedecodeerror-charmap-codec-cant-decode-byte-0x9d/
"""
import os | |
import sys | |
import codecs | |
def fix_file_encoding(file_path, target_encoding='utf-8'):
    """Re-encode a text file in place using ``target_encoding``.

    The file is read as raw bytes and decoded with the first codec that
    accepts it: UTF-8 (with the BOM stripped when one is present), then
    cp1252, then latin-1. The decoded text is encoded with
    ``target_encoding`` and written back over the original file. Working on
    bytes keeps the file's line endings exactly as they were.

    Args:
        file_path: Path of the file to rewrite.
        target_encoding: Codec used for the rewritten file. Defaults to
            ``'utf-8'``.

    Returns:
        True if the file was rewritten, False if it could not be read,
        decoded, encoded, or written. Failures are reported on stdout
        rather than raised.
    """
    try:
        with open(file_path, 'rb') as f:
            raw = f.read()

        # Plain 'utf-8' accepts a BOM and keeps it as U+FEFF, so the
        # BOM-aware codec has to be chosen up front to actually strip it.
        if raw.startswith(codecs.BOM_UTF8):
            utf8_codec = 'utf-8-sig'
        else:
            utf8_codec = 'utf-8'

        # latin-1 maps every possible byte, so it must come last: any codec
        # listed after it could never be reached.
        encodings_to_try = [utf8_codec, 'cp1252', 'latin-1']

        content = None
        for encoding in encodings_to_try:
            try:
                content = raw.decode(encoding)
            except UnicodeDecodeError:
                continue
            print(
                f"β Successfully read {file_path} with {encoding} encoding")
            break

        # Not reachable while latin-1 is in the list; kept as a guard in
        # case the candidate list is edited later.
        if content is None:
            print(f"β Could not read {file_path} with any encoding")
            return False

        # Encode before opening for writing: opening for write truncates
        # the file, so a failed encode at that point would destroy it.
        encoded = content.encode(target_encoding)
        with open(file_path, 'wb') as f:
            f.write(encoded)

        print(f"β Fixed encoding for {file_path}")
        return True
    except (OSError, ValueError, LookupError) as e:
        # OSError: file access; ValueError: covers UnicodeEncodeError;
        # LookupError: unknown target codec name.
        print(f"β Error fixing {file_path}: {e}")
        return False
def fix_project_encoding():
    """Run the encoding fix over the project's known text files.

    Each path in the fixed list below is resolved relative to the current
    working directory. Existing files are passed to ``fix_file_encoding``;
    missing ones are reported and skipped.

    Returns:
        True only when every listed file was fixed successfully. A missing
        file therefore makes the result False.
    """
    print("π§ Fixing encoding issues in Legal Dashboard OCR project...")

    # Files that might have encoding issues
    candidates = (
        "huggingface_space/app.py",
        "huggingface_space/README.md",
        "requirements.txt",
        "README.md",
        "DEPLOYMENT_INSTRUCTIONS.md",
        "FINAL_DEPLOYMENT_INSTRUCTIONS.md",
        "DEPLOYMENT_SUMMARY.md",
    )

    repaired = 0
    for path in candidates:
        if not os.path.exists(path):
            print(f"β οΈ File not found: {path}")
            continue
        if fix_file_encoding(path):
            repaired += 1

    expected = len(candidates)
    print("\nπ Encoding Fix Results:")
    print(f"β Fixed: {repaired}/{expected} files")
    return repaired == expected
def set_environment_encoding():
    """Set ``PYTHONUTF8=1`` in this process's environment and print hints.

    The variable is written to ``os.environ``. The function also prints the
    equivalent commands for Windows CMD and PowerShell so the user can set
    it in their own shell. Returns nothing.
    """
    print("\nπ§ Setting environment variables for encoding...")

    # Set UTF-8 environment variable for Windows
    os.environ.update(PYTHONUTF8='1')

    # One hint per shell: Windows CMD first, then PowerShell.
    shell_hints = (
        "For Windows CMD, run: set PYTHONUTF8=1",
        "For PowerShell, run: $env:PYTHONUTF8=1",
    )
    for hint in shell_hints:
        print(hint)

    print("β Environment encoding variables set")
def main():
    """Run the full encoding fix and return a process exit status.

    Fixes the project files first, then sets the environment variable, and
    finally prints a summary.

    Returns:
        0 when ``fix_project_encoding`` reported success, otherwise 1.
    """
    rule = "=" * 50
    print("π Legal Dashboard OCR - Encoding Fix")
    print(rule)

    # Fix file encodings
    all_fixed = fix_project_encoding()

    # Set environment encoding
    set_environment_encoding()

    print("\n" + rule)

    if not all_fixed:
        print("β οΈ Some encoding issues remain")
        print("Please check the files manually")
        return 1

    print("π All encoding issues fixed!")
    print("β Project is ready for deployment")
    return 0
# Script entry point: run main() and use its return value as the exit status.
if __name__ == "__main__":
    raise SystemExit(main())