"""
EU-Utility - OCR Backend Manager

Manages OCR backend installation and configuration.
Checks registry for installed software and provides auto-configuration.
"""

import sys
import subprocess
from pathlib import Path
from typing import Optional, Dict, List, Tuple

# Windows registry checking. ``winreg`` exists only on Windows, so every use
# of it below must be guarded by ``WINDOWS``.
if sys.platform == 'win32':
    import winreg
    WINDOWS = True
else:
    WINDOWS = False


class OCRBackendManager:
    """Manages OCR backend detection and installation.

    ``self.backends`` maps each backend name ('easyocr', 'tesseract',
    'paddleocr') to a status dict with the keys:

    * ``installed`` - the backend's Python package could be imported.
    * ``available`` - for easyocr/paddleocr, same as ``installed``; for
      tesseract, the ``tesseract`` executable was located (independently of
      whether ``pytesseract`` is importable).
    * ``path`` - string path of the tesseract executable when found,
      otherwise ``None`` (never set for the other backends).
    """

    # (hive, sub-key) pairs probed for an 'InstallDir' value. Built only on
    # Windows: referencing ``winreg`` unconditionally here would raise
    # NameError at import time on every other platform.
    TESSERACT_REGISTRY_PATHS: List[Tuple[int, str]] = [
        (winreg.HKEY_LOCAL_MACHINE, r"SOFTWARE\Tesseract-OCR"),
        (winreg.HKEY_LOCAL_MACHINE, r"SOFTWARE\WOW6432Node\Tesseract-OCR"),
        (winreg.HKEY_CURRENT_USER, r"SOFTWARE\Tesseract-OCR"),
    ] if WINDOWS else []

    # Directories checked for 'tesseract.exe' when the registry lookup fails.
    TESSERACT_DEFAULT_PATHS: List[Path] = [
        Path(r"C:\Program Files\Tesseract-OCR"),
        Path(r"C:\Program Files (x86)\Tesseract-OCR"),
        Path.home() / "Tesseract-OCR",
    ]

    def __init__(self):
        """Create the status table and immediately scan for backends."""
        self.backends: Dict[str, Dict] = {
            'easyocr': {'installed': False, 'available': False, 'path': None},
            'tesseract': {'installed': False, 'available': False, 'path': None},
            'paddleocr': {'installed': False, 'available': False, 'path': None},
        }
        self._scan_backends()

    def _scan_backends(self) -> None:
        """Scan for installed OCR backends and update ``self.backends``."""
        self._check_easyocr()
        self._check_tesseract()
        self._check_paddleocr()

    def _check_easyocr(self) -> None:
        """Check if EasyOCR Python package is installed."""
        try:
            import easyocr  # noqa: F401 - imported only to probe availability
        except ImportError:
            return
        self.backends['easyocr']['installed'] = True
        self.backends['easyocr']['available'] = True

    def _check_tesseract(self) -> None:
        """Check if Tesseract is installed (Python package and binary).

        Sets ``installed`` when ``pytesseract`` imports, and sets ``path`` /
        ``available`` when the executable is found. If both are present,
        ``pytesseract`` is pointed at the located executable.
        """
        status = self.backends['tesseract']

        # Check Python package (imported once and reused below).
        try:
            import pytesseract
        except ImportError:
            pytesseract = None
        else:
            status['installed'] = True

        # Check for binary in various locations
        tesseract_exe = self._find_tesseract_binary()
        if tesseract_exe is None:
            return

        status['path'] = str(tesseract_exe)
        status['available'] = True

        # Configure pytesseract if Python package is installed
        if pytesseract is not None:
            pytesseract.pytesseract.tesseract_cmd = str(tesseract_exe)

    def _find_tesseract_binary(self) -> Optional[Path]:
        """Find Tesseract binary in PATH, registry and common paths.

        Returns:
            ``Path('tesseract')`` when ``tesseract --version`` succeeds via
            PATH; on Windows, otherwise the first existing ``tesseract.exe``
            found through the registry or the default install directories;
            ``None`` when nothing is found.
        """
        # Check if already in PATH
        try:
            result = subprocess.run(
                ['tesseract', '--version'],
                capture_output=True,
                text=True,
                timeout=5,
            )
        except (OSError, subprocess.SubprocessError, UnicodeDecodeError):
            # Not on PATH, not executable, timed out, or produced
            # undecodable output - fall through to the other strategies.
            pass
        else:
            if result.returncode == 0:
                # Tesseract is in PATH
                return Path('tesseract')

        if not WINDOWS:
            return None

        # Check Windows Registry
        for hkey, reg_path in self.TESSERACT_REGISTRY_PATHS:
            try:
                with winreg.OpenKey(hkey, reg_path) as key:
                    install_path, _ = winreg.QueryValueEx(key, 'InstallDir')
                if install_path:
                    exe_path = Path(install_path) / 'tesseract.exe'
                    if exe_path.exists():
                        return exe_path
            except (OSError, TypeError, ValueError):
                # Missing key/value, or a value that is not a usable path.
                continue

        # Check default installation paths
        for path in self.TESSERACT_DEFAULT_PATHS:
            exe_path = path / 'tesseract.exe'
            if exe_path.exists():
                return exe_path

        return None

    def _check_paddleocr(self) -> None:
        """Check if PaddleOCR Python package is installed."""
        try:
            from paddleocr import PaddleOCR  # noqa: F401 - availability probe
        except ImportError:
            return
        self.backends['paddleocr']['installed'] = True
        self.backends['paddleocr']['available'] = True

    def get_backend_status(self, backend: str) -> Dict:
        """Get status of a specific backend.

        Returns a copy of the backend's status dict, or an empty dict when
        the backend name is unknown.
        """
        return self.backends.get(backend, {}).copy()

    def get_all_status(self) -> Dict:
        """Get status of all backends.

        Each per-backend dict is copied so that callers cannot mutate the
        manager's internal state through the returned value (consistent with
        ``get_backend_status``).
        """
        return {name: status.copy() for name, status in self.backends.items()}

    def get_best_available(self) -> Optional[str]:
        """Get the best available backend.

        Returns the first backend flagged ``available`` in the fixed order
        easyocr, tesseract, paddleocr; ``None`` if none is available.
        """
        priority = ['easyocr', 'tesseract', 'paddleocr']
        for backend in priority:
            if self.backends[backend]['available']:
                return backend
        return None

    def install_backend(self, backend: str) -> Tuple[bool, str]:
        """Install a backend via pip.

        Args:
            backend: One of 'easyocr', 'tesseract' (installs ``pytesseract``)
                or 'paddleocr'.

        Returns:
            ``(success, message)``. Never raises: unknown names, pip failures
            and unexpected errors are all reported through the message.
        """
        packages = {
            'easyocr': 'easyocr',
            'tesseract': 'pytesseract',
            'paddleocr': 'paddleocr',
        }

        if backend not in packages:
            return False, f"Unknown backend: {backend}"

        try:
            result = subprocess.run(
                [sys.executable, '-m', 'pip', 'install', packages[backend]],
                capture_output=True,
                text=True,
                timeout=300,
            )

            if result.returncode == 0:
                self._scan_backends()  # Re-scan after install
                return True, f"Successfully installed {backend}"
            return False, f"Failed to install {backend}: {result.stderr}"
        except Exception as e:
            # Reporting boundary: the error is surfaced to the caller as text.
            return False, f"Error installing {backend}: {e}"

    def get_tesseract_install_info(self) -> str:
        """Get instructions for installing Tesseract."""
        # Backslashes are escaped explicitly: '\P' and '\T' are invalid
        # escape sequences and trigger warnings on current Python versions.
        return """Tesseract is not installed or not in PATH.

To install Tesseract:

1. Download installer from:
   https://github.com/UB-Mannheim/tesseract/wiki

2. Run the installer and note the installation path
   (usually C:\\Program Files\\Tesseract-OCR)

3. Add to PATH or use the auto-detect feature

4. Restart EU-Utility

Alternative: Use EasyOCR instead (pip install easyocr)
"""

    def auto_configure_tesseract(self) -> bool:
        """Auto-configure Tesseract from registry/paths.

        Returns:
            ``True`` when, on Windows, a concrete ``tesseract.exe`` path (not
            the bare PATH command) was found and ``pytesseract`` was pointed
            at it; ``False`` otherwise, including on non-Windows platforms
            and when ``pytesseract`` is not importable.
        """
        if not WINDOWS:
            return False

        exe_path = self._find_tesseract_binary()
        if exe_path is None or exe_path == Path('tesseract'):
            return False

        try:
            import pytesseract
        except ImportError:
            return False

        pytesseract.pytesseract.tesseract_cmd = str(exe_path)
        self.backends['tesseract']['available'] = True
        self.backends['tesseract']['path'] = str(exe_path)
        return True


# Singleton instance
_ocr_manager = None


def get_ocr_backend_manager() -> OCRBackendManager:
    """Get the OCR backend manager singleton, creating it on first use."""
    global _ocr_manager
    if _ocr_manager is None:
        _ocr_manager = OCRBackendManager()
    return _ocr_manager