""" Lemontropia Suite - OCR Backends Base Interface Unified interface for multiple OCR backends with auto-fallback. """ from abc import ABC, abstractmethod from dataclasses import dataclass from typing import List, Tuple, Optional, Dict, Any, Union from pathlib import Path import numpy as np import logging logger = logging.getLogger(__name__) @dataclass class OCRTextRegion: """Detected text region with metadata.""" text: str confidence: float bbox: Tuple[int, int, int, int] # x, y, w, h language: str = "en" def to_dict(self) -> Dict[str, Any]: return { 'text': self.text, 'confidence': self.confidence, 'bbox': self.bbox, 'language': self.language } @dataclass class OCRBackendInfo: """Information about an OCR backend.""" name: str available: bool gpu_accelerated: bool = False error_message: Optional[str] = None version: Optional[str] = None def to_dict(self) -> Dict[str, Any]: return { 'name': self.name, 'available': self.available, 'gpu_accelerated': self.gpu_accelerated, 'error_message': self.error_message, 'version': self.version } class BaseOCRBackend(ABC): """Abstract base class for OCR backends.""" NAME = "base" SUPPORTS_GPU = False def __init__(self, use_gpu: bool = True, lang: str = 'en', **kwargs): self.use_gpu = use_gpu self.lang = lang self._available = False self._error_msg = None self._version = None @abstractmethod def _initialize(self) -> bool: """Initialize the backend. Return True if successful.""" pass @abstractmethod def extract_text(self, image: np.ndarray) -> List[OCRTextRegion]: """Extract text from image.""" pass def is_available(self) -> bool: """Check if backend is available.""" return self._available def get_info(self) -> OCRBackendInfo: """Get backend information.""" return OCRBackendInfo( name=self.NAME, available=self._available, gpu_accelerated=self.SUPPORTS_GPU and self.use_gpu, error_message=self._error_msg, version=self._version ) def preprocess_image(self, image: np.ndarray, grayscale: bool = True, denoise: bool = True, contrast: bool = True) -> np.ndarray: """Preprocess image for better OCR results.""" processed = image.copy() # Convert to grayscale if needed if grayscale and len(processed.shape) == 3: processed = self._to_grayscale(processed) # Denoise if denoise: processed = self._denoise(processed) # Enhance contrast if contrast: processed = self._enhance_contrast(processed) return processed def _to_grayscale(self, image: np.ndarray) -> np.ndarray: """Convert image to grayscale.""" if len(image.shape) == 3: import cv2 return cv2.cvtColor(image, cv2.COLOR_BGR2GRAY) return image def _denoise(self, image: np.ndarray) -> np.ndarray: """Denoise image.""" import cv2 if len(image.shape) == 2: return cv2.fastNlMeansDenoising(image, None, 10, 7, 21) return image def _enhance_contrast(self, image: np.ndarray) -> np.ndarray: """Enhance image contrast.""" import cv2 if len(image.shape) == 2: # CLAHE (Contrast Limited Adaptive Histogram Equalization) clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8)) return clahe.apply(image) return image class OCRBackendFactory: """Factory for creating OCR backends with auto-fallback.""" # Priority order: fastest/most reliable first BACKEND_PRIORITY = [ 'opencv_east', # Fastest, no dependencies, detection only 'easyocr', # Good accuracy, lighter than PaddleOCR 'tesseract', # Traditional, stable 'paddleocr', # Best accuracy but heavy dependencies ] _backends: Dict[str, Any] = {} _backend_classes: Dict[str, type] = {} @classmethod def register_backend(cls, name: str, backend_class: type): """Register a backend class.""" cls._backend_classes[name] = backend_class logger.debug(f"Registered OCR backend: {name}") @classmethod def create_backend(cls, name: str, use_gpu: bool = True, lang: str = 'en', **kwargs) -> Optional[BaseOCRBackend]: """Create a specific backend by name.""" if name not in cls._backend_classes: logger.error(f"Unknown OCR backend: {name}") return None try: backend = cls._backend_classes[name](use_gpu=use_gpu, lang=lang, **kwargs) if backend._initialize(): logger.info(f"Created OCR backend: {name}") return backend else: logger.warning(f"Failed to initialize OCR backend: {name}") return None except Exception as e: logger.error(f"Error creating OCR backend {name}: {e}") return None @classmethod def get_best_backend(cls, use_gpu: bool = True, lang: str = 'en', priority: Optional[List[str]] = None, **kwargs) -> Optional[BaseOCRBackend]: """Get the best available backend based on priority order.""" priority = priority or cls.BACKEND_PRIORITY logger.info(f"Searching for best OCR backend (priority: {priority})") for name in priority: if name not in cls._backend_classes: continue backend = cls.create_backend(name, use_gpu=use_gpu, lang=lang, **kwargs) if backend is not None and backend.is_available(): info = backend.get_info() logger.info(f"Selected OCR backend: {name} (GPU: {info.gpu_accelerated})") return backend logger.error("No OCR backend available!") return None @classmethod def check_all_backends(cls, use_gpu: bool = True, lang: str = 'en') -> List[OCRBackendInfo]: """Check availability of all backends.""" results = [] for name in cls.BACKEND_PRIORITY: if name not in cls._backend_classes: continue try: backend = cls._backend_classes[name](use_gpu=use_gpu, lang=lang) backend._initialize() results.append(backend.get_info()) except Exception as e: results.append(OCRBackendInfo( name=name, available=False, error_message=str(e) )) return results @classmethod def list_available_backends(cls, use_gpu: bool = True, lang: str = 'en') -> List[str]: """List names of available backends.""" info_list = cls.check_all_backends(use_gpu, lang) return [info.name for info in info_list if info.available] # Import and register backends def _register_backends(): """Register all available backends.""" try: from .opencv_east_backend import OpenCVEASTBackend OCRBackendFactory.register_backend('opencv_east', OpenCVEASTBackend) except ImportError as e: logger.debug(f"OpenCV EAST backend not available: {e}") try: from .easyocr_backend import EasyOCRBackend OCRBackendFactory.register_backend('easyocr', EasyOCRBackend) except ImportError as e: logger.debug(f"EasyOCR backend not available: {e}") try: from .tesseract_backend import TesseractBackend OCRBackendFactory.register_backend('tesseract', TesseractBackend) except ImportError as e: logger.debug(f"Tesseract backend not available: {e}") try: from .paddleocr_backend import PaddleOCRBackend OCRBackendFactory.register_backend('paddleocr', PaddleOCRBackend) except ImportError as e: logger.debug(f"PaddleOCR backend not available: {e}") # Auto-register on import _register_backends()