Lemontropia-Suite/modules/ocr_backends/__init__.py

255 lines
8.3 KiB
Python

"""
Lemontropia Suite - OCR Backends Base Interface
Unified interface for multiple OCR backends with auto-fallback.
"""
from abc import ABC, abstractmethod
from dataclasses import dataclass
from typing import List, Tuple, Optional, Dict, Any, Union
from pathlib import Path
import numpy as np
import logging
# Module-level logger keyed on this module's ``__name__``; used by
# OCRBackendFactory and _register_backends below.
logger = logging.getLogger(__name__)
@dataclass
class OCRTextRegion:
    """One piece of recognised text together with its location and score.

    Attributes:
        text: The recognised string.
        confidence: Score attached to the recognition by the backend.
        bbox: Bounding box stored as ``(x, y, w, h)``.
        language: Language tag for the text; defaults to ``"en"``.
    """

    text: str
    confidence: float
    bbox: Tuple[int, int, int, int]  # x, y, w, h
    language: str = "en"

    def to_dict(self) -> Dict[str, Any]:
        """Return the four fields as a plain dictionary keyed by field name."""
        return dict(
            text=self.text,
            confidence=self.confidence,
            bbox=self.bbox,
            language=self.language,
        )
@dataclass
class OCRBackendInfo:
    """Status record describing a single OCR backend.

    Attributes:
        name: Backend identifier.
        available: Whether the backend reported itself usable.
        gpu_accelerated: Whether GPU acceleration is in effect.
        error_message: Failure description, or ``None`` when there is none.
        version: Backend version string, or ``None`` when unknown.
    """

    name: str
    available: bool
    gpu_accelerated: bool = False
    error_message: Optional[str] = None
    version: Optional[str] = None

    def to_dict(self) -> Dict[str, Any]:
        """Return every field as a plain dictionary keyed by field name."""
        field_names = (
            'name',
            'available',
            'gpu_accelerated',
            'error_message',
            'version',
        )
        return {key: getattr(self, key) for key in field_names}
class BaseOCRBackend(ABC):
    """Abstract base class for OCR backends.

    Concrete backends override ``NAME`` / ``SUPPORTS_GPU`` and implement
    ``_initialize`` and ``extract_text``.  This base class only initialises
    ``_available``, ``_error_msg`` and ``_version`` to "not available / unknown"
    and never changes them afterwards, so presumably subclasses update them
    from ``_initialize`` (verify against the concrete backends).

    OpenCV (``cv2``) is imported lazily inside the preprocessing helpers, and
    only on the code paths that actually call into it.
    """

    NAME = "base"
    SUPPORTS_GPU = False

    def __init__(self, use_gpu: bool = True, lang: str = 'en', **kwargs):
        """Store the common backend settings.

        Args:
            use_gpu: Whether the caller wants GPU acceleration.
            lang: Language code handed to the backend.
            **kwargs: Accepted for signature compatibility; ignored here.
        """
        self.use_gpu = use_gpu
        self.lang = lang
        self._available: bool = False
        self._error_msg: Optional[str] = None
        self._version: Optional[str] = None

    @abstractmethod
    def _initialize(self) -> bool:
        """Initialize the backend. Return True if successful."""
        pass

    @abstractmethod
    def extract_text(self, image: np.ndarray) -> List["OCRTextRegion"]:
        """Extract text from image."""
        pass

    def is_available(self) -> bool:
        """Return the backend's current availability flag."""
        return self._available

    def get_info(self) -> "OCRBackendInfo":
        """Build an ``OCRBackendInfo`` snapshot of this backend.

        ``gpu_accelerated`` is true only when the backend class supports GPU
        *and* the instance was asked to use it.
        """
        return OCRBackendInfo(
            name=self.NAME,
            available=self._available,
            gpu_accelerated=self.SUPPORTS_GPU and self.use_gpu,
            error_message=self._error_msg,
            version=self._version
        )

    def preprocess_image(self, image: np.ndarray,
                         grayscale: bool = True,
                         denoise: bool = True,
                         contrast: bool = True) -> np.ndarray:
        """Preprocess image for better OCR results.

        The input is copied first and the steps run in a fixed order:
        grayscale conversion, denoising, contrast enhancement.  Denoising and
        contrast enhancement only act on 2-D (single-channel) images, so with
        ``grayscale=False`` a colour image passes through them unchanged.

        Args:
            image: Source image; never modified in place.
            grayscale: Convert 3-D input to a 2-D grayscale image.
            denoise: Apply non-local-means denoising.
            contrast: Apply CLAHE contrast enhancement.

        Returns:
            The processed image as a new array.
        """
        processed = image.copy()

        # Convert to grayscale if needed
        if grayscale and processed.ndim == 3:
            processed = self._to_grayscale(processed)

        # Denoise
        if denoise:
            processed = self._denoise(processed)

        # Enhance contrast
        if contrast:
            processed = self._enhance_contrast(processed)

        return processed

    def _to_grayscale(self, image: np.ndarray) -> np.ndarray:
        """Convert image to a 2-D grayscale array.

        Non-3-D input is returned as-is.  A 3-D image whose last axis has
        length 1 is already single-channel, so that axis is simply dropped
        (the BGR-to-gray conversion does not accept one-channel input).  Any
        other 3-D image is converted with ``cv2.COLOR_BGR2GRAY``, i.e. it is
        treated as being in BGR channel order.
        """
        if image.ndim != 3:
            return image
        if image.shape[2] == 1:
            return image[:, :, 0]
        import cv2
        return cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

    def _denoise(self, image: np.ndarray) -> np.ndarray:
        """Denoise a 2-D image with OpenCV non-local means.

        Images that are not 2-D are returned unchanged, without importing
        OpenCV.
        NOTE(review): fastNlMeansDenoising presumably needs 8-bit input;
        confirm callers pass uint8 images.
        """
        if image.ndim != 2:
            return image
        import cv2
        # Positional arguments: dst=None, h=10, templateWindowSize=7,
        # searchWindowSize=21.
        return cv2.fastNlMeansDenoising(image, None, 10, 7, 21)

    def _enhance_contrast(self, image: np.ndarray) -> np.ndarray:
        """Enhance contrast of a 2-D image with CLAHE.

        Images that are not 2-D are returned unchanged, without importing
        OpenCV.
        """
        if image.ndim != 2:
            return image
        import cv2
        # CLAHE (Contrast Limited Adaptive Histogram Equalization)
        clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
        return clahe.apply(image)
class OCRBackendFactory:
    """Factory for creating OCR backends with auto-fallback.

    Backend classes are kept in a class-level registry keyed by name.  All
    methods are classmethods, so the registry is shared process-wide.
    """

    # Priority order: fastest/most reliable first
    BACKEND_PRIORITY = [
        'opencv_east',  # Fastest, no dependencies, detection only
        'easyocr',      # Good accuracy, lighter than PaddleOCR
        'tesseract',    # Traditional, stable
        'paddleocr',    # Best accuracy but heavy dependencies
    ]

    # Not used by any method in this class; kept in case other code reads it.
    _backends: Dict[str, Any] = {}
    # Registry: backend name -> backend class.
    _backend_classes: Dict[str, type] = {}

    @classmethod
    def register_backend(cls, name: str, backend_class: type):
        """Register ``backend_class`` under ``name``, replacing any previous entry."""
        cls._backend_classes[name] = backend_class
        logger.debug("Registered OCR backend: %s", name)

    @classmethod
    def create_backend(cls, name: str, use_gpu: bool = True,
                       lang: str = 'en', **kwargs) -> Optional["BaseOCRBackend"]:
        """Create and initialize a specific backend by name.

        Args:
            name: Registry key of the backend.
            use_gpu: Forwarded to the backend constructor.
            lang: Forwarded to the backend constructor.
            **kwargs: Extra constructor arguments for the backend.

        Returns:
            The initialized backend, or ``None`` when the name is unknown,
            ``_initialize()`` returns a falsy value, or construction or
            initialization raises (the error is logged with its traceback).
        """
        if name not in cls._backend_classes:
            logger.error("Unknown OCR backend: %s", name)
            return None

        try:
            backend = cls._backend_classes[name](use_gpu=use_gpu, lang=lang, **kwargs)
            initialized = backend._initialize()
        except Exception as e:
            # Deliberately broad: a broken backend must not stop the fallback
            # chain.  The traceback is attached so the cause is not lost.
            logger.error("Error creating OCR backend %s: %s", name, e, exc_info=True)
            return None

        if not initialized:
            logger.warning("Failed to initialize OCR backend: %s", name)
            return None

        logger.info("Created OCR backend: %s", name)
        return backend

    @classmethod
    def get_best_backend(cls, use_gpu: bool = True, lang: str = 'en',
                         priority: Optional[List[str]] = None,
                         **kwargs) -> Optional["BaseOCRBackend"]:
        """Return the first backend in priority order that can be created.

        Args:
            use_gpu: Forwarded to each backend constructor.
            lang: Forwarded to each backend constructor.
            priority: Backend names to try, in order.  ``None`` or an empty
                list falls back to ``BACKEND_PRIORITY``.
            **kwargs: Extra constructor arguments for the backends.

        Returns:
            The first backend that initializes and reports itself available,
            or ``None`` when none does.  Unregistered names are skipped.
        """
        priority = priority or cls.BACKEND_PRIORITY
        logger.info("Searching for best OCR backend (priority: %s)", priority)

        for name in priority:
            if name not in cls._backend_classes:
                continue
            backend = cls.create_backend(name, use_gpu=use_gpu, lang=lang, **kwargs)
            if backend is not None and backend.is_available():
                info = backend.get_info()
                logger.info("Selected OCR backend: %s (GPU: %s)", name, info.gpu_accelerated)
                return backend

        logger.error("No OCR backend available!")
        return None

    @classmethod
    def check_all_backends(cls, use_gpu: bool = True, lang: str = 'en') -> List["OCRBackendInfo"]:
        """Instantiate every registered backend and report its status.

        Backends listed in ``BACKEND_PRIORITY`` are reported first, in that
        order, followed by any backend registered under another name, in
        registration order.  A backend whose ``_initialize()`` returns a falsy
        value is reported as unavailable, matching ``create_backend``.  A
        backend that raises is reported as unavailable with the exception
        text as its error message.
        """
        names = list(cls.BACKEND_PRIORITY)
        names.extend(n for n in cls._backend_classes if n not in cls.BACKEND_PRIORITY)

        results = []
        for name in names:
            if name not in cls._backend_classes:
                continue
            try:
                backend = cls._backend_classes[name](use_gpu=use_gpu, lang=lang)
                initialized = backend._initialize()
                info = backend.get_info()
                if not initialized:
                    info.available = False
            except Exception as e:
                info = OCRBackendInfo(
                    name=name,
                    available=False,
                    error_message=str(e)
                )
            results.append(info)

        return results

    @classmethod
    def list_available_backends(cls, use_gpu: bool = True, lang: str = 'en') -> List[str]:
        """List the reported names of all backends that are available.

        The names come from each backend's info record, as returned by
        ``check_all_backends``.
        """
        info_list = cls.check_all_backends(use_gpu, lang)
        return [info.name for info in info_list if info.available]
# Import and register backends
def _register_backends():
    """Register every bundled backend whose module can be loaded.

    Each backend lives in its own sibling module and is optional.  A backend
    whose import fails is skipped so the others (and this package) still
    load: a plain ``ImportError`` is logged at debug level, any other
    exception raised while the backend module loads is logged as a warning
    with its traceback.

    The imports are kept as static ``from .x import Y`` statements so that
    tools which discover imports by scanning the source still see them.
    """

    def _report_unavailable(label: str, exc: Exception) -> None:
        """Log why the backend called ``label`` was skipped."""
        if isinstance(exc, ImportError):
            logger.debug("%s backend not available: %s", label, exc)
        else:
            logger.warning("%s backend failed to load: %s", label, exc, exc_info=exc)

    try:
        from .opencv_east_backend import OpenCVEASTBackend
    except Exception as e:
        _report_unavailable("OpenCV EAST", e)
    else:
        OCRBackendFactory.register_backend('opencv_east', OpenCVEASTBackend)

    try:
        from .easyocr_backend import EasyOCRBackend
    except Exception as e:
        _report_unavailable("EasyOCR", e)
    else:
        OCRBackendFactory.register_backend('easyocr', EasyOCRBackend)

    try:
        from .tesseract_backend import TesseractBackend
    except Exception as e:
        _report_unavailable("Tesseract", e)
    else:
        OCRBackendFactory.register_backend('tesseract', TesseractBackend)

    try:
        from .paddleocr_backend import PaddleOCRBackend
    except Exception as e:
        _report_unavailable("PaddleOCR", e)
    else:
        OCRBackendFactory.register_backend('paddleocr', PaddleOCRBackend)
# Auto-register on import: importing this module fills OCRBackendFactory's
# registry as a side effect.
_register_backends()