255 lines
8.3 KiB
Python
255 lines
8.3 KiB
Python
"""
|
|
Lemontropia Suite - OCR Backends Base Interface
|
|
Unified interface for multiple OCR backends with auto-fallback.
|
|
"""
|
|
|
|
from abc import ABC, abstractmethod
|
|
from dataclasses import dataclass
|
|
from typing import List, Tuple, Optional, Dict, Any, Union
|
|
from pathlib import Path
|
|
import numpy as np
|
|
import logging
|
|
|
|
# Module-level logger, named after this module via __name__.
logger = logging.getLogger(__name__)
|
|
|
|
|
|
@dataclass
class OCRTextRegion:
    """One piece of text reported by an OCR backend, plus its metadata."""

    # The recognized text.
    text: str
    # Score attached to the recognition result
    # (scale is backend-defined -- TODO confirm against the backends).
    confidence: float
    # Bounding box as (x, y, w, h).
    bbox: Tuple[int, int, int, int]
    # Language tag for the text; defaults to "en".
    language: str = "en"

    def to_dict(self) -> Dict[str, Any]:
        """Return the four fields as a plain dictionary keyed by field name."""
        exported = ('text', 'confidence', 'bbox', 'language')
        return {key: getattr(self, key) for key in exported}
|
|
|
|
|
|
@dataclass
class OCRBackendInfo:
    """Status summary for a single OCR backend."""

    # Backend identifier.
    name: str
    # Whether the backend is usable.
    available: bool
    # Whether GPU acceleration is in effect for this backend.
    gpu_accelerated: bool = False
    # Failure description, if any was recorded.
    error_message: Optional[str] = None
    # Backend version string, if known.
    version: Optional[str] = None

    def to_dict(self) -> Dict[str, Any]:
        """Return every field as a plain dictionary keyed by field name."""
        return dict(
            name=self.name,
            available=self.available,
            gpu_accelerated=self.gpu_accelerated,
            error_message=self.error_message,
            version=self.version,
        )
|
|
|
|
|
|
class BaseOCRBackend(ABC):
    """Shared contract and image helpers for all OCR backends.

    Concrete backends must implement ``_initialize`` and ``extract_text``.
    The base class only stores configuration and status fields and offers
    optional OpenCV-based preprocessing helpers.
    """

    NAME = "base"
    SUPPORTS_GPU = False

    def __init__(self, use_gpu: bool = True, lang: str = 'en', **kwargs):
        """Record the requested options and reset the status fields.

        Args:
            use_gpu: Whether the caller wants GPU acceleration.
            lang: Language code stored on the instance.
            **kwargs: Accepted but not used by the base class.
        """
        self.use_gpu = use_gpu
        self.lang = lang
        # Status fields start out "not available / unknown"; nothing in this
        # base class changes them afterwards.
        self._available = False
        self._error_msg = None
        self._version = None

    @abstractmethod
    def _initialize(self) -> bool:
        """Initialize the backend. Return True if successful."""

    @abstractmethod
    def extract_text(self, image: np.ndarray) -> List[OCRTextRegion]:
        """Extract text from image."""

    def is_available(self) -> bool:
        """Return the current value of the availability flag."""
        return self._available

    def get_info(self) -> OCRBackendInfo:
        """Build an ``OCRBackendInfo`` snapshot from the instance state."""
        # GPU use is reported only when the class supports it AND it was
        # requested for this instance.
        gpu_in_use = self.SUPPORTS_GPU and self.use_gpu
        return OCRBackendInfo(
            name=self.NAME,
            available=self._available,
            gpu_accelerated=gpu_in_use,
            error_message=self._error_msg,
            version=self._version,
        )

    def preprocess_image(self, image: np.ndarray,
                         grayscale: bool = True,
                         denoise: bool = True,
                         contrast: bool = True) -> np.ndarray:
        """Run the enabled preprocessing steps on a copy of ``image``.

        Steps are applied in a fixed order: grayscale conversion (only for
        3-dimensional input), denoising, then contrast enhancement.

        Args:
            image: Input image array; it is copied, never modified in place.
            grayscale: Convert 3-dimensional input to grayscale.
            denoise: Apply ``_denoise``.
            contrast: Apply ``_enhance_contrast``.

        Returns:
            The processed image array.
        """
        working = image.copy()

        pipeline = (
            (grayscale and working.ndim == 3, self._to_grayscale),
            (denoise, self._denoise),
            (contrast, self._enhance_contrast),
        )
        for enabled, step in pipeline:
            if enabled:
                working = step(working)

        return working

    def _to_grayscale(self, image: np.ndarray) -> np.ndarray:
        """Convert a 3-dimensional image to grayscale; pass others through."""
        if image.ndim != 3:
            return image
        # OpenCV is imported lazily, only when a conversion is really needed.
        import cv2
        # The conversion code treats the channels as BGR-ordered.
        return cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

    def _denoise(self, image: np.ndarray) -> np.ndarray:
        """Apply non-local-means denoising to a 2-dimensional image.

        Images that are not 2-dimensional are returned unchanged.
        """
        import cv2
        if image.ndim != 2:
            return image
        # Positional arguments after the source: dst=None, h=10,
        # templateWindowSize=7, searchWindowSize=21.
        return cv2.fastNlMeansDenoising(image, None, 10, 7, 21)

    def _enhance_contrast(self, image: np.ndarray) -> np.ndarray:
        """Apply CLAHE to a 2-dimensional image.

        Images that are not 2-dimensional are returned unchanged.
        """
        import cv2
        if image.ndim != 2:
            return image
        # CLAHE (Contrast Limited Adaptive Histogram Equalization)
        equalizer = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
        return equalizer.apply(image)
|
|
|
|
|
|
class OCRBackendFactory:
    """Factory for creating OCR backends with auto-fallback.

    Backend classes are registered by name in a class-level registry.
    ``get_best_backend`` walks a priority list and returns the first backend
    that initializes successfully and reports itself available.
    """

    # Priority order: fastest/most reliable first
    BACKEND_PRIORITY = [
        'opencv_east',  # Fastest, no dependencies, detection only
        'easyocr',      # Good accuracy, lighter than PaddleOCR
        'tesseract',    # Traditional, stable
        'paddleocr',    # Best accuracy but heavy dependencies
    ]

    # Not used by any method of this class; kept as part of the class surface.
    _backends: Dict[str, Any] = {}
    # Registry: backend name -> backend class.
    _backend_classes: Dict[str, type] = {}

    @classmethod
    def register_backend(cls, name: str, backend_class: type):
        """Register ``backend_class`` under ``name``.

        A later registration with the same name replaces the earlier one.
        """
        cls._backend_classes[name] = backend_class
        logger.debug(f"Registered OCR backend: {name}")

    @classmethod
    def create_backend(cls, name: str, use_gpu: bool = True,
                       lang: str = 'en', **kwargs) -> Optional[BaseOCRBackend]:
        """Create and initialize the backend registered under ``name``.

        Args:
            name: Registry name of the backend.
            use_gpu: Forwarded to the backend constructor.
            lang: Forwarded to the backend constructor.
            **kwargs: Forwarded to the backend constructor.

        Returns:
            The initialized backend, or ``None`` when the name is unknown,
            ``_initialize()`` returns a falsy value, or construction or
            initialization raises.
        """
        if name not in cls._backend_classes:
            logger.error(f"Unknown OCR backend: {name}")
            return None

        try:
            backend = cls._backend_classes[name](use_gpu=use_gpu, lang=lang, **kwargs)
            if backend._initialize():
                logger.info(f"Created OCR backend: {name}")
                return backend
            else:
                logger.warning(f"Failed to initialize OCR backend: {name}")
                return None
        except Exception as e:
            # Factory boundary: a broken backend must not take the caller
            # down, so the failure is logged and reported as None.
            logger.error(f"Error creating OCR backend {name}: {e}")
            return None

    @classmethod
    def get_best_backend(cls, use_gpu: bool = True, lang: str = 'en',
                         priority: Optional[List[str]] = None,
                         **kwargs) -> Optional[BaseOCRBackend]:
        """Return the first usable backend in priority order.

        Args:
            use_gpu: Forwarded to the backend constructor.
            lang: Forwarded to the backend constructor.
            priority: Backend names to try, in order. ``None`` or an empty
                list falls back to ``BACKEND_PRIORITY``.
            **kwargs: Forwarded to the backend constructor.

        Returns:
            The first backend that is created successfully and reports
            ``is_available()``, or ``None`` if there is none.
        """
        priority = priority or cls.BACKEND_PRIORITY

        logger.info(f"Searching for best OCR backend (priority: {priority})")

        for name in priority:
            # Skip unregistered names quietly; create_backend would log them
            # as errors.
            if name not in cls._backend_classes:
                continue

            backend = cls.create_backend(name, use_gpu=use_gpu, lang=lang, **kwargs)
            if backend is not None and backend.is_available():
                info = backend.get_info()
                logger.info(f"Selected OCR backend: {name} (GPU: {info.gpu_accelerated})")
                return backend

        logger.error("No OCR backend available!")
        return None

    @classmethod
    def check_all_backends(cls, use_gpu: bool = True, lang: str = 'en') -> List[OCRBackendInfo]:
        """Instantiate and initialize every registered backend; report status.

        Backends named in ``BACKEND_PRIORITY`` are reported first, in that
        order. Backends registered under any other name follow in
        registration order, so custom registrations are reported as well.

        Args:
            use_gpu: Forwarded to each backend constructor.
            lang: Forwarded to each backend constructor.

        Returns:
            One ``OCRBackendInfo`` per registered backend. A backend whose
            construction or initialization raises is reported as unavailable
            with the exception text as its error message.
        """
        registered = cls._backend_classes
        names = [name for name in cls.BACKEND_PRIORITY if name in registered]
        # Registered names missing from the priority list must not be dropped.
        names.extend(
            name for name in registered if name not in cls.BACKEND_PRIORITY
        )

        results = []
        for name in names:
            try:
                backend = registered[name](use_gpu=use_gpu, lang=lang)
                backend._initialize()
                results.append(backend.get_info())
            except Exception as e:
                results.append(OCRBackendInfo(
                    name=name,
                    available=False,
                    error_message=str(e)
                ))

        return results

    @classmethod
    def list_available_backends(cls, use_gpu: bool = True, lang: str = 'en') -> List[str]:
        """Return the reported names of all backends that are available.

        This runs ``check_all_backends``, so every registered backend is
        instantiated and initialized.
        """
        info_list = cls.check_all_backends(use_gpu, lang)
        return [info.name for info in info_list if info.available]
|
|
|
|
|
|
# Import and register backends
|
|
def _register_backends():
    """Import each bundled backend module and register its class.

    A backend whose import raises ImportError is skipped and noted at debug
    level. Any other exception propagates to the caller.
    """

    # Each loader defers its import until called, so one missing dependency
    # only affects its own backend.
    def _load_opencv_east():
        from .opencv_east_backend import OpenCVEASTBackend
        return OpenCVEASTBackend

    def _load_easyocr():
        from .easyocr_backend import EasyOCRBackend
        return EasyOCRBackend

    def _load_tesseract():
        from .tesseract_backend import TesseractBackend
        return TesseractBackend

    def _load_paddleocr():
        from .paddleocr_backend import PaddleOCRBackend
        return PaddleOCRBackend

    # (registry name, label used in the log message, loader)
    candidates = (
        ('opencv_east', 'OpenCV EAST', _load_opencv_east),
        ('easyocr', 'EasyOCR', _load_easyocr),
        ('tesseract', 'Tesseract', _load_tesseract),
        ('paddleocr', 'PaddleOCR', _load_paddleocr),
    )

    for registry_name, label, load_class in candidates:
        try:
            OCRBackendFactory.register_backend(registry_name, load_class())
        except ImportError as e:
            logger.debug(f"{label} backend not available: {e}")
|
|
|
|
|
|
# Auto-register on import
|
|
# Runs at import time: fills OCRBackendFactory's registry with every backend
# whose module imports without ImportError.
_register_backends()
|