185 lines
5.8 KiB
Python
185 lines
5.8 KiB
Python
"""
|
|
Lemontropia Suite - EasyOCR Backend
|
|
Text recognition using EasyOCR - lighter than PaddleOCR.
|
|
"""
|
|
|
|
import numpy as np
|
|
import logging
|
|
from typing import List, Optional
|
|
|
|
from . import BaseOCRBackend, OCRTextRegion
|
|
|
|
# Module-level logger, named after this module via __name__.
logger = logging.getLogger(__name__)
|
|
|
|
|
|
class EasyOCRBackend(BaseOCRBackend):
    """
    OCR backend using EasyOCR.

    Pros:
    - Lighter than PaddleOCR
    - Good accuracy
    - Supports many languages
    - Can run on CPU reasonably well

    Cons:
    - First run downloads models (~100MB)
    - Slower than OpenCV EAST

    Installation: pip install easyocr
    """

    NAME = "easyocr"
    SUPPORTS_GPU = True

    def __init__(self, use_gpu: bool = True, lang: str = 'en', **kwargs):
        """
        Store configuration; the EasyOCR reader itself is created in
        ``_initialize``.

        Args:
            use_gpu: Request GPU acceleration. It is only used when
                ``_check_gpu`` also reports a usable device.
            lang: Key into ``lang_map``; unknown keys fall back to English.
            **kwargs: Forwarded unchanged to ``BaseOCRBackend.__init__``.
        """
        # NOTE(review): the attributes below are assigned after the base
        # constructor runs, so this assumes BaseOCRBackend.__init__ does not
        # itself call _initialize() -- confirm against the base class.
        super().__init__(use_gpu=use_gpu, lang=lang, **kwargs)

        self.reader = None
        self._gpu_available = False

        # Backend language key -> EasyOCR language code.
        self.lang_map = {
            'en': 'en',
            'sv': 'sv',  # Swedish
            'de': 'de',
            'fr': 'fr',
            'es': 'es',
            # EasyOCR's code for Latin is 'la'; passing 'latin' makes
            # easyocr.Reader reject the language list.
            'latin': 'la',
        }

    def _initialize(self) -> bool:
        """
        Create the EasyOCR reader.

        On a CUDA/GPU related failure while ``use_gpu`` is set, GPU use is
        switched off and initialization is retried once on CPU.

        Returns:
            True when the reader was created, False otherwise (the reason
            is stored in ``self._error_msg`` and logged).
        """
        try:
            import easyocr

            # Map language code; warn instead of silently switching language.
            easyocr_lang = self.lang_map.get(self.lang)
            if easyocr_lang is None:
                logger.warning(
                    "Language %r has no EasyOCR mapping; falling back to 'en'",
                    self.lang,
                )
                easyocr_lang = 'en'

            # Only ask EasyOCR for a GPU when one was requested AND detected.
            self._gpu_available = self._check_gpu()
            use_gpu_flag = self.use_gpu and self._gpu_available

            logger.info(
                "Initializing EasyOCR (lang=%s, gpu=%s)",
                easyocr_lang,
                use_gpu_flag,
            )

            # EasyOCR downloads models automatically on first run.
            self.reader = easyocr.Reader(
                [easyocr_lang],
                gpu=use_gpu_flag,
                verbose=False,
            )

            self._available = True
            self._version = getattr(easyocr, '__version__', 'unknown')

            logger.info(
                "EasyOCR initialized successfully (GPU: %s)", use_gpu_flag
            )
            return True

        except ImportError:
            self._error_msg = "EasyOCR not installed. Run: pip install easyocr"
            logger.warning(self._error_msg)
            return False

        except Exception as e:
            # Handle specific PyTorch/CUDA errors by inspecting the message.
            error_str = str(e).lower()

            if 'cuda' in error_str or 'c10' in error_str or 'gpu' in error_str:
                self._error_msg = f"EasyOCR GPU initialization failed: {e}"
                logger.warning("%s. Try with use_gpu=False", self._error_msg)

                # Retry on CPU. use_gpu is cleared first, so the retry cannot
                # re-enter this branch a second time.
                if self.use_gpu:
                    logger.info("Attempting EasyOCR CPU fallback...")
                    self.use_gpu = False
                    return self._initialize()
            else:
                self._error_msg = f"EasyOCR initialization failed: {e}"
                logger.error(self._error_msg)

            return False

    def _check_gpu(self) -> bool:
        """
        Check if GPU is available for EasyOCR.

        Returns:
            True when PyTorch reports CUDA or Apple MPS as available; False
            when neither is available, PyTorch cannot be imported, or the
            probe itself raises.
        """
        try:
            import torch

            if torch.cuda.is_available():
                logger.info(
                    "CUDA available: %s", torch.cuda.get_device_name(0)
                )
                return True

            # Check MPS (Apple Silicon); older torch builds lack the attribute.
            if hasattr(torch.backends, 'mps') and torch.backends.mps.is_available():
                logger.info("Apple MPS available")
                return True

            return False

        except ImportError:
            return False
        except Exception as e:
            # Best-effort probe: any failure simply means "no GPU".
            logger.debug("GPU check failed: %s", e)
            return False

    @staticmethod
    def _polygon_to_xywh(polygon):
        """Return the axis-aligned (x, y, w, h) box enclosing *polygon*.

        *polygon* is a sequence of (x, y) points. x and y are truncated to
        int first, and width/height are measured from those truncated
        values.
        """
        x_coords = [point[0] for point in polygon]
        y_coords = [point[1] for point in polygon]

        x = int(min(x_coords))
        y = int(min(y_coords))
        w = int(max(x_coords) - x)
        h = int(max(y_coords) - y)
        return x, y, w, h

    def extract_text(self, image: np.ndarray) -> List[OCRTextRegion]:
        """
        Extract text from image using EasyOCR.

        Args:
            image: Input image (BGR format from OpenCV). 2-D grayscale and
                single-channel (H, W, 1) arrays are passed to EasyOCR
                unchanged.

        Returns:
            List of detected text regions with recognized text. An empty
            list is returned when the backend is not initialized or when
            recognition raises (the error is logged, not propagated).
        """
        if not self._available or self.reader is None:
            logger.error("EasyOCR backend not initialized")
            return []

        try:
            # Swap channels only for colour images. cv2.cvtColor with
            # COLOR_BGR2RGB raises on a single-channel (H, W, 1) array, which
            # previously turned a valid grayscale image into an empty result.
            if len(image.shape) == 3 and image.shape[2] in (3, 4):
                import cv2
                image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
            else:
                image_rgb = image

            # Run OCR
            results = self.reader.readtext(image_rgb)

            regions = []
            # EasyOCR returns (bbox, text, confidence) per detection, where
            # bbox is a list of 4 points: [[x1,y1], [x2,y2], [x3,y3], [x4,y4]]
            for polygon, text, conf in results:
                regions.append(OCRTextRegion(
                    text=text.strip(),
                    confidence=float(conf),
                    bbox=self._polygon_to_xywh(polygon),
                    language=self.lang,
                ))

            logger.debug("EasyOCR detected %d text regions", len(regions))
            return regions

        except Exception as e:
            # Deliberate best-effort: callers receive "no text" on failure.
            logger.error("EasyOCR extraction failed: %s", e)
            return []

    def get_info(self):
        """Get backend information.

        Returns the object produced by the base class ``get_info`` with
        ``gpu_accelerated`` set from whether a GPU was both detected and
        requested.
        """
        info = super().get_info()
        info.gpu_accelerated = self._gpu_available and self.use_gpu
        return info
|