# Lemontropia-Suite/modules/ocr_backends/paddleocr_backend.py
#
# 295 lines
# 10 KiB
# Python

"""
Lemontropia Suite - PaddleOCR Backend
High-accuracy OCR using PaddleOCR - best quality but heavy dependencies.
"""
import numpy as np
import logging
from typing import List, Optional
from . import BaseOCRBackend, OCRTextRegion
logger = logging.getLogger(__name__)
class PaddleOCRBackend(BaseOCRBackend):
    """
    OCR backend using PaddleOCR.

    Pros:
    - Best accuracy among open-source OCR
    - Good multilingual support
    - Fast with GPU

    Cons:
    - Heavy dependencies (PyTorch/PaddlePaddle)
    - Can fail with DLL errors on Windows Store Python
    - Large model download

    Installation: pip install paddleocr

    Note: This backend has special handling for PyTorch/Paddle DLL errors
    that commonly occur with Windows Store Python installations.
    """

    NAME = "paddleocr"
    SUPPORTS_GPU = True

    # Lower-case substrings that identify a native-library load failure in an
    # exception message. Kept deliberately narrow: generic words such as
    # "paddle", "torch" or "lib" appear in almost every PaddleOCR error and
    # would make ordinary failures look like DLL problems.
    _DLL_ERROR_MARKERS = ('dll', 'c10', 'specified module')

    def __init__(self, use_gpu: bool = True, lang: str = 'en', **kwargs):
        """
        Store configuration; the OCR engine itself is built in `_initialize`.

        Args:
            use_gpu: Request GPU execution when a GPU is detected.
            lang: Language code; looked up in `lang_map` during initialization.
            **kwargs: Forwarded to the base class. The optional keys
                `det_db_thresh`, `det_db_box_thresh` and `rec_thresh` are
                also read here (defaults 0.3, 0.5 and 0.5).
        """
        super().__init__(use_gpu=use_gpu, lang=lang, **kwargs)
        self.ocr = None
        self._gpu_available = False
        self._dll_error = False  # Track if we hit a DLL error

        # Language mapping for PaddleOCR
        self.lang_map = {
            'en': 'en',
            'sv': 'latin',  # Swedish uses latin script
            'de': 'latin',
            'fr': 'latin',
            'es': 'latin',
            'latin': 'latin',
        }

        # Detection thresholds
        self.det_db_thresh = kwargs.get('det_db_thresh', 0.3)
        self.det_db_box_thresh = kwargs.get('det_db_box_thresh', 0.5)
        self.rec_thresh = kwargs.get('rec_thresh', 0.5)

    @classmethod
    def _looks_like_dll_error(cls, exc: BaseException) -> bool:
        """Return True if the exception message matches a known DLL-load marker."""
        message = str(exc).lower()
        return any(marker in message for marker in cls._DLL_ERROR_MARKERS)

    def _note_paddle_dll_error(self, exc: BaseException) -> None:
        """Record and log a DLL-load failure raised while setting up PaddleOCR."""
        self._dll_error = True
        self._error_msg = f"PaddleOCR DLL error (Windows Store Python?): {exc}"
        logger.warning(self._error_msg)
        logger.info("This is a known issue with Windows Store Python. Using fallback OCR.")

    def _note_pytorch_dll_error(self, exc: BaseException) -> None:
        """Record and log a DLL-load failure raised while importing PyTorch."""
        self._dll_error = True
        self._error_msg = (
            f"PyTorch DLL load failed: {exc}\n"
            "This is a known issue with Windows Store Python.\n"
            "Solutions:\n"
            "1. Use Python from python.org instead of Windows Store\n"
            "2. Install PyTorch with conda instead of pip\n"
            "3. Use alternative OCR backend (EasyOCR, Tesseract, or OpenCV EAST)"
        )
        logger.error(self._error_msg)

    def _initialize(self) -> bool:
        """
        Initialize PaddleOCR with PyTorch DLL error handling.

        Returns:
            True when the engine was created; False otherwise, in which case
            `_error_msg` describes the failure and `_dll_error` is set if the
            failure looked like a native-library load problem.
        """
        try:
            # First, check if PyTorch is importable without DLL errors
            if not self._check_pytorch():
                return False

            # Import PaddleOCR
            from paddleocr import PaddleOCR as PPOCR

            # Map language (unknown codes fall back to English)
            paddle_lang = self.lang_map.get(self.lang, 'en')

            # Check GPU availability
            self._gpu_available = self._check_gpu()
            use_gpu_flag = self.use_gpu and self._gpu_available

            logger.info(f"Initializing PaddleOCR (lang={paddle_lang}, gpu={use_gpu_flag})")

            # Initialize PaddleOCR
            # NOTE(review): rec_thresh is forwarded under that name; confirm the
            # installed PaddleOCR release actually honours it.
            self.ocr = PPOCR(
                lang=paddle_lang,
                use_gpu=use_gpu_flag,
                show_log=False,
                use_angle_cls=True,
                det_db_thresh=self.det_db_thresh,
                det_db_box_thresh=self.det_db_box_thresh,
                rec_thresh=self.rec_thresh,
            )

            self._available = True
            self._version = "2.x"  # PaddleOCR doesn't expose version easily
            logger.info(f"PaddleOCR initialized successfully (GPU: {use_gpu_flag})")
            return True

        except ImportError as e:
            # A failed native load can surface as ImportError ("DLL load
            # failed while importing ..."), so do not assume the package is
            # simply missing.
            if self._looks_like_dll_error(e):
                self._note_paddle_dll_error(e)
            else:
                self._error_msg = "PaddleOCR not installed. Run: pip install paddleocr"
                logger.warning(self._error_msg)
            return False

        except Exception as e:
            # Check for common DLL-related errors
            if self._looks_like_dll_error(e):
                self._note_paddle_dll_error(e)
            else:
                self._error_msg = f"PaddleOCR initialization failed: {e}"
                logger.error(self._error_msg)
            return False

    def _check_pytorch(self) -> bool:
        """
        Check if PyTorch can be imported without DLL errors.

        This is the critical check for Windows Store Python compatibility.

        Returns:
            True if `import torch` succeeded; False otherwise, with
            `_error_msg` (and `_dll_error` where applicable) updated.
        """
        try:
            # Try importing torch - this is where DLL errors typically occur
            import torch

            # Touch an attribute to verify the module is usable
            _ = torch.__version__
            logger.debug("PyTorch import successful")
            return True

        except ImportError as e:
            # Distinguish "module missing" from "module present but its
            # native libraries failed to load".
            if self._looks_like_dll_error(e):
                self._note_pytorch_dll_error(e)
            else:
                self._error_msg = "PyTorch not installed"
                logger.warning(self._error_msg)
            return False

        except OSError as e:
            # This is the Windows Store Python DLL error
            if self._looks_like_dll_error(e):
                self._note_pytorch_dll_error(e)
            else:
                self._error_msg = f"PyTorch load failed: {e}"
                logger.error(self._error_msg)
            return False

        except Exception as e:
            self._error_msg = f"Unexpected PyTorch error: {e}"
            logger.error(self._error_msg)
            return False

    def _check_gpu(self) -> bool:
        """
        Check if GPU is available for PaddleOCR.

        The probe goes through PyTorch (CUDA first, then Apple MPS).
        NOTE(review): this reflects what PyTorch can see, not necessarily what
        the PaddlePaddle runtime can use - confirm, especially for MPS.

        Returns:
            True if PyTorch reports a CUDA or MPS device; False otherwise or
            if the probe itself raised.
        """
        try:
            import torch

            if torch.cuda.is_available():
                device_name = torch.cuda.get_device_name(0)
                logger.info(f"CUDA available: {device_name}")
                return True

            # Check for MPS (Apple Silicon)
            if hasattr(torch.backends, 'mps') and torch.backends.mps.is_available():
                logger.info("Apple MPS available")
                return True

            return False

        except Exception as e:
            logger.debug(f"GPU check failed: {e}")
            return False

    def _region_from_line(self, line) -> Optional[OCRTextRegion]:
        """
        Convert one PaddleOCR result row into an `OCRTextRegion`.

        Args:
            line: A row expected to look like `[polygon, (text, confidence)]`.

        Returns:
            The region, or None if the row is None or not in the expected shape.
        """
        if line is None:
            return None

        try:
            # Parse result: [bbox, (text, confidence)]
            bbox, (text, conf) = line

            # Calculate axis-aligned bounding box from polygon
            x_coords = [p[0] for p in bbox]
            y_coords = [p[1] for p in bbox]
            x = int(min(x_coords))
            y = int(min(y_coords))
            w = int(max(x_coords) - x)
            h = int(max(y_coords) - y)

            return OCRTextRegion(
                text=text.strip(),
                confidence=float(conf),
                bbox=(x, y, w, h),
                language=self.lang
            )
        except (TypeError, ValueError, IndexError, AttributeError) as e:
            # One malformed row must not discard the rest of the page.
            logger.debug(f"Skipping malformed PaddleOCR result line: {e}")
            return None

    def extract_text(self, image: np.ndarray) -> List[OCRTextRegion]:
        """
        Extract text from image using PaddleOCR.

        Args:
            image: Input image (BGR format from OpenCV)

        Returns:
            List of detected text regions with recognized text. Empty if the
            backend is not initialized, the image is missing/empty, nothing
            was detected, or OCR raised.
        """
        if not self._available or self.ocr is None:
            logger.error("PaddleOCR backend not initialized")
            return []

        if image is None or getattr(image, 'size', None) == 0:
            logger.warning("PaddleOCR received an empty image")
            return []

        try:
            # Preprocess image
            processed = self.preprocess_image(image)

            # Run OCR
            result = self.ocr.ocr(processed, cls=True)

            regions = []
            if result and result[0]:
                for line in result[0]:
                    region = self._region_from_line(line)
                    if region is not None:
                        regions.append(region)

            logger.debug(f"PaddleOCR detected {len(regions)} text regions")
            return regions

        except Exception as e:
            logger.error(f"PaddleOCR extraction failed: {e}")
            return []

    def get_info(self):
        """
        Get backend information.

        Starts from the base-class info, sets `gpu_accelerated` from the GPU
        probe and the `use_gpu` setting, and replaces `error_message` with a
        DLL-specific text when a DLL failure was recorded.
        """
        info = super().get_info()
        info.gpu_accelerated = self._gpu_available and self.use_gpu

        if self._dll_error:
            info.error_message = "PyTorch DLL error - incompatible with Windows Store Python"

        return info

    def has_dll_error(self) -> bool:
        """Check if this backend failed due to DLL error."""
        return self._dll_error

    @staticmethod
    def diagnose_windows_store_python() -> dict:
        """
        Diagnose if running Windows Store Python and potential issues.

        Returns:
            Dictionary with diagnostic information: `platform`,
            `python_version`, `executable`, `is_windows_store`,
            `pytorch_importable`, `recommendations`, plus either
            `pytorch_version`/`pytorch_cuda` or `pytorch_error`.
        """
        import sys
        import platform

        diag = {
            'platform': platform.system(),
            'python_version': sys.version,
            'executable': sys.executable,
            'is_windows_store': False,
            'pytorch_importable': False,
            'recommendations': []
        }

        # Check if Windows Store Python. Match only Store-specific path
        # components; a bare "microsoft" also appears in unrelated installs
        # (e.g. an interpreter bundled under a "Microsoft Visual Studio" folder).
        store_markers = ('windowsapps', 'pythonsoftwarefoundation')
        exe_path = (sys.executable or '').lower()
        if any(marker in exe_path for marker in store_markers):
            diag['is_windows_store'] = True
            diag['recommendations'].append(
                "You are using Windows Store Python which has known DLL compatibility issues."
            )

        # Check PyTorch
        try:
            import torch
            diag['pytorch_importable'] = True
            diag['pytorch_version'] = torch.__version__
            diag['pytorch_cuda'] = torch.cuda.is_available()
        except Exception as e:
            diag['pytorch_error'] = str(e)
            diag['recommendations'].append(
                "PyTorch cannot be loaded. Use alternative OCR backends."
            )

        if not diag['pytorch_importable'] and diag['is_windows_store']:
            diag['recommendations'].extend([
                "Install Python from https://python.org instead of Windows Store",
                "Or use conda/miniconda for better compatibility",
                "Recommended OCR backends: opencv_east, easyocr, tesseract"
            ])

        return diag