295 lines
10 KiB
Python
295 lines
10 KiB
Python
"""
Lemontropia Suite - PaddleOCR Backend
High-accuracy OCR using PaddleOCR - best quality but heavy dependencies.
"""
|
|
|
|
import numpy as np
|
|
import logging
|
|
from typing import List, Optional
|
|
|
|
from . import BaseOCRBackend, OCRTextRegion
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
|
|
class PaddleOCRBackend(BaseOCRBackend):
    """
    OCR backend using PaddleOCR.

    Pros:
    - Best accuracy among open-source OCR
    - Good multilingual support
    - Fast with GPU

    Cons:
    - Heavy dependencies (PyTorch/PaddlePaddle)
    - Can fail with DLL errors on Windows Store Python
    - Large model download

    Installation: pip install paddleocr

    Note: This backend has special handling for PyTorch/Paddle DLL errors
    that commonly occur with Windows Store Python installations.
    """

    NAME = "paddleocr"
    SUPPORTS_GPU = True

    # Lower-cased substrings that identify a native-library (DLL) load failure
    # in an exception message. Shared by every error handler in this class so
    # that they all classify failures the same way.
    _DLL_ERROR_MARKERS = ('dll', 'c10', 'specified module')

    def __init__(self, use_gpu: bool = True, lang: str = 'en', **kwargs):
        """
        Store configuration; the OCR engine itself is created in `_initialize`.

        Args:
            use_gpu: Request GPU acceleration (only honoured if a GPU is found).
            lang: Language code; translated through `lang_map` at init time.
            **kwargs: Forwarded to the base class. The optional keys
                `det_db_thresh`, `det_db_box_thresh` and `rec_thresh` are
                also read here (defaults 0.3, 0.5 and 0.5).
        """
        super().__init__(use_gpu=use_gpu, lang=lang, **kwargs)

        self.ocr = None
        self._gpu_available = False
        self._dll_error = False  # Track if we hit a DLL error

        # Language mapping for PaddleOCR
        self.lang_map = {
            'en': 'en',
            'sv': 'latin',  # Swedish uses latin script
            'de': 'latin',
            'fr': 'latin',
            'es': 'latin',
            'latin': 'latin',
        }

        # Detection thresholds
        self.det_db_thresh = kwargs.get('det_db_thresh', 0.3)
        self.det_db_box_thresh = kwargs.get('det_db_box_thresh', 0.5)
        self.rec_thresh = kwargs.get('rec_thresh', 0.5)

    @classmethod
    def _is_dll_error(cls, error: BaseException) -> bool:
        """Return True if the message of *error* contains a DLL-failure marker."""
        message = str(error).lower()
        return any(marker in message for marker in cls._DLL_ERROR_MARKERS)

    def _record_paddle_dll_error(self, error: BaseException) -> None:
        """Flag a DLL failure raised while importing/constructing PaddleOCR."""
        self._dll_error = True
        self._error_msg = f"PaddleOCR DLL error (Windows Store Python?): {error}"
        logger.warning(self._error_msg)
        logger.info("This is a known issue with Windows Store Python. Using fallback OCR.")

    def _record_pytorch_dll_error(self, error: BaseException) -> None:
        """Flag a DLL failure raised while importing PyTorch, with remedies."""
        self._dll_error = True
        self._error_msg = (
            f"PyTorch DLL load failed: {error}\n"
            "This is a known issue with Windows Store Python.\n"
            "Solutions:\n"
            "1. Use Python from python.org instead of Windows Store\n"
            "2. Install PyTorch with conda instead of pip\n"
            "3. Use alternative OCR backend (EasyOCR, Tesseract, or OpenCV EAST)"
        )
        logger.error(self._error_msg)

    def _initialize(self) -> bool:
        """
        Initialize PaddleOCR with PyTorch DLL error handling.

        Returns:
            True when the engine was created and the backend marked available;
            False otherwise, with `_error_msg` (and `_dll_error` for DLL load
            failures) describing what went wrong.
        """
        try:
            # First, check if PyTorch is importable without DLL errors.
            # NOTE(review): PaddleOCR itself appears to run on PaddlePaddle, so
            # this gate makes the backend unavailable whenever PyTorch is
            # missing - confirm that this is intended.
            if not self._check_pytorch():
                return False

            # Import lazily so the module can be imported without paddleocr
            from paddleocr import PaddleOCR as PPOCR

            # Map language (unknown codes fall back to English)
            paddle_lang = self.lang_map.get(self.lang, 'en')

            # Only use the GPU when it was requested AND detected
            self._gpu_available = self._check_gpu()
            use_gpu_flag = self.use_gpu and self._gpu_available

            logger.info("Initializing PaddleOCR (lang=%s, gpu=%s)", paddle_lang, use_gpu_flag)

            # NOTE(review): `rec_thresh` may not be an option PaddleOCR
            # recognises (2.x seems to call it `drop_score`) - verify it
            # actually takes effect.
            self.ocr = PPOCR(
                lang=paddle_lang,
                use_gpu=use_gpu_flag,
                show_log=False,
                use_angle_cls=True,
                det_db_thresh=self.det_db_thresh,
                det_db_box_thresh=self.det_db_box_thresh,
                rec_thresh=self.rec_thresh,
            )

            self._available = True
            self._version = "2.x"  # PaddleOCR doesn't expose version easily

            logger.info("PaddleOCR initialized successfully (GPU: %s)", use_gpu_flag)
            return True

        except ImportError as e:
            # A broken native library also surfaces as ImportError
            # ("DLL load failed while importing ..."); do not misreport that
            # as a missing package.
            if self._is_dll_error(e):
                self._record_paddle_dll_error(e)
            else:
                self._error_msg = "PaddleOCR not installed. Run: pip install paddleocr"
                logger.warning(self._error_msg)
            return False

        except Exception as e:
            # Only genuine DLL-load messages are classified as DLL errors;
            # everything else is a plain initialization failure.
            if self._is_dll_error(e):
                self._record_paddle_dll_error(e)
            else:
                self._error_msg = f"PaddleOCR initialization failed: {e}"
                logger.error(self._error_msg)
            return False

    def _check_pytorch(self) -> bool:
        """
        Check if PyTorch can be imported without DLL errors.

        This is the critical check for Windows Store Python compatibility.

        Returns:
            True if `import torch` succeeds; False otherwise, with
            `_error_msg` set and `_dll_error` raised for DLL load failures.
        """
        try:
            # Try importing torch - this is where DLL errors typically occur
            import torch

            # Touch an attribute to verify the module is usable
            _ = torch.__version__

            logger.debug("PyTorch import successful")
            return True

        except ImportError as e:
            # "DLL load failed while importing ..." is an ImportError too;
            # distinguish it from PyTorch simply not being installed.
            if self._is_dll_error(e):
                self._record_pytorch_dll_error(e)
            else:
                self._error_msg = "PyTorch not installed"
                logger.warning(self._error_msg)
            return False

        except OSError as e:
            # This is the Windows Store Python DLL error
            if self._is_dll_error(e):
                self._record_pytorch_dll_error(e)
            else:
                self._error_msg = f"PyTorch load failed: {e}"
                logger.error(self._error_msg)
            return False

        except Exception as e:
            self._error_msg = f"Unexpected PyTorch error: {e}"
            logger.error(self._error_msg)
            return False

    def _check_gpu(self) -> bool:
        """
        Check if GPU is available for PaddleOCR.

        Returns:
            True if PyTorch reports CUDA or Apple MPS as available, False
            otherwise (including when the probe itself raises).
        """
        # NOTE(review): availability is probed through PyTorch, not through
        # PaddlePaddle; confirm that a positive result here (MPS in
        # particular) really means PaddleOCR can run on that device.
        try:
            import torch

            if torch.cuda.is_available():
                device_name = torch.cuda.get_device_name(0)
                logger.info("CUDA available: %s", device_name)
                return True

            # Check for MPS (Apple Silicon)
            if hasattr(torch.backends, 'mps') and torch.backends.mps.is_available():
                logger.info("Apple MPS available")
                return True

            return False

        except Exception as e:
            logger.debug("GPU check failed: %s", e)
            return False

    def extract_text(self, image: np.ndarray) -> List[OCRTextRegion]:
        """
        Extract text from image using PaddleOCR.

        Args:
            image: Input image (BGR format from OpenCV)

        Returns:
            List of detected text regions with recognized text. Empty when the
            backend is not initialized, nothing is detected, or OCR raises.
        """
        if not self._available or self.ocr is None:
            logger.error("PaddleOCR backend not initialized")
            return []

        try:
            # Preprocess image (helper is not defined in this class;
            # presumably inherited from the base class)
            processed = self.preprocess_image(image)

            # Run OCR
            result = self.ocr.ocr(processed, cls=True)

            regions = []
            # Only the first element of the result is consumed; it may be
            # None/empty when nothing was detected.
            if result and result[0]:
                for line in result[0]:
                    if line is None:
                        continue

                    # Parse result: [bbox, (text, confidence)]
                    bbox, (text, conf) = line

                    # Collapse the polygon to an axis-aligned (x, y, w, h) box
                    x_coords = [p[0] for p in bbox]
                    y_coords = [p[1] for p in bbox]

                    x = int(min(x_coords))
                    y = int(min(y_coords))
                    w = int(max(x_coords) - x)
                    h = int(max(y_coords) - y)

                    regions.append(OCRTextRegion(
                        text=text.strip(),
                        confidence=float(conf),
                        bbox=(x, y, w, h),
                        language=self.lang
                    ))

            logger.debug("PaddleOCR detected %d text regions", len(regions))
            return regions

        except Exception as e:
            # Best effort: a failed extraction yields no regions, not a crash
            logger.error("PaddleOCR extraction failed: %s", e)
            return []

    def get_info(self):
        """
        Get backend information.

        Extends the base-class info with the effective GPU state and, if a
        DLL failure was recorded, a dedicated error message.
        """
        info = super().get_info()
        info.gpu_accelerated = self._gpu_available and self.use_gpu
        if self._dll_error:
            info.error_message = "PyTorch DLL error - incompatible with Windows Store Python"
        return info

    def has_dll_error(self) -> bool:
        """Check if this backend failed due to DLL error."""
        return self._dll_error

    @staticmethod
    def diagnose_windows_store_python() -> dict:
        """
        Diagnose if running Windows Store Python and potential issues.

        Returns:
            Dictionary with diagnostic information: `platform`,
            `python_version`, `executable`, `is_windows_store`,
            `pytorch_importable`, `recommendations`, plus either
            `pytorch_version`/`pytorch_cuda` or `pytorch_error`.
        """
        import sys
        import platform

        diag = {
            'platform': platform.system(),
            'python_version': sys.version,
            'executable': sys.executable,
            'is_windows_store': False,
            'pytorch_importable': False,
            'recommendations': []
        }

        # Check if Windows Store Python. Only the "WindowsApps" directory is
        # treated as evidence: a bare "microsoft" match also hits unrelated
        # installs (e.g. the Python bundled with Visual Studio). base_prefix
        # is checked too so a venv created from a Store interpreter is still
        # recognised; sys.executable may be None/empty when embedded.
        install_paths = (sys.executable or '', getattr(sys, 'base_prefix', '') or '')
        if any('windowsapps' in path.lower() for path in install_paths):
            diag['is_windows_store'] = True
            diag['recommendations'].append(
                "You are using Windows Store Python which has known DLL compatibility issues."
            )

        # Check PyTorch
        try:
            import torch
            diag['pytorch_importable'] = True
            diag['pytorch_version'] = torch.__version__
            diag['pytorch_cuda'] = torch.cuda.is_available()
        except Exception as e:
            diag['pytorch_error'] = str(e)
            diag['recommendations'].append(
                "PyTorch cannot be loaded. Use alternative OCR backends."
            )

        if not diag['pytorch_importable'] and diag['is_windows_store']:
            diag['recommendations'].extend([
                "Install Python from https://python.org instead of Windows Store",
                "Or use conda/miniconda for better compatibility",
                "Recommended OCR backends: opencv_east, easyocr, tesseract"
            ])

        return diag
|