# Lemontropia-Suite/modules/ocr_backends/paddleocr_backend.py

"""
Lemontropia Suite - PaddleOCR Backend
High-accuracy OCR using PaddleOCR - best quality but heavy dependencies.
"""

import numpy as np
import logging
from typing import List, Optional

from . import BaseOCRBackend, OCRTextRegion

logger = logging.getLogger(__name__)


class PaddleOCRBackend(BaseOCRBackend):
    """
    OCR backend using PaddleOCR.

    Pros:
    - Best accuracy among open-source OCR
    - Good multilingual support
    - Fast with GPU

    Cons:
    - Heavy dependencies (PyTorch/PaddlePaddle)
    - Can fail with DLL errors on Windows Store Python
    - Large model download

    Installation: pip install paddleocr

    Note: This backend has special handling for PyTorch/Paddle DLL errors
    that commonly occur with Windows Store Python installations. Use
    ``has_dll_error()`` after a failed initialization to find out whether
    the failure was a native-library load problem.
    """

    NAME = "paddleocr"
    SUPPORTS_GPU = True

    # Lower-cased substrings that identify a native-library load failure in
    # an exception message. Deliberately narrow: generic words such as
    # "lib", "torch" or "paddle" appear in almost every PaddleOCR error and
    # would misclassify unrelated failures as DLL problems.
    _DLL_ERROR_MARKERS = ('dll', 'c10', 'specified module', 'specified procedure')

    def __init__(self, use_gpu: bool = True, lang: str = 'en', **kwargs):
        """
        Store configuration; the OCR engine itself is built in ``_initialize``.

        Args:
            use_gpu: Request GPU inference (only honoured if a GPU is usable).
            lang: Language code; translated through ``lang_map``.
            **kwargs: Forwarded to the base class. Also read here:
                ``det_db_thresh`` (default 0.3), ``det_db_box_thresh``
                (default 0.5) and ``rec_thresh`` (default 0.5).
        """
        super().__init__(use_gpu=use_gpu, lang=lang, **kwargs)

        self.ocr = None
        self._gpu_available = False
        self._dll_error = False  # Track if we hit a DLL error

        # Language mapping for PaddleOCR
        self.lang_map = {
            'en': 'en',
            'sv': 'latin',  # Swedish uses latin script
            'de': 'latin',
            'fr': 'latin',
            'es': 'latin',
            'latin': 'latin',
        }

        # Detection thresholds
        self.det_db_thresh = kwargs.get('det_db_thresh', 0.3)
        self.det_db_box_thresh = kwargs.get('det_db_box_thresh', 0.5)
        self.rec_thresh = kwargs.get('rec_thresh', 0.5)

    @classmethod
    def _is_dll_error(cls, exc: BaseException) -> bool:
        """Return True if the exception text looks like a native-library load failure."""
        message = str(exc).lower()
        return any(marker in message for marker in cls._DLL_ERROR_MARKERS)

    def _initialize(self) -> bool:
        """
        Initialize PaddleOCR with PyTorch DLL error handling.

        Returns:
            True if the engine was created; False otherwise, in which case
            ``_error_msg`` describes the failure and ``_dll_error`` is set
            when the cause was a native-library load failure.
        """
        try:
            import importlib.util

            # PaddleOCR itself does not depend on PyTorch, so a missing torch
            # must not disable this backend. Only when torch IS installed do
            # we verify that it loads cleanly - a broken torch install is the
            # typical symptom of the Windows Store Python DLL problem.
            torch_installed = importlib.util.find_spec("torch") is not None
            if torch_installed and not self._check_pytorch():
                return False

            # Import PaddleOCR
            from paddleocr import PaddleOCR as PPOCR

            # Map language
            paddle_lang = self.lang_map.get(self.lang)
            if paddle_lang is None:
                logger.warning(
                    "Language %r is not mapped for PaddleOCR; falling back to 'en'",
                    self.lang,
                )
                paddle_lang = 'en'

            # Check GPU availability
            self._gpu_available = self._check_gpu()
            use_gpu_flag = self.use_gpu and self._gpu_available

            logger.info(f"Initializing PaddleOCR (lang={paddle_lang}, gpu={use_gpu_flag})")

            # Initialize PaddleOCR
            self.ocr = PPOCR(
                lang=paddle_lang,
                use_gpu=use_gpu_flag,
                show_log=False,
                use_angle_cls=True,
                det_db_thresh=self.det_db_thresh,
                det_db_box_thresh=self.det_db_box_thresh,
                # PaddleOCR 2.x names the recognition confidence cut-off
                # "drop_score"; an unknown "rec_thresh" kwarg is silently
                # ignored, which left the configured threshold without effect.
                drop_score=self.rec_thresh,
            )

            self._available = True
            self._version = "2.x"  # PaddleOCR doesn't expose version easily

            logger.info(f"PaddleOCR initialized successfully (GPU: {use_gpu_flag})")
            return True

        except ImportError as e:
            # On Windows a failed native load usually surfaces as
            # "ImportError: DLL load failed ...", which is not the same as
            # the package being absent.
            if self._is_dll_error(e):
                self._dll_error = True
                self._error_msg = f"PaddleOCR DLL error (Windows Store Python?): {e}"
                logger.warning(self._error_msg)
                logger.info("This is a known issue with Windows Store Python. Using fallback OCR.")
            else:
                self._error_msg = f"PaddleOCR not installed. Run: pip install paddleocr ({e})"
                logger.warning(self._error_msg)
            return False

        except Exception as e:
            # Check for common DLL-related errors
            if self._is_dll_error(e):
                self._dll_error = True
                self._error_msg = f"PaddleOCR DLL error (Windows Store Python?): {e}"
                logger.warning(self._error_msg)
                logger.info("This is a known issue with Windows Store Python. Using fallback OCR.")
            else:
                self._error_msg = f"PaddleOCR initialization failed: {e}"
                logger.error(self._error_msg)

            return False

    def _record_pytorch_dll_failure(self, exc: BaseException) -> None:
        """Flag a PyTorch DLL load failure and log the remediation steps."""
        self._dll_error = True
        self._error_msg = (
            f"PyTorch DLL load failed: {exc}\n"
            "This is a known issue with Windows Store Python.\n"
            "Solutions:\n"
            "1. Use Python from python.org instead of Windows Store\n"
            "2. Install PyTorch with conda instead of pip\n"
            "3. Use alternative OCR backend (EasyOCR, Tesseract, or OpenCV EAST)"
        )
        logger.error(self._error_msg)

    def _check_pytorch(self) -> bool:
        """
        Check if PyTorch can be imported without DLL errors.

        This is the critical check for Windows Store Python compatibility.

        Returns:
            True if ``import torch`` succeeds; False otherwise, with
            ``_error_msg`` set and ``_dll_error`` raised for DLL failures.
        """
        try:
            # Try importing torch - this is where DLL errors typically occur
            import torch

            # Touch an attribute to verify the module is actually usable
            _ = torch.__version__

            logger.debug("PyTorch import successful")
            return True

        except ImportError as e:
            # "DLL load failed while importing ..." is raised as ImportError,
            # so distinguish it from a genuinely missing package.
            if self._is_dll_error(e):
                self._record_pytorch_dll_failure(e)
            else:
                self._error_msg = "PyTorch not installed"
                logger.warning(self._error_msg)
            return False

        except OSError as e:
            # This is the Windows Store Python DLL error
            if self._is_dll_error(e):
                self._record_pytorch_dll_failure(e)
            else:
                self._error_msg = f"PyTorch load failed: {e}"
                logger.error(self._error_msg)
            return False

        except Exception as e:
            self._error_msg = f"Unexpected PyTorch error: {e}"
            logger.error(self._error_msg)
            return False

    def _check_gpu(self) -> bool:
        """
        Check if GPU is available for PaddleOCR.

        PaddleOCR runs on PaddlePaddle, so the question is answered by
        Paddle itself: the installed build must be compiled with CUDA and
        at least one CUDA device must be visible. PyTorch's CUDA/MPS state
        says nothing about what Paddle can use.

        Returns:
            True if Paddle can run on a CUDA device, False otherwise
            (including when the check itself fails).
        """
        try:
            import paddle

            if not paddle.device.is_compiled_with_cuda():
                logger.debug("PaddlePaddle build has no CUDA support")
                return False

            device_count = paddle.device.cuda.device_count()
            if device_count > 0:
                logger.info(f"CUDA available for PaddlePaddle: {device_count} device(s)")
                return True

            return False

        except Exception as e:
            # Best-effort probe: any failure simply means "run on CPU".
            logger.debug(f"GPU check failed: {e}")
            return False

    def _region_from_line(self, line) -> Optional[OCRTextRegion]:
        """
        Convert one PaddleOCR result entry into an ``OCRTextRegion``.

        Args:
            line: Expected shape ``[polygon, (text, confidence)]`` where
                polygon is a sequence of ``(x, y)`` points.

        Returns:
            The region with an axis-aligned ``(x, y, w, h)`` box, or None
            if the entry is missing or malformed.
        """
        if line is None:
            return None

        try:
            bbox, (text, conf) = line

            # Collapse the polygon to its axis-aligned bounding rectangle
            x_coords = [p[0] for p in bbox]
            y_coords = [p[1] for p in bbox]

            x = int(min(x_coords))
            y = int(min(y_coords))
            w = int(max(x_coords) - x)
            h = int(max(y_coords) - y)

            cleaned_text = text.strip()
            confidence = float(conf)
        except (TypeError, ValueError, IndexError, AttributeError) as e:
            logger.debug(f"Skipping malformed PaddleOCR result {line!r}: {e}")
            return None

        return OCRTextRegion(
            text=cleaned_text,
            confidence=confidence,
            bbox=(x, y, w, h),
            language=self.lang
        )

    def extract_text(self, image: np.ndarray) -> List[OCRTextRegion]:
        """
        Extract text from image using PaddleOCR.

        Args:
            image: Input image (BGR format from OpenCV)

        Returns:
            List of detected text regions with recognized text. Empty when
            the backend is not initialized, the image is empty, or OCR fails;
            this method does not raise.
        """
        if not self._available or self.ocr is None:
            logger.error("PaddleOCR backend not initialized")
            return []

        if image is None or getattr(image, 'size', 0) == 0:
            logger.warning("PaddleOCR received an empty image")
            return []

        try:
            # Preprocess image
            processed = self.preprocess_image(image)

            # Run OCR
            result = self.ocr.ocr(processed, cls=True)

            # result[0] holds the lines of the (single) input image and is
            # None/empty when nothing was detected.
            lines = result[0] if result and result[0] else []

            # A malformed entry is skipped instead of discarding every
            # region that was parsed successfully.
            candidates = (self._region_from_line(line) for line in lines)
            regions = [region for region in candidates if region is not None]

            logger.debug(f"PaddleOCR detected {len(regions)} text regions")
            return regions

        except Exception as e:
            logger.error(f"PaddleOCR extraction failed: {e}")
            return []

    def get_info(self):
        """
        Get backend information.

        Returns:
            The base-class info object with ``gpu_accelerated`` filled in
            and, after a DLL failure, a dedicated ``error_message``.
        """
        info = super().get_info()
        info.gpu_accelerated = self._gpu_available and self.use_gpu
        if self._dll_error:
            info.error_message = "PyTorch DLL error - incompatible with Windows Store Python"
        return info

    def has_dll_error(self) -> bool:
        """Check if this backend failed due to DLL error."""
        return self._dll_error

    @staticmethod
    def diagnose_windows_store_python() -> dict:
        """
        Diagnose if running Windows Store Python and potential issues.

        Returns:
            Dictionary with diagnostic information: ``platform``,
            ``python_version``, ``executable``, ``is_windows_store``,
            ``pytorch_importable``, ``recommendations`` and, depending on
            the outcome of the torch import, either ``pytorch_version`` /
            ``pytorch_cuda`` or ``pytorch_error``.
        """
        import sys
        import platform

        diag = {
            'platform': platform.system(),
            'python_version': sys.version,
            'executable': sys.executable,
            'is_windows_store': False,
            'pytorch_importable': False,
            'recommendations': []
        }

        # Check if Windows Store Python. Store installs live under a
        # "WindowsApps" directory (package "PythonSoftwareFoundation.Python.*").
        # Matching on "microsoft" alone also hits e.g. Visual Studio's bundled
        # Python. sys.executable may be None or empty in embedded interpreters.
        exe_path = (sys.executable or '').lower()
        store_markers = ('windowsapps', 'pythonsoftwarefoundation')
        if any(marker in exe_path for marker in store_markers):
            diag['is_windows_store'] = True
            diag['recommendations'].append(
                "You are using Windows Store Python which has known DLL compatibility issues."
            )

        # Check PyTorch
        try:
            import torch
            diag['pytorch_importable'] = True
            diag['pytorch_version'] = torch.__version__
            diag['pytorch_cuda'] = torch.cuda.is_available()
        except Exception as e:
            diag['pytorch_error'] = str(e)
            diag['recommendations'].append(
                "PyTorch cannot be loaded. Use alternative OCR backends."
            )

        if not diag['pytorch_importable'] and diag['is_windows_store']:
            diag['recommendations'].extend([
                "Install Python from https://python.org instead of Windows Store",
                "Or use conda/miniconda for better compatibility",
                "Recommended OCR backends: opencv_east, easyocr, tesseract"
            ])

        return diag