# EU-Utility/core/ocr_service.py

"""
EU-Utility - OCR Service Core Module

Screen capture and OCR functionality for all plugins.
Part of core - not a plugin. Plugins access via PluginAPI.
"""

import io
import base64
from typing import Dict, List, Tuple, Optional, Any
from dataclasses import dataclass
from pathlib import Path

try:
    import numpy as np
    NUMPY_AVAILABLE = True
except ImportError:
    NUMPY_AVAILABLE = False
    np = None


@dataclass
class OCRResult:
    """
    One recognized piece of text produced by an OCR backend.

    Fields, in positional order: the recognized text, the confidence the
    backend reported for it, its bounding box, and optional backend data.
    """
    text: str
    confidence: float
    # Bounding box laid out as (x, y, width, height)
    bounding_box: Tuple[int, int, int, int]
    # Backend-specific extras; stays None when the caller supplies nothing
    raw_data: Any = None


class OCRService:
    """
    Core OCR service with multiple backend support.
    Fallback chain: EasyOCR -> Tesseract -> PaddleOCR
    LAZY INITIALIZATION - only loads when first used
    """

    def __init__(self):
        self._ocr_reader = None
        self._backend = None
        self._initialized = False
        self._initializing = False

    def _init_backends(self):
        """Initialize available OCR backends (lazy - called on first use)."""
        if self._initialized or self._initializing:
            return

        self._initializing = True
        print("[OCR] Initializing backends...")

        # Try EasyOCR first (best accuracy)
        try:
            import easyocr
            self._ocr_reader = easyocr.Reader(['en'], gpu=False, verbose=False)
            self._backend = 'easyocr'
            self._initialized = True
            print("[OCR] Using EasyOCR backend")
            self._initializing = False
            return
        except ImportError:
            pass
        except Exception as e:
            print(f"[OCR] EasyOCR failed: {e}")

        # Try Tesseract (most common)
        try:
            import pytesseract
            from PIL import Image
            pytesseract.get_tesseract_version()
            self._backend = 'tesseract'
            self._initialized = True
            print("[OCR] Using Tesseract backend")
            self._initializing = False
            return
        except Exception as e:
            print(f"[OCR] Tesseract failed: {e}")

        # Try PaddleOCR (fallback) - with minimal config
        try:
            from paddleocr import PaddleOCR
            # Use minimal config to avoid model downloads on init
            import os
            os.environ['PADDLE_PDX_DISABLE_MODEL_SOURCE_CHECK'] = 'True'

            self._ocr_reader = PaddleOCR(
                lang='en',
                show_log=False,
                use_gpu=False  # This param may not work in all versions
            )
            self._backend = 'paddle'
            self._initialized = True
            print("[OCR] Using PaddleOCR backend")
        except TypeError:
            # Try without use_gpu if it failed
            try:
                self._ocr_reader = PaddleOCR(lang='en', show_log=False)
                self._backend = 'paddle'
                self._initialized = True
                print("[OCR] Using PaddleOCR backend (no GPU)")
            except Exception as e2:
                print(f"[OCR] PaddleOCR failed: {e2}")
        except Exception as e:
            print(f"[OCR] PaddleOCR failed: {e}")

        self._initializing = False

        if not self._initialized:
            print("[OCR] WARNING: No OCR backend available!")
            print("[OCR] Install one of: easyocr, pytesseract, paddleocr")

    def is_available(self) -> bool:
        """Check if OCR is available (lazy init)."""
        if not self._initialized and not self._initializing:
            self._init_backends()
        return self._initialized

    def capture_screen(self, region: Tuple[int, int, int, int] = None) -> 'Image.Image':
        """
        Capture screen or region using the ScreenshotService.

        Args:
            region: (x, y, width, height) or None for full screen

        Returns:
            PIL Image
        """
        try:
            from core.screenshot import get_screenshot_service
            screenshot_service = get_screenshot_service()

            if region:
                x, y, width, height = region
                return screenshot_service.capture_region(x, y, width, height)
            else:
                return screenshot_service.capture(full_screen=True)

        except Exception as e:
            print(f"[OCR] Screenshot service failed, falling back: {e}")
            # Fallback to direct pyautogui capture
            try:
                import pyautogui

                if region:
                    x, y, width, height = region
                    return pyautogui.screenshot(region=(x, y, width, height))
                else:
                    return pyautogui.screenshot()

            except ImportError:
                raise RuntimeError("pyautogui not installed. Run: pip install pyautogui")

    def recognize(self, image=None, region: Tuple[int, int, int, int] = None) -> Dict[str, Any]:
        """
        Perform OCR on image or screen region.

        Args:
            image: PIL Image, numpy array, or None to capture screen
            region: Screen region to capture (if image is None)

        Returns:
            Dict with 'text', 'confidence', 'results', 'image_size'
        """
        # Lazy initialization
        if not self._initialized and not self._initializing:
            self._init_backends()

        if not self._initialized:
            return {
                'text': '',
                'confidence': 0,
                'error': 'OCR not initialized - no backend available',
                'results': []
            }

        try:
            # Capture if needed
            if image is None:
                image = self.capture_screen(region)

            # Convert to appropriate format
            if self._backend == 'easyocr':
                return self._ocr_easyocr(image)
            elif self._backend == 'tesseract':
                return self._ocr_tesseract(image)
            elif self._backend == 'paddle':
                return self._ocr_paddle(image)
            else:
                return {
                    'text': '',
                    'confidence': 0,
                    'error': 'Unknown backend',
                    'results': []
                }

        except Exception as e:
            return {
                'text': '',
                'confidence': 0,
                'error': str(e),
                'results': []
            }

    def recognize_image(self, image) -> Dict[str, Any]:
        """
        Perform OCR on a PIL Image.
        Convenience alias for recognize(image=image).

        Args:
            image: PIL Image to OCR

        Returns:
            Dict with 'text', 'confidence', 'results', 'image_size'
        """
        return self.recognize(image=image)

    def _ocr_easyocr(self, image) -> Dict[str, Any]:
        """OCR using EasyOCR."""
        import numpy as np

        # Convert PIL to numpy
        if hasattr(image, 'convert'):
            image_np = np.array(image)
        else:
            image_np = image

        results = self._ocr_reader.readtext(image_np)

        # Parse results
        texts = []
        total_confidence = 0
        parsed_results = []

        for (bbox, text, conf) in results:
            texts.append(text)
            total_confidence += conf

            # Get bounding box
            x_coords = [p[0] for p in bbox]
            y_coords = [p[1] for p in bbox]
            x_min, x_max = min(x_coords), max(x_coords)
            y_min, y_max = min(y_coords), max(y_coords)

            parsed_results.append(OCRResult(
                text=text,
                confidence=conf,
                bounding_box=(int(x_min), int(y_min), int(x_max-x_min), int(y_max-y_min)),
                raw_data={'bbox': bbox}
            ))

        avg_confidence = total_confidence / len(results) if results else 0

        return {
            'text': ' '.join(texts),
            'confidence': avg_confidence,
            'results': parsed_results,
            'image_size': image.size if hasattr(image, 'size') else None
        }

    def _ocr_tesseract(self, image) -> Dict[str, Any]:
        """OCR using Tesseract."""
        import pytesseract

        # Get full text
        text = pytesseract.image_to_string(image).strip()

        # Get detailed data
        data = pytesseract.image_to_data(image, output_type=pytesseract.Output.DICT)

        parsed_results = []
        for i, word in enumerate(data['text']):
            if word.strip():
                conf = int(data['conf'][i])
                if conf > 0:  # Valid confidence
                    parsed_results.append(OCRResult(
                        text=word,
                        confidence=conf / 100.0,
                        bounding_box=(
                            data['left'][i],
                            data['top'][i],
                            data['width'][i],
                            data['height'][i]
                        ),
                        raw_data={'block_num': data['block_num'][i]}
                    ))

        avg_confidence = sum(r.confidence for r in parsed_results) / len(parsed_results) if parsed_results else 0

        return {
            'text': text,
            'confidence': avg_confidence,
            'results': parsed_results,
            'image_size': image.size if hasattr(image, 'size') else None
        }

    def _ocr_paddle(self, image) -> Dict[str, Any]:
        """OCR using PaddleOCR."""
        import numpy as np

        # Convert PIL to numpy
        if hasattr(image, 'convert'):
            image_np = np.array(image)
        else:
            image_np = image

        result = self._ocr_reader.ocr(image_np, cls=True)

        texts = []
        parsed_results = []
        total_confidence = 0

        if result and result[0]:
            for line in result[0]:
                bbox, (text, conf) = line
                texts.append(text)
                total_confidence += conf

                # Parse bounding box
                x_coords = [p[0] for p in bbox]
                y_coords = [p[1] for p in bbox]

                parsed_results.append(OCRResult(
                    text=text,
                    confidence=conf,
                    bounding_box=(
                        int(min(x_coords)),
                        int(min(y_coords)),
                        int(max(x_coords) - min(x_coords)),
                        int(max(y_coords) - min(y_coords))
                    ),
                    raw_data={'bbox': bbox}
                ))

        avg_confidence = total_confidence / len(parsed_results) if parsed_results else 0

        return {
            'text': ' '.join(texts),
            'confidence': avg_confidence,
            'results': parsed_results,
            'image_size': image.size if hasattr(image, 'size') else None
        }

    def recognize_region(self, x: int, y: int, width: int, height: int) -> Dict[str, Any]:
        """Convenience method for region OCR."""
        return self.recognize(region=(x, y, width, height))

    def find_text(self, target_text: str, image=None, region: Tuple[int, int, int, int] = None) -> List[OCRResult]:
        """
        Find specific text in image.

        Returns list of OCRResult where target_text is found.
        """
        result = self.recognize(image, region)
        matches = []

        for r in result.get('results', []):
            if target_text.lower() in r.text.lower():
                matches.append(r)

        return matches

    def get_text_at_position(self, x: int, y: int, image=None) -> Optional[str]:
        """Get text at specific screen position."""
        # Small region around point
        region = (x - 50, y - 10, 100, 20)
        result = self.recognize(image, region)
        return result.get('text') if result.get('text') else None


# Module-wide OCRService instance, created on first request
_ocr_service = None

def get_ocr_service() -> OCRService:
    """Return the shared OCRService, constructing it the first time it is asked for."""
    global _ocr_service
    if _ocr_service is not None:
        return _ocr_service
    _ocr_service = OCRService()
    return _ocr_service


# One-call helper: capture the screen (or a region) and return its text
def quick_ocr(region: Tuple[int, int, int, int] = None) -> str:
    """
    Capture and OCR in one step, returning only the recognized text.

    Uses the shared service from get_ocr_service(). Returns an empty string
    when the result carries no 'text' entry.

    Usage:
        text = quick_ocr()  # Full screen
        text = quick_ocr((100, 100, 200, 50))  # Region
    """
    outcome = get_ocr_service().recognize(region=region)
    return outcome.get('text', '')