"""
EU-Utility - OCR Service Core Module

Screen capture and OCR functionality for all plugins.
Part of core - not a plugin. Plugins access via PluginAPI.
"""

import io
import base64
from typing import Dict, List, Tuple, Optional, Any
from dataclasses import dataclass
from pathlib import Path

try:
    import numpy as np
    NUMPY_AVAILABLE = True
except ImportError:
    NUMPY_AVAILABLE = False
    np = None


# (x, y, width, height) rectangle, used for screen regions and image crops.
Region = Tuple[int, int, int, int]


@dataclass
class OCRResult:
    """Result from OCR operation."""
    text: str
    confidence: float
    bounding_box: Tuple[int, int, int, int]  # x, y, width, height
    raw_data: Any = None


class OCRService:
    """
    Core OCR service with multiple backend support.

    Fallback chain: EasyOCR -> Tesseract -> PaddleOCR
    LAZY INITIALIZATION - only loads when first used
    """

    def __init__(self):
        self._ocr_reader = None      # backend reader object (EasyOCR / PaddleOCR)
        self._backend = None         # 'easyocr', 'tesseract', 'paddle' or None
        self._initialized = False    # True once a backend was loaded
        self._initializing = False   # guards against re-entrant initialization

    # ------------------------------------------------------------------
    # Backend initialization
    # ------------------------------------------------------------------

    def _init_backends(self):
        """Initialize available OCR backends (lazy - called on first use).

        Backends are probed in order EasyOCR, Tesseract, PaddleOCR and the
        first one that loads is kept. Does nothing when a backend is already
        loaded or when initialization is currently in progress.
        """
        if self._initialized or self._initializing:
            return

        self._initializing = True
        print("[OCR] Initializing backends...")

        try:
            probes = (self._try_easyocr, self._try_tesseract, self._try_paddle)
            for probe in probes:
                if probe():
                    self._initialized = True
                    break
        finally:
            # Always clear the guard: if it stayed set after an interrupted
            # probe, no later call would ever retry initialization.
            self._initializing = False

        if not self._initialized:
            print("[OCR] WARNING: No OCR backend available!")
            print("[OCR] Install one of: easyocr, pytesseract, paddleocr")

    def _try_easyocr(self) -> bool:
        """Try to load EasyOCR (best accuracy); return True on success."""
        try:
            import easyocr
            self._ocr_reader = easyocr.Reader(['en'], gpu=False, verbose=False)
        except ImportError:
            # Not installed - silently move on to the next backend.
            return False
        except Exception as e:
            print(f"[OCR] EasyOCR failed: {e}")
            return False

        self._backend = 'easyocr'
        print("[OCR] Using EasyOCR backend")
        return True

    def _try_tesseract(self) -> bool:
        """Try to load Tesseract (most common); return True on success."""
        try:
            import pytesseract
            from PIL import Image  # noqa: F401 - only checks Pillow is importable
            # Raises when the tesseract binary itself cannot be found.
            pytesseract.get_tesseract_version()
        except Exception as e:
            print(f"[OCR] Tesseract failed: {e}")
            return False

        self._backend = 'tesseract'
        print("[OCR] Using Tesseract backend")
        return True

    def _try_paddle(self) -> bool:
        """Try to load PaddleOCR (fallback); return True on success."""
        try:
            from paddleocr import PaddleOCR

            # Use minimal config to avoid model downloads on init
            import os
            os.environ['PADDLE_PDX_DISABLE_MODEL_SOURCE_CHECK'] = 'True'

            try:
                reader = PaddleOCR(
                    lang='en',
                    show_log=False,
                    use_gpu=False  # This param may not work in all versions
                )
                message = "[OCR] Using PaddleOCR backend"
            except TypeError:
                # Try without use_gpu if it failed
                reader = PaddleOCR(lang='en', show_log=False)
                message = "[OCR] Using PaddleOCR backend (no GPU)"
        except Exception as e:
            print(f"[OCR] PaddleOCR failed: {e}")
            return False

        self._ocr_reader = reader
        self._backend = 'paddle'
        print(message)
        return True

    def is_available(self) -> bool:
        """Check if OCR is available (lazy init)."""
        if not self._initialized and not self._initializing:
            self._init_backends()
        return self._initialized

    # ------------------------------------------------------------------
    # Small pure helpers
    # ------------------------------------------------------------------

    @staticmethod
    def _error_result(message: str) -> Dict[str, Any]:
        """Build the result dict returned when OCR could not be performed."""
        return {
            'text': '',
            'confidence': 0,
            'error': message,
            'results': [],
            'image_size': None,
        }

    @staticmethod
    def _image_size(image) -> Optional[Tuple[int, int]]:
        """Return (width, height) of *image*, or None when it is unknown.

        Objects exposing a 2-tuple ``size`` are reported as-is. Array-like
        objects are measured through ``shape`` instead, because a numpy
        array's ``size`` attribute is the total element count, not its
        dimensions.
        """
        size = getattr(image, 'size', None)
        if isinstance(size, tuple) and len(size) == 2:
            return size

        shape = getattr(image, 'shape', None)
        if shape is not None and len(shape) >= 2:
            # Arrays are indexed (row, column) -> (height, width).
            return (int(shape[1]), int(shape[0]))
        return None

    @staticmethod
    def _to_numpy(image):
        """Convert a PIL-style image to a numpy array; pass others through."""
        import numpy as np

        if hasattr(image, 'convert'):
            return np.array(image)
        return image

    @staticmethod
    def _bbox_from_points(points) -> Tuple[int, int, int, int]:
        """Collapse a polygon of (x, y) points into (x, y, width, height)."""
        xs = [p[0] for p in points]
        ys = [p[1] for p in points]
        x_min, x_max = min(xs), max(xs)
        y_min, y_max = min(ys), max(ys)
        return (int(x_min), int(y_min), int(x_max - x_min), int(y_max - y_min))

    @staticmethod
    def _parse_confidence(raw) -> Optional[float]:
        """Convert a Tesseract confidence value to the 0-1 range.

        Going through ``float`` accepts ints, floats and numeric strings such
        as ``'95.5'`` (which ``int()`` rejects).

        Returns:
            The confidence divided by 100, or None when the value is not
            numeric or not positive (such entries are skipped by the caller).
        """
        try:
            value = float(raw)
        except (TypeError, ValueError):
            return None
        if value <= 0:
            return None
        return value / 100.0

    @staticmethod
    def _crop_image(image, region: Region):
        """Crop *image* to *region*, clamped to the image bounds.

        Args:
            image: Object with ``crop``/``size`` (PIL-style) or a ``shape``
                and slicing support (numpy-style). Other objects are
                returned unchanged.
            region: (x, y, width, height) in image pixel coordinates.

        Returns:
            The cropped image, or None when the region does not overlap the
            image at all.
        """
        dims = OCRService._image_size(image)
        can_crop = hasattr(image, 'crop')
        can_slice = getattr(image, 'shape', None) is not None
        if dims is None or not (can_crop or can_slice):
            return image

        x, y, width, height = region
        img_w, img_h = dims
        left, top = max(x, 0), max(y, 0)
        right, bottom = min(x + width, img_w), min(y + height, img_h)
        if right <= left or bottom <= top:
            return None

        if can_crop:
            return image.crop((left, top, right, bottom))
        return image[top:bottom, left:right]

    # ------------------------------------------------------------------
    # Capture and recognition
    # ------------------------------------------------------------------

    def capture_screen(self, region: Optional[Region] = None) -> 'Image.Image':
        """
        Capture screen or region using the ScreenshotService.

        Falls back to a direct pyautogui capture when the screenshot service
        cannot be imported or raises.

        Args:
            region: (x, y, width, height) or None for full screen

        Returns:
            PIL Image

        Raises:
            RuntimeError: If the fallback is needed and pyautogui is not
                installed.
        """
        try:
            from core.screenshot import get_screenshot_service
            screenshot_service = get_screenshot_service()

            if region:
                x, y, width, height = region
                return screenshot_service.capture_region(x, y, width, height)
            return screenshot_service.capture(full_screen=True)
        except Exception as e:
            print(f"[OCR] Screenshot service failed, falling back: {e}")

        # Fallback to direct pyautogui capture
        try:
            import pyautogui
        except ImportError as exc:
            raise RuntimeError(
                "pyautogui not installed. Run: pip install pyautogui"
            ) from exc

        if region:
            x, y, width, height = region
            return pyautogui.screenshot(region=(x, y, width, height))
        return pyautogui.screenshot()

    def recognize(self, image=None,
                  region: Optional[Region] = None) -> Dict[str, Any]:
        """
        Perform OCR on image or screen region.

        Args:
            image: PIL Image, numpy array, or None to capture screen
            region: Screen region to capture (if image is None)

        Returns:
            Dict with 'text', 'confidence', 'results', 'image_size'.
            On failure the dict additionally carries an 'error' message and
            'image_size' is None; this method does not raise for OCR errors.
        """
        # Lazy initialization
        if not self._initialized and not self._initializing:
            self._init_backends()

        if not self._initialized:
            return self._error_result('OCR not initialized - no backend available')

        handlers = {
            'easyocr': self._ocr_easyocr,
            'tesseract': self._ocr_tesseract,
            'paddle': self._ocr_paddle,
        }

        try:
            # Capture if needed
            if image is None:
                image = self.capture_screen(region)

            handler = handlers.get(self._backend)
            if handler is None:
                return self._error_result('Unknown backend')
            return handler(image)
        except Exception as e:
            # Deliberate best-effort: report the failure to the caller
            # through the result dict instead of raising.
            return self._error_result(str(e))

    def recognize_image(self, image) -> Dict[str, Any]:
        """
        Perform OCR on a PIL Image.
        Convenience alias for recognize(image=image).

        Args:
            image: PIL Image to OCR

        Returns:
            Dict with 'text', 'confidence', 'results', 'image_size'
        """
        return self.recognize(image=image)

    def _ocr_easyocr(self, image) -> Dict[str, Any]:
        """OCR using EasyOCR."""
        detections = self._ocr_reader.readtext(self._to_numpy(image))

        # Each detection is unpacked as (polygon, text, confidence).
        parsed_results = [
            OCRResult(
                text=text,
                confidence=conf,
                bounding_box=self._bbox_from_points(bbox),
                raw_data={'bbox': bbox},
            )
            for (bbox, text, conf) in detections
        ]

        total_confidence = sum(r.confidence for r in parsed_results)
        avg_confidence = total_confidence / len(parsed_results) if parsed_results else 0

        return {
            'text': ' '.join(r.text for r in parsed_results),
            'confidence': avg_confidence,
            'results': parsed_results,
            'image_size': self._image_size(image),
        }

    def _ocr_tesseract(self, image) -> Dict[str, Any]:
        """OCR using Tesseract."""
        import pytesseract

        # Get full text
        text = pytesseract.image_to_string(image).strip()

        # Get detailed data
        data = pytesseract.image_to_data(image, output_type=pytesseract.Output.DICT)

        parsed_results = []
        for i, word in enumerate(data['text']):
            if not word.strip():
                continue
            confidence = self._parse_confidence(data['conf'][i])
            if confidence is None:  # no valid confidence for this entry
                continue
            parsed_results.append(OCRResult(
                text=word,
                confidence=confidence,
                bounding_box=(
                    data['left'][i],
                    data['top'][i],
                    data['width'][i],
                    data['height'][i],
                ),
                raw_data={'block_num': data['block_num'][i]},
            ))

        if parsed_results:
            avg_confidence = sum(r.confidence for r in parsed_results) / len(parsed_results)
        else:
            avg_confidence = 0

        return {
            'text': text,
            'confidence': avg_confidence,
            'results': parsed_results,
            'image_size': self._image_size(image),
        }

    def _ocr_paddle(self, image) -> Dict[str, Any]:
        """OCR using PaddleOCR."""
        result = self._ocr_reader.ocr(self._to_numpy(image), cls=True)

        parsed_results = []
        # Only the first entry of the result is read; it may be empty/None.
        if result and result[0]:
            for line in result[0]:
                bbox, (text, conf) = line
                parsed_results.append(OCRResult(
                    text=text,
                    confidence=conf,
                    bounding_box=self._bbox_from_points(bbox),
                    raw_data={'bbox': bbox},
                ))

        total_confidence = sum(r.confidence for r in parsed_results)
        avg_confidence = total_confidence / len(parsed_results) if parsed_results else 0

        return {
            'text': ' '.join(r.text for r in parsed_results),
            'confidence': avg_confidence,
            'results': parsed_results,
            'image_size': self._image_size(image),
        }

    def recognize_region(self, x: int, y: int, width: int, height: int) -> Dict[str, Any]:
        """Convenience method for region OCR."""
        return self.recognize(region=(x, y, width, height))

    def find_text(self, target_text: str, image=None,
                  region: Optional[Region] = None) -> List[OCRResult]:
        """
        Find specific text in image.

        The comparison is a case-insensitive substring match against each
        recognized entry.

        Returns list of OCRResult where target_text is found.
        """
        result = self.recognize(image, region)
        needle = target_text.lower()
        return [r for r in result.get('results', []) if needle in r.text.lower()]

    def get_text_at_position(self, x: int, y: int, image=None) -> Optional[str]:
        """Get text at specific screen position.

        A 100x20 window centred on (x, y) is examined. When *image* is given
        the window is cropped out of that image (clamped to its bounds),
        because recognize() ignores the region whenever an image is passed.
        Without an image the window is captured from the screen; it is
        passed to the capture backend unchanged.

        Returns:
            The recognized text, or None when nothing was recognized or the
            window lies completely outside the supplied image.
        """
        # Small region around point
        region = (x - 50, y - 10, 100, 20)

        if image is not None:
            image = self._crop_image(image, region)
            if image is None:
                # Passing None on would trigger a screen capture instead.
                return None

        result = self.recognize(image, region)
        return result.get('text') or None


# Singleton instance
_ocr_service = None


def get_ocr_service() -> OCRService:
    """Get global OCRService instance."""
    global _ocr_service
    if _ocr_service is None:
        _ocr_service = OCRService()
    return _ocr_service


# Convenience function for quick OCR
def quick_ocr(region: Optional[Region] = None) -> str:
    """
    Quick OCR - capture and get text.

    Usage:
        text = quick_ocr()  # Full screen
        text = quick_ocr((100, 100, 200, 50))  # Region
    """
    service = get_ocr_service()
    result = service.recognize(region=region)
    return result.get('text', '')