""" Lemontropia Suite - Game Vision AI Module Advanced computer vision with multiple OCR backends and GPU acceleration. OCR Backends (in priority order): 1. OpenCV EAST - Fastest, no dependencies (primary fallback) 2. EasyOCR - Good accuracy, lighter than PaddleOCR 3. Tesseract OCR - Traditional, stable 4. PaddleOCR - Best accuracy (requires working PyTorch) Handles PyTorch DLL errors on Windows Store Python gracefully. """ import cv2 import numpy as np import logging import time from pathlib import Path from dataclasses import dataclass, field from typing import Optional, Tuple, List, Dict, Any, Union from enum import Enum import json import hashlib logger = logging.getLogger(__name__) # Import hardware detection from .hardware_detection import ( HardwareDetector, HardwareInfo, GPUBackend, recommend_ocr_backend, get_hardware_info ) # Import OCR backends from .ocr_backends import ( BaseOCRBackend, OCRTextRegion, OCRBackendInfo, OCRBackendFactory ) @dataclass class TextRegion: """Detected text region with metadata.""" text: str confidence: float bbox: Tuple[int, int, int, int] # x, y, w, h language: str = "en" backend: str = "unknown" # Which OCR backend detected this def to_dict(self) -> Dict[str, Any]: return { 'text': self.text, 'confidence': self.confidence, 'bbox': self.bbox, 'language': self.language, 'backend': self.backend } @classmethod def from_ocr_region(cls, region: OCRTextRegion, backend: str = "unknown"): """Create from OCR backend region.""" return cls( text=region.text, confidence=region.confidence, bbox=region.bbox, language=region.language, backend=backend ) @dataclass class IconRegion: """Detected icon region with metadata.""" image: np.ndarray bbox: Tuple[int, int, int, int] # x, y, w, h confidence: float icon_hash: str = "" def __post_init__(self): if not self.icon_hash: self.icon_hash = self._compute_hash() def _compute_hash(self) -> str: """Compute perceptual hash of icon.""" if self.image is None or self.image.size == 0: return "" # Resize to standard size and compute average hash small = cv2.resize(self.image, (16, 16), interpolation=cv2.INTER_AREA) gray = cv2.cvtColor(small, cv2.COLOR_BGR2GRAY) if len(small.shape) == 3 else small avg = gray.mean() hash_bits = (gray > avg).flatten() return ''.join(['1' if b else '0' for b in hash_bits]) @dataclass class ItemMatch: """Result of matching an icon to database.""" name: str confidence: float item_id: Optional[str] = None category: Optional[str] = None matched_hash: str = "" @dataclass class VisionResult: """Complete vision processing result.""" text_regions: List[TextRegion] = field(default_factory=list) icon_regions: List[IconRegion] = field(default_factory=list) processing_time_ms: float = 0.0 gpu_backend: str = "cpu" ocr_backend: str = "unknown" timestamp: float = field(default_factory=time.time) def to_dict(self) -> Dict[str, Any]: return { 'text_regions': [t.to_dict() for t in self.text_regions], 'icon_count': len(self.icon_regions), 'processing_time_ms': self.processing_time_ms, 'gpu_backend': self.gpu_backend, 'ocr_backend': self.ocr_backend, 'timestamp': self.timestamp } class GPUDetector: """Detect and manage GPU availability.""" @staticmethod def detect_backend() -> GPUBackend: """Detect best available GPU backend.""" info = HardwareDetector.detect_all() return info.gpu_backend @staticmethod def get_gpu_info() -> Dict[str, Any]: """Get detailed GPU information.""" info = HardwareDetector.detect_all() return info.to_dict() class UnifiedOCRProcessor: """ Unified OCR processor with multiple backend support. Automatically selects the best available backend based on: 1. Hardware capabilities 2. PyTorch DLL compatibility 3. User preferences Gracefully falls through backends if one fails. """ SUPPORTED_LANGUAGES = ['en', 'sv', 'latin', 'de', 'fr', 'es'] # Default priority (can be overridden) DEFAULT_PRIORITY = [ 'paddleocr', # Best accuracy if available 'easyocr', # Good balance 'tesseract', # Stable fallback 'opencv_east', # Fastest, always works ] def __init__(self, use_gpu: bool = True, lang: str = 'en', backend_priority: Optional[List[str]] = None, auto_select: bool = True): """ Initialize Unified OCR Processor. Args: use_gpu: Enable GPU acceleration if available lang: Language for OCR ('en', 'sv', 'latin', etc.) backend_priority: Custom backend priority order auto_select: Automatically select best backend """ self.use_gpu = use_gpu self.lang = lang if lang in self.SUPPORTED_LANGUAGES else 'en' self.backend_priority = backend_priority or self.DEFAULT_PRIORITY self._backend: Optional[BaseOCRBackend] = None self._backend_name: str = "unknown" self._hardware_info: HardwareInfo = HardwareDetector.detect_all() # Initialize if auto_select: self._auto_select_backend() logger.info(f"UnifiedOCR initialized with backend: {self._backend_name}") def _auto_select_backend(self): """Automatically select the best available backend.""" # Check for PyTorch DLL errors first if self._hardware_info.pytorch_dll_error: logger.warning( "PyTorch DLL error detected - avoiding PyTorch-based backends" ) # Remove PyTorch-dependent backends from priority safe_backends = [ b for b in self.backend_priority if b not in ['paddleocr', 'easyocr'] ] else: safe_backends = self.backend_priority # Get recommended backend recommended = HardwareDetector.recommend_ocr_backend() # Try to create backend for name in safe_backends: backend = OCRBackendFactory.create_backend( name, use_gpu=self.use_gpu, lang=self.lang ) if backend is not None and backend.is_available(): self._backend = backend self._backend_name = name logger.info(f"Selected OCR backend: {name}") return # Ultimate fallback - OpenCV EAST always works logger.warning("All preferred backends failed, trying OpenCV EAST...") backend = OCRBackendFactory.create_backend( 'opencv_east', use_gpu=self.use_gpu, lang=self.lang ) if backend is not None and backend.is_available(): self._backend = backend self._backend_name = 'opencv_east' logger.info("Using OpenCV EAST as ultimate fallback") else: logger.error("CRITICAL: No OCR backend available!") def set_backend(self, name: str) -> bool: """ Manually set OCR backend. Args: name: Backend name ('paddleocr', 'easyocr', 'tesseract', 'opencv_east') Returns: True if successful """ backend = OCRBackendFactory.create_backend( name, use_gpu=self.use_gpu, lang=self.lang ) if backend is not None and backend.is_available(): self._backend = backend self._backend_name = name logger.info(f"Switched to OCR backend: {name}") return True else: logger.error(f"Failed to switch to OCR backend: {name}") return False def extract_text(self, image: Union[str, np.ndarray, Path]) -> List[TextRegion]: """ Extract text from image using selected backend. Args: image: Image path or numpy array Returns: List of detected text regions """ # Load image if path provided if isinstance(image, (str, Path)): img = cv2.imread(str(image)) if img is None: logger.error(f"Failed to load image: {image}") return [] else: img = image.copy() # Check backend if self._backend is None: logger.error("No OCR backend available") return [] try: # Extract text using backend ocr_regions = self._backend.extract_text(img) # Convert to TextRegion with backend info regions = [ TextRegion.from_ocr_region(r, self._backend_name) for r in ocr_regions ] logger.debug(f"Extracted {len(regions)} text regions using {self._backend_name}") return regions except Exception as e: logger.error(f"OCR extraction failed: {e}") return [] def extract_text_from_region(self, image: np.ndarray, region: Tuple[int, int, int, int]) -> List[TextRegion]: """Extract text from specific region of image.""" x, y, w, h = region roi = image[y:y+h, x:x+w] if roi.size == 0: return [] regions = self.extract_text(roi) # Adjust coordinates back to original image for r in regions: rx, ry, rw, rh = r.bbox r.bbox = (x + rx, y + ry, rw, rh) return regions def get_available_backends(self) -> List[OCRBackendInfo]: """Get information about all available backends.""" return OCRBackendFactory.check_all_backends(self.use_gpu, self.lang) def get_current_backend(self) -> str: """Get name of current backend.""" return self._backend_name def get_backend_info(self) -> Dict[str, Any]: """Get information about current backend.""" if self._backend: return self._backend.get_info().to_dict() return {"error": "No backend initialized"} def is_recognition_supported(self) -> bool: """ Check if current backend supports text recognition. Note: OpenCV EAST only detects text regions, doesn't recognize text. """ return self._backend_name not in ['opencv_east'] # Legacy class for backward compatibility class OCRProcessor(UnifiedOCRProcessor): """Legacy OCR processor - now wraps UnifiedOCRProcessor.""" pass class IconDetector: """Detect and extract item icons from game UI.""" # Typical Entropia Universe loot window icon sizes ICON_SIZES = { 'small': (32, 32), 'medium': (48, 48), 'large': (64, 64), 'hud': (40, 40) } def __init__(self, template_dir: Optional[Path] = None): self.template_dir = template_dir or Path(__file__).parent / "templates" / "icons" self.templates: Dict[str, np.ndarray] = {} self._load_templates() def _load_templates(self): """Load icon templates for matching.""" if not self.template_dir.exists(): logger.warning(f"Template directory not found: {self.template_dir}") return for template_file in self.template_dir.glob("*.png"): try: name = template_file.stem template = cv2.imread(str(template_file), cv2.IMREAD_COLOR) if template is not None: self.templates[name] = template logger.debug(f"Loaded icon template: {name}") except Exception as e: logger.error(f"Failed to load template {template_file}: {e}") def detect_loot_window(self, image: np.ndarray) -> Optional[Tuple[int, int, int, int]]: """Detect loot window in screenshot.""" # Look for common loot window indicators if 'loot_window' in self.templates: result = cv2.matchTemplate( image, self.templates['loot_window'], cv2.TM_CCOEFF_NORMED ) _, max_val, _, max_loc = cv2.minMaxLoc(result) if max_val > 0.7: h, w = self.templates['loot_window'].shape[:2] return (*max_loc, w, h) # Method 2: Detect based on typical loot window characteristics gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY) _, thresh = cv2.threshold(gray, 200, 255, cv2.THRESH_BINARY) contours, _ = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE) # Filter for icon-sized squares potential_icons = [] for cnt in contours: x, y, w, h = cv2.boundingRect(cnt) aspect = w / h if h > 0 else 0 for size_name, (sw, sh) in self.ICON_SIZES.items(): if abs(w - sw) < 5 and abs(h - sh) < 5 and 0.8 < aspect < 1.2: potential_icons.append((x, y, w, h)) break # If we found multiple icons in a grid pattern, assume loot window if len(potential_icons) >= 2: xs = [p[0] for p in potential_icons] ys = [p[1] for p in potential_icons] ws = [p[2] for p in potential_icons] hs = [p[3] for p in potential_icons] min_x, max_x = min(xs), max(xs) + max(ws) min_y, max_y = min(ys), max(ys) + max(hs) padding = 20 return ( max(0, min_x - padding), max(0, min_y - padding), max_x - min_x + padding * 2, max_y - min_y + padding * 2 ) return None def extract_icons_from_region(self, image: np.ndarray, region: Tuple[int, int, int, int], icon_size: str = 'medium') -> List[IconRegion]: """Extract icons from a specific region.""" x, y, w, h = region roi = image[y:y+h, x:x+w] if roi.size == 0: return [] target_size = self.ICON_SIZES.get(icon_size, (48, 48)) gray = cv2.cvtColor(roi, cv2.COLOR_BGR2GRAY) icons = [] thresholds = [(200, 255), (180, 255), (150, 255)] for thresh_low, thresh_high in thresholds: _, thresh = cv2.threshold(gray, thresh_low, thresh_high, cv2.THRESH_BINARY) contours, _ = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE) for cnt in contours: cx, cy, cw, ch = cv2.boundingRect(cnt) aspect = cw / ch if ch > 0 else 0 if (abs(cw - target_size[0]) < 8 and abs(ch - target_size[1]) < 8 and 0.7 < aspect < 1.3): icon_img = roi[cy:cy+ch, cx:cx+cw] icon_img = cv2.resize(icon_img, target_size, interpolation=cv2.INTER_AREA) icons.append(IconRegion( image=icon_img, bbox=(x + cx, y + cy, cw, ch), confidence=0.8 )) # Remove duplicates unique_icons = self._remove_duplicate_icons(icons) return unique_icons def _remove_duplicate_icons(self, icons: List[IconRegion], iou_threshold: float = 0.5) -> List[IconRegion]: """Remove duplicate icons based on IoU.""" if not icons: return [] sorted_icons = sorted(icons, key=lambda x: x.confidence, reverse=True) kept = [] for icon in sorted_icons: is_duplicate = False for kept_icon in kept: if self._calculate_iou(icon.bbox, kept_icon.bbox) > iou_threshold: is_duplicate = True break if not is_duplicate: kept.append(icon) return kept def _calculate_iou(self, box1: Tuple[int, int, int, int], box2: Tuple[int, int, int, int]) -> float: """Calculate Intersection over Union.""" x1, y1, w1, h1 = box1 x2, y2, w2, h2 = box2 xi1 = max(x1, x2) yi1 = max(y1, y2) xi2 = min(x1 + w1, x2 + w2) yi2 = min(y1 + h1, y2 + h2) inter_area = max(0, xi2 - xi1) * max(0, yi2 - yi1) box1_area = w1 * h1 box2_area = w2 * h2 union_area = box1_area + box2_area - inter_area return inter_area / union_area if union_area > 0 else 0 class GameVisionAI: """ Main AI vision interface for game screenshot analysis. Combines OCR and icon detection with multiple backend support. """ def __init__(self, use_gpu: bool = True, ocr_lang: str = 'en', ocr_backend: Optional[str] = None, data_dir: Optional[Path] = None): """ Initialize Game Vision AI. Args: use_gpu: Enable GPU acceleration if available ocr_lang: Language for OCR ocr_backend: Specific OCR backend to use (None for auto) data_dir: Directory for storing extracted data """ self.use_gpu = use_gpu self.data_dir = data_dir or Path.home() / ".lemontropia" self.extracted_icons_dir = self.data_dir / "extracted_icons" self.extracted_icons_dir.mkdir(parents=True, exist_ok=True) # Detect hardware self.hardware_info = HardwareDetector.detect_all() self.backend = self.hardware_info.gpu_backend # Initialize OCR processor self.ocr = UnifiedOCRProcessor( use_gpu=use_gpu, lang=ocr_lang, auto_select=(ocr_backend is None) ) # Set specific backend if requested if ocr_backend: self.ocr.set_backend(ocr_backend) # Initialize icon detector self.icon_detector = IconDetector() logger.info(f"GameVisionAI initialized (GPU: {self.backend.value}, " f"OCR: {self.ocr.get_current_backend()})") def extract_text_from_image(self, image_path: Union[str, Path]) -> List[TextRegion]: """Extract all text from an image.""" return self.ocr.extract_text(image_path) def extract_icons_from_image(self, image_path: Union[str, Path], auto_detect_window: bool = True) -> List[IconRegion]: """Extract item icons from image.""" image = cv2.imread(str(image_path)) if image is None: logger.error(f"Failed to load image: {image_path}") return [] if auto_detect_window: window_region = self.icon_detector.detect_loot_window(image) if window_region: logger.debug(f"Detected loot window: {window_region}") return self.icon_detector.extract_icons_from_region( image, window_region ) else: logger.debug("No loot window detected, scanning full image") h, w = image.shape[:2] return self.icon_detector.extract_icons_from_region( image, (0, 0, w, h) ) else: h, w = image.shape[:2] return self.icon_detector.extract_icons_from_region( image, (0, 0, w, h) ) def process_screenshot(self, image_path: Union[str, Path], extract_text: bool = True, extract_icons: bool = True) -> VisionResult: """ Process screenshot with all vision capabilities. Args: image_path: Path to screenshot extract_text: Enable text extraction extract_icons: Enable icon extraction Returns: VisionResult with all detections """ start_time = time.time() result = VisionResult( gpu_backend=self.backend.value, ocr_backend=self.ocr.get_current_backend() ) # Load image once image = cv2.imread(str(image_path)) if image is None: logger.error(f"Failed to load image: {image_path}") return result # Extract text if extract_text: result.text_regions = self.ocr.extract_text(image) logger.debug(f"Extracted {len(result.text_regions)} text regions") # Extract icons if extract_icons: result.icon_regions = self.extract_icons_from_image(image_path) logger.debug(f"Extracted {len(result.icon_regions)} icons") # Save extracted icons self._save_extracted_icons(result.icon_regions) result.processing_time_ms = (time.time() - start_time) * 1000 return result def _save_extracted_icons(self, icons: List[IconRegion]): """Save extracted icons to disk.""" for i, icon in enumerate(icons): filename = f"icon_{icon.icon_hash[:16]}_{int(time.time())}_{i}.png" filepath = self.extracted_icons_dir / filename cv2.imwrite(str(filepath), icon.image) logger.debug(f"Saved icon: {filepath}") def get_gpu_info(self) -> Dict[str, Any]: """Get GPU information.""" return self.hardware_info.to_dict() def is_gpu_available(self) -> bool: """Check if GPU acceleration is available.""" return self.backend != GPUBackend.CPU def get_ocr_backends(self) -> List[Dict[str, Any]]: """Get information about all available OCR backends.""" backends = self.ocr.get_available_backends() return [b.to_dict() for b in backends] def switch_ocr_backend(self, name: str) -> bool: """Switch to a different OCR backend.""" return self.ocr.set_backend(name) def calibrate_for_game(self, sample_screenshots: List[Path]) -> Dict[str, Any]: """Calibrate vision system using sample screenshots.""" calibration = { 'screenshots_processed': 0, 'text_regions_detected': 0, 'icons_detected': 0, 'average_processing_time_ms': 0, 'detected_regions': {}, 'ocr_backend': self.ocr.get_current_backend(), 'gpu_backend': self.backend.value, } total_time = 0 for screenshot_path in sample_screenshots: try: start = time.time() result = self.process_screenshot(screenshot_path) elapsed = (time.time() - start) * 1000 calibration['screenshots_processed'] += 1 calibration['text_regions_detected'] += len(result.text_regions) calibration['icons_detected'] += len(result.icon_regions) total_time += elapsed except Exception as e: logger.error(f"Failed to process {screenshot_path}: {e}") if calibration['screenshots_processed'] > 0: calibration['average_processing_time_ms'] = ( total_time / calibration['screenshots_processed'] ) return calibration @staticmethod def diagnose() -> Dict[str, Any]: """Run full diagnostic on vision system.""" return { 'hardware': HardwareDetector.detect_all().to_dict(), 'ocr_backends': [ b.to_dict() for b in OCRBackendFactory.check_all_backends() ], 'recommendations': { 'ocr_backend': HardwareDetector.recommend_ocr_backend(), 'gpu': GPUDetector.detect_backend().value, } } # Export main classes __all__ = [ 'GameVisionAI', 'UnifiedOCRProcessor', 'OCRProcessor', # Legacy 'TextRegion', 'IconRegion', 'ItemMatch', 'VisionResult', 'GPUBackend', 'GPUDetector', 'IconDetector', 'HardwareDetector', 'OCRBackendFactory', 'BaseOCRBackend', ]