From 522ee8e719c9f3aecbcb7ef2df02f233e18f000d Mon Sep 17 00:00:00 2001 From: LemonNexus Date: Wed, 11 Feb 2026 11:29:10 +0000 Subject: [PATCH] feat: add AI Computer Vision with local GPU support - modules/game_vision_ai.py - Main AI vision engine with OCR and icon detection - modules/icon_matcher.py - Icon similarity matching using perceptual hashing - ui/vision_settings_dialog.py - GPU/OCR settings panel - ui/vision_calibration_dialog.py - Calibration wizard - ui/vision_test_dialog.py - Test and debug dialog - vision_example.py - Usage examples - Update requirements.txt with paddlepaddle, opencv, torch dependencies Features: - GPU auto-detection (CUDA, MPS, DirectML) - PaddleOCR for text extraction (English/Swedish) - Icon detection from loot windows - Icon matching against database - Real-time screenshot processing --- modules/game_vision_ai.py | 722 ++++++++++++++++++++++++++++++++ modules/icon_matcher.py | 614 +++++++++++++++++++++++++++ requirements.txt | 30 +- ui/vision_calibration_dialog.py | 628 +++++++++++++++++++++++++++ ui/vision_settings_dialog.py | 645 ++++++++++++++++++++++++++++ ui/vision_test_dialog.py | 470 +++++++++++++++++++++ vision_example.py | 265 ++++++++++++ 7 files changed, 3369 insertions(+), 5 deletions(-) create mode 100644 modules/game_vision_ai.py create mode 100644 modules/icon_matcher.py create mode 100644 ui/vision_calibration_dialog.py create mode 100644 ui/vision_settings_dialog.py create mode 100644 ui/vision_test_dialog.py create mode 100644 vision_example.py diff --git a/modules/game_vision_ai.py b/modules/game_vision_ai.py new file mode 100644 index 0000000..b611f0f --- /dev/null +++ b/modules/game_vision_ai.py @@ -0,0 +1,722 @@ +""" +Lemontropia Suite - Game Vision AI Module +Advanced computer vision with local GPU-accelerated AI models. +Supports OCR (PaddleOCR) and icon detection for game UI analysis. 
class GPUBackend(Enum):
    """GPU acceleration backends the vision stack can run on."""
    CUDA = "cuda"          # NVIDIA CUDA
    MPS = "mps"            # Apple Metal Performance Shaders
    DIRECTML = "directml"  # Windows DirectML
    CPU = "cpu"            # Fallback when no GPU is usable


@dataclass
class TextRegion:
    """One piece of OCR-detected text plus where it was found."""
    text: str
    confidence: float
    bbox: Tuple[int, int, int, int]  # x, y, w, h in image coordinates
    language: str = "en"

    def to_dict(self) -> Dict[str, Any]:
        """Serialize to a plain, JSON-friendly dict."""
        return {key: getattr(self, key)
                for key in ('text', 'confidence', 'bbox', 'language')}


@dataclass
class IconRegion:
    """A cropped icon candidate extracted from a screenshot."""
    image: np.ndarray
    bbox: Tuple[int, int, int, int]  # x, y, w, h
    confidence: float
    icon_hash: str = ""

    def __post_init__(self):
        # Fill in the perceptual hash when the caller did not supply one.
        if not self.icon_hash:
            self.icon_hash = self._compute_hash()

    def _compute_hash(self) -> str:
        """16x16 average-hash bit string of the icon; '' when there are no pixels."""
        if self.image is None or self.image.size == 0:
            return ""
        shrunk = cv2.resize(self.image, (16, 16), interpolation=cv2.INTER_AREA)
        if len(shrunk.shape) == 3:
            shrunk = cv2.cvtColor(shrunk, cv2.COLOR_BGR2GRAY)
        mean_level = shrunk.mean()
        bits = (shrunk > mean_level).flatten()
        return ''.join('1' if bit else '0' for bit in bits)


@dataclass
class ItemMatch:
    """Outcome of looking an icon up in the item database."""
    name: str
    confidence: float
    item_id: Optional[str] = None
    category: Optional[str] = None
    matched_hash: str = ""
class GPUDetector:
    """Probe the machine for a usable GPU backend."""

    @staticmethod
    def detect_backend() -> "GPUBackend":
        """Return the best available backend, preferring CUDA > MPS > DirectML > CPU."""
        if torch.cuda.is_available():
            logger.info(f"CUDA available: {torch.cuda.get_device_name(0)}")
            return GPUBackend.CUDA

        if hasattr(torch.backends, 'mps') and torch.backends.mps.is_available():
            logger.info("Apple MPS (Metal) available")
            return GPUBackend.MPS

        try:
            import torch_directml
        except ImportError:
            pass
        else:
            if torch_directml.is_available():
                logger.info("DirectML available")
                return GPUBackend.DIRECTML

        logger.info("No GPU backend available, using CPU")
        return GPUBackend.CPU

    @staticmethod
    def get_device_string(backend: "GPUBackend") -> str:
        """Translate a backend enum into a PyTorch device string."""
        # NOTE(review): "privateuseone:0" for DirectML matches torch_directml's
        # registered device name — confirm against the installed version.
        device_map = {
            GPUBackend.CUDA: "cuda:0",
            GPUBackend.MPS: "mps",
            GPUBackend.DIRECTML: "privateuseone:0",
        }
        return device_map.get(backend, "cpu")

    @staticmethod
    def get_gpu_info() -> Dict[str, Any]:
        """Collect a summary of GPU availability plus one entry per CUDA device."""
        summary: Dict[str, Any] = {
            'backend': GPUDetector.detect_backend().value,
            'cuda_available': torch.cuda.is_available(),
            'mps_available': hasattr(torch.backends, 'mps') and torch.backends.mps.is_available(),
            'devices': [],
        }

        if torch.cuda.is_available():
            for idx in range(torch.cuda.device_count()):
                props = torch.cuda.get_device_properties(idx)
                summary['devices'].append({
                    'id': idx,
                    'name': torch.cuda.get_device_name(idx),
                    'memory_total': props.total_memory,
                })

        return summary
class OCRProcessor:
    """OCR text extraction using PaddleOCR, with optional GPU acceleration.

    Degrades gracefully: when PaddleOCR is missing or fails to initialize,
    ``self.ocr`` stays None and :meth:`extract_text` simply returns ``[]``.
    """

    # Languages this wrapper accepts; Swedish is served by the latin-script model.
    SUPPORTED_LANGUAGES = ['en', 'sv', 'latin']

    def __init__(self, use_gpu: bool = True, lang: str = 'en'):
        """
        Args:
            use_gpu: Request GPU acceleration (effective only on CUDA, see _init_ocr).
            lang: OCR language code; unsupported codes fall back to 'en'.
        """
        self.use_gpu = use_gpu
        self.lang = lang if lang in self.SUPPORTED_LANGUAGES else 'en'
        self.ocr = None
        self.backend = GPUBackend.CPU
        self._init_ocr()

    def _init_ocr(self):
        """Initialize PaddleOCR with the appropriate backend."""
        try:
            from paddleocr import PaddleOCR

            if self.use_gpu:
                self.backend = GPUDetector.detect_backend()
                # FIX: PaddlePaddle's GPU build supports CUDA only. The previous
                # check (`backend != CPU`) also enabled use_gpu for MPS/DirectML,
                # which makes Paddle fail to initialize on those machines.
                use_gpu_flag = self.backend == GPUBackend.CUDA
            else:
                use_gpu_flag = False

            # Swedish has no dedicated model; it is covered by the latin-script one.
            lang_map = {
                'en': 'en',
                'sv': 'latin',
                'latin': 'latin'
            }
            paddle_lang = lang_map.get(self.lang, 'en')

            logger.info(f"Initializing PaddleOCR (lang={paddle_lang}, gpu={use_gpu_flag})")

            # NOTE(review): 'rec_thresh' is not documented for every PaddleOCR
            # release — confirm against the pinned paddleocr version.
            self.ocr = PaddleOCR(
                lang=paddle_lang,
                use_gpu=use_gpu_flag,
                show_log=False,
                use_angle_cls=True,
                det_db_thresh=0.3,
                det_db_box_thresh=0.5,
                rec_thresh=0.5,
            )

            logger.info(f"PaddleOCR initialized successfully (backend: {self.backend.value})")

        except ImportError:
            logger.error("PaddleOCR not installed. Install with: pip install paddleocr")
            self.ocr = None
        except Exception as e:
            logger.error(f"Failed to initialize PaddleOCR: {e}")
            self.ocr = None

    def preprocess_for_ocr(self, image: np.ndarray) -> np.ndarray:
        """Grayscale -> denoise -> adaptive threshold, to sharpen text contrast."""
        if len(image.shape) == 3:
            gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
        else:
            gray = image

        # Denoise before thresholding so noise specks don't survive as ink.
        denoised = cv2.fastNlMeansDenoising(gray, None, 10, 7, 21)

        binary = cv2.adaptiveThreshold(
            denoised, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
            cv2.THRESH_BINARY, 11, 2
        )

        return binary

    def extract_text(self, image: Union[str, np.ndarray, Path]) -> List["TextRegion"]:
        """
        Extract text from an image.

        Args:
            image: Image path or numpy array (BGR).

        Returns:
            List of detected text regions ([] when OCR is unavailable or fails).
        """
        if self.ocr is None:
            logger.warning("OCR not available")
            return []

        if isinstance(image, (str, Path)):
            img = cv2.imread(str(image))
            if img is None:
                logger.error(f"Failed to load image: {image}")
                return []
        else:
            img = image.copy()

        processed = self.preprocess_for_ocr(img)

        try:
            result = self.ocr.ocr(processed, cls=True)

            detected = []
            if result and result[0]:
                for line in result[0]:
                    if line is None:
                        continue
                    bbox, (text, confidence) = line

                    # PaddleOCR returns a 4-point polygon; reduce it to x, y, w, h.
                    x_coords = [p[0] for p in bbox]
                    y_coords = [p[1] for p in bbox]
                    x, y = int(min(x_coords)), int(min(y_coords))
                    w = int(max(x_coords) - x)
                    h = int(max(y_coords) - y)

                    detected.append(TextRegion(
                        text=text.strip(),
                        confidence=float(confidence),
                        bbox=(x, y, w, h),
                        language=self.lang
                    ))

            return detected

        except Exception as e:
            logger.error(f"OCR processing failed: {e}")
            return []

    def extract_text_from_region(self, image: np.ndarray,
                                 region: Tuple[int, int, int, int]) -> List["TextRegion"]:
        """OCR only the (x, y, w, h) sub-rectangle of `image`.

        Returned bboxes are mapped back to full-image coordinates.
        """
        x, y, w, h = region
        roi = image[y:y+h, x:x+w]

        if roi.size == 0:
            return []

        regions = self.extract_text(roi)

        for r in regions:
            rx, ry, rw, rh = r.bbox
            r.bbox = (x + rx, y + ry, rw, rh)

        return regions
+ """ + # Look for common loot window indicators + # Method 1: Template matching for "Loot" text or window frame + if 'loot_window' in self.templates: + result = cv2.matchTemplate( + image, self.templates['loot_window'], cv2.TM_CCOEFF_NORMED + ) + _, max_val, _, max_loc = cv2.minMaxLoc(result) + if max_val > 0.7: + h, w = self.templates['loot_window'].shape[:2] + return (*max_loc, w, h) + + # Method 2: Detect based on typical loot window characteristics + # Loot windows usually have a grid of items with consistent spacing + gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY) + + # Look for high-contrast regions that could be icons + _, thresh = cv2.threshold(gray, 200, 255, cv2.THRESH_BINARY) + + # Find contours + contours, _ = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE) + + # Filter for icon-sized squares + potential_icons = [] + for cnt in contours: + x, y, w, h = cv2.boundingRect(cnt) + aspect = w / h if h > 0 else 0 + + # Check if dimensions match typical icon sizes + for size_name, (sw, sh) in self.ICON_SIZES.items(): + if abs(w - sw) < 5 and abs(h - sh) < 5 and 0.8 < aspect < 1.2: + potential_icons.append((x, y, w, h)) + break + + # If we found multiple icons in a grid pattern, assume loot window + if len(potential_icons) >= 2: + # Calculate bounding box of all icons + xs = [p[0] for p in potential_icons] + ys = [p[1] for p in potential_icons] + ws = [p[2] for p in potential_icons] + hs = [p[3] for p in potential_icons] + + min_x, max_x = min(xs), max(xs) + max(ws) + min_y, max_y = min(ys), max(ys) + max(hs) + + # Add padding + padding = 20 + return ( + max(0, min_x - padding), + max(0, min_y - padding), + max_x - min_x + padding * 2, + max_y - min_y + padding * 2 + ) + + return None + + def extract_icons_from_region(self, image: np.ndarray, + region: Tuple[int, int, int, int], + icon_size: str = 'medium') -> List[IconRegion]: + """ + Extract icons from a specific region (e.g., loot window). 
+ + Args: + image: Full screenshot + region: Bounding box (x, y, w, h) + icon_size: Size preset ('small', 'medium', 'large') + + Returns: + List of detected icon regions + """ + x, y, w, h = region + roi = image[y:y+h, x:x+w] + + if roi.size == 0: + return [] + + target_size = self.ICON_SIZES.get(icon_size, (48, 48)) + gray = cv2.cvtColor(roi, cv2.COLOR_BGR2GRAY) + + # Multiple threshold attempts for different icon styles + icons = [] + thresholds = [(200, 255), (180, 255), (150, 255)] + + for thresh_low, thresh_high in thresholds: + _, thresh = cv2.threshold(gray, thresh_low, thresh_high, cv2.THRESH_BINARY) + contours, _ = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE) + + for cnt in contours: + cx, cy, cw, ch = cv2.boundingRect(cnt) + aspect = cw / ch if ch > 0 else 0 + + # Match icon size with tolerance + if (abs(cw - target_size[0]) < 8 and + abs(ch - target_size[1]) < 8 and + 0.7 < aspect < 1.3): + + # Extract icon image + icon_img = roi[cy:cy+ch, cx:cx+cw] + + # Resize to standard size + icon_img = cv2.resize(icon_img, target_size, interpolation=cv2.INTER_AREA) + + icons.append(IconRegion( + image=icon_img, + bbox=(x + cx, y + cy, cw, ch), + confidence=0.8 # Placeholder confidence + )) + + # Remove duplicates (icons that overlap significantly) + unique_icons = self._remove_duplicate_icons(icons) + + return unique_icons + + def _remove_duplicate_icons(self, icons: List[IconRegion], + iou_threshold: float = 0.5) -> List[IconRegion]: + """Remove duplicate icons based on IoU.""" + if not icons: + return [] + + # Sort by confidence + sorted_icons = sorted(icons, key=lambda x: x.confidence, reverse=True) + + kept = [] + for icon in sorted_icons: + is_duplicate = False + for kept_icon in kept: + if self._calculate_iou(icon.bbox, kept_icon.bbox) > iou_threshold: + is_duplicate = True + break + if not is_duplicate: + kept.append(icon) + + return kept + + def _calculate_iou(self, box1: Tuple[int, int, int, int], + box2: Tuple[int, int, int, int]) 
-> float: + """Calculate Intersection over Union of two bounding boxes.""" + x1, y1, w1, h1 = box1 + x2, y2, w2, h2 = box2 + + xi1 = max(x1, x2) + yi1 = max(y1, y2) + xi2 = min(x1 + w1, x2 + w2) + yi2 = min(y1 + h1, y2 + h2) + + inter_area = max(0, xi2 - xi1) * max(0, yi2 - yi1) + box1_area = w1 * h1 + box2_area = w2 * h2 + + union_area = box1_area + box2_area - inter_area + + return inter_area / union_area if union_area > 0 else 0 + + def detect_icons_yolo(self, image: np.ndarray, + model_path: Optional[str] = None) -> List[IconRegion]: + """ + Detect icons using YOLO model (if available). + + This is a placeholder for future YOLO integration. + """ + # TODO: Implement YOLO detection when model is trained + logger.debug("YOLO detection not yet implemented") + return [] + + +class GameVisionAI: + """ + Main AI vision interface for game screenshot analysis. + Combines OCR and icon detection with GPU acceleration. + """ + + def __init__(self, use_gpu: bool = True, ocr_lang: str = 'en', + data_dir: Optional[Path] = None): + """ + Initialize Game Vision AI. + + Args: + use_gpu: Enable GPU acceleration if available + ocr_lang: Language for OCR ('en', 'sv', 'latin') + data_dir: Directory for storing extracted data + """ + self.use_gpu = use_gpu + self.data_dir = data_dir or Path.home() / ".lemontropia" + self.extracted_icons_dir = self.data_dir / "extracted_icons" + self.extracted_icons_dir.mkdir(parents=True, exist_ok=True) + + # Detect GPU + self.backend = GPUDetector.detect_backend() if use_gpu else GPUBackend.CPU + + # Initialize processors + self.ocr = OCRProcessor(use_gpu=use_gpu, lang=ocr_lang) + self.icon_detector = IconDetector() + + # Icon matching cache + self.icon_cache: Dict[str, ItemMatch] = {} + + logger.info(f"GameVisionAI initialized (GPU: {self.backend.value})") + + def extract_text_from_image(self, image_path: Union[str, Path]) -> List[TextRegion]: + """ + Extract all text from an image. 
# NOTE: the functions below are GameVisionAI methods (reformatted from a
# patch chunk); `self` is the GameVisionAI instance.

def extract_icons_from_image(self, image_path: Union[str, Path, np.ndarray],
                             auto_detect_window: bool = True) -> List["IconRegion"]:
    """
    Extract item icons from a screenshot.

    Args:
        image_path: Path to the screenshot, or an already-loaded BGR image
            (np.ndarray). Accepting an array lets callers that already hold
            the pixels avoid a second disk read (backward compatible).
        auto_detect_window: Try to locate the loot window first and scan
            only inside it; otherwise (or on failure) scan the whole frame.

    Returns:
        List of detected icon regions ([] when the image cannot be loaded).
    """
    if isinstance(image_path, np.ndarray):
        image = image_path
    else:
        image = cv2.imread(str(image_path))
        if image is None:
            logger.error(f"Failed to load image: {image_path}")
            return []

    if auto_detect_window:
        window_region = self.icon_detector.detect_loot_window(image)
        if window_region:
            logger.debug(f"Detected loot window: {window_region}")
            return self.icon_detector.extract_icons_from_region(image, window_region)
        logger.debug("No loot window detected, scanning full image")

    # Fall back to scanning the whole frame.
    h, w = image.shape[:2]
    return self.icon_detector.extract_icons_from_region(image, (0, 0, w, h))


def match_icon_to_database(self, icon_image: np.ndarray,
                           database_path: Optional[Path] = None) -> Optional["ItemMatch"]:
    """
    Match an extracted icon against the item database.

    Args:
        icon_image: Icon pixels (BGR np.ndarray).
        database_path: Icon database location; only used the first time,
            when the matcher is lazily constructed.

    Returns:
        ItemMatch if found, None otherwise.
    """
    from .icon_matcher import IconMatcher

    # Lazily build the matcher once and reuse it for subsequent calls.
    if not hasattr(self, '_icon_matcher'):
        self._icon_matcher = IconMatcher(database_path)

    return self._icon_matcher.match_icon(icon_image)


def process_screenshot(self, image_path: Union[str, Path],
                       extract_text: bool = True,
                       extract_icons: bool = True) -> "VisionResult":
    """
    Run the full vision pipeline (OCR + icon extraction) on one screenshot.

    Args:
        image_path: Path to the screenshot.
        extract_text: Enable text extraction.
        extract_icons: Enable icon extraction.

    Returns:
        VisionResult with all detections (empty when the image fails to load).
    """
    start_time = time.time()
    result = VisionResult(gpu_backend=self.backend.value)

    # FIX: load the image once and reuse it for both stages — the icon
    # stage previously re-read the same file from disk.
    image = cv2.imread(str(image_path))
    if image is None:
        logger.error(f"Failed to load image: {image_path}")
        return result

    if extract_text:
        result.text_regions = self.ocr.extract_text(image)
        logger.debug(f"Extracted {len(result.text_regions)} text regions")

    if extract_icons:
        result.icon_regions = self.extract_icons_from_image(image)
        logger.debug(f"Extracted {len(result.icon_regions)} icons")

        # Save extracted icons for later labelling / matching.
        self._save_extracted_icons(result.icon_regions)

    result.processing_time_ms = (time.time() - start_time) * 1000

    return result


def _save_extracted_icons(self, icons: List["IconRegion"]):
    """Persist each extracted icon under extracted_icons_dir.

    File names combine hash prefix, timestamp and index so repeated runs
    do not collide.
    """
    for i, icon in enumerate(icons):
        filename = f"icon_{icon.icon_hash[:16]}_{int(time.time())}_{i}.png"
        filepath = self.extracted_icons_dir / filename
        cv2.imwrite(str(filepath), icon.image)
        logger.debug(f"Saved icon: {filepath}")


def get_gpu_info(self) -> Dict[str, Any]:
    """Get GPU information (delegates to GPUDetector)."""
    return GPUDetector.get_gpu_info()


def is_gpu_available(self) -> bool:
    """True when a non-CPU backend was detected at construction time."""
    return self.backend != GPUBackend.CPU


def calibrate_for_game(self, sample_screenshots: List[Path]) -> Dict[str, Any]:
    """
    Calibrate the vision system using sample screenshots.

    Args:
        sample_screenshots: List of sample game screenshots.

    Returns:
        Aggregate statistics: screenshots processed, text regions / icons
        detected, and mean processing time in milliseconds.
    """
    calibration = {
        'screenshots_processed': 0,
        'text_regions_detected': 0,
        'icons_detected': 0,
        'average_processing_time_ms': 0,
        'detected_regions': {}
    }

    total_time = 0.0

    for screenshot_path in sample_screenshots:
        try:
            start = time.time()
            result = self.process_screenshot(screenshot_path)
            elapsed = (time.time() - start) * 1000

            calibration['screenshots_processed'] += 1
            calibration['text_regions_detected'] += len(result.text_regions)
            calibration['icons_detected'] += len(result.icon_regions)
            total_time += elapsed

        except Exception as e:
            logger.error(f"Failed to process {screenshot_path}: {e}")

    processed = calibration['screenshots_processed']
    if processed > 0:
        calibration['average_processing_time_ms'] = total_time / processed

    return calibration
@dataclass
class MatchResult:
    """Result of matching one icon against the database."""
    item_name: str
    confidence: float
    match_method: str  # 'hash', 'feature' or 'template'
    item_id: Optional[str] = None
    category: Optional[str] = None
    metadata: Dict[str, Any] = None  # normalized to {} in __post_init__

    def __post_init__(self):
        # Keep metadata usable as a dict even when omitted or passed as None.
        if self.metadata is None:
            self.metadata = {}


class PerceptualHash:
    """Perceptual hash implementations for icon matching.

    All hashes are '0'/'1' bit strings; only equal-length hashes can be
    compared with hamming_distance()/similarity().
    """

    @staticmethod
    def average_hash(image: np.ndarray, hash_size: int = 16) -> str:
        """Average hash (aHash): bit = pixel brighter than the image mean."""
        if len(image.shape) == 3:
            gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
        else:
            gray = image

        resized = cv2.resize(gray, (hash_size, hash_size), interpolation=cv2.INTER_AREA)
        avg = resized.mean()
        hash_bits = (resized > avg).flatten()
        return ''.join(['1' if b else '0' for b in hash_bits])

    @staticmethod
    def difference_hash(image: np.ndarray, hash_size: int = 16) -> str:
        """Difference hash (dHash): bit = pixel brighter than its left neighbour."""
        if len(image.shape) == 3:
            gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
        else:
            gray = image

        # hash_size+1 columns so each row yields hash_size horizontal diffs.
        resized = cv2.resize(gray, (hash_size + 1, hash_size), interpolation=cv2.INTER_AREA)
        diff = resized[:, 1:] > resized[:, :-1]
        return ''.join(['1' if b else '0' for b in diff.flatten()])

    @staticmethod
    def wavelet_hash(image: np.ndarray, hash_size: int = 16) -> str:
        """Wavelet hash (wHash) from Haar approximation coefficients.

        Falls back to average_hash() when PyWavelets is not installed.
        NOTE(review): the fallback produces a different bit length than the
        wavelet path — do not mix the two within one comparison.
        """
        try:
            import pywt
        except ImportError:
            logger.debug("PyWavelets not available, falling back to average hash")
            return PerceptualHash.average_hash(image, hash_size)

        if len(image.shape) == 3:
            gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
        else:
            gray = image

        # Round up to a power of two so the Haar transform divides cleanly.
        size = 2 ** (hash_size - 1).bit_length()
        resized = cv2.resize(gray, (size, size), interpolation=cv2.INTER_AREA)

        coeffs = pywt.dwt2(resized, 'haar')
        cA, (cH, cV, cD) = coeffs

        # Threshold the approximation coefficients against their mean.
        avg = cA.mean()
        hash_bits = (cA > avg).flatten()
        return ''.join(['1' if b else '0' for b in hash_bits])

    @staticmethod
    def hamming_distance(hash1: str, hash2: str) -> int:
        """Number of differing bits.

        Raises:
            ValueError: if the hashes have different lengths.
        """
        if len(hash1) != len(hash2):
            raise ValueError("Hashes must be same length")
        return sum(c1 != c2 for c1, c2 in zip(hash1, hash2))

    @staticmethod
    def similarity(hash1: str, hash2: str) -> float:
        """Similarity in [0, 1]; 1.0 means identical bit strings.

        FIX: two empty hashes (produced e.g. for zero-size icons) used to
        raise ZeroDivisionError; they are now treated as identical.
        """
        if not hash1 and not hash2:
            return 1.0
        distance = PerceptualHash.hamming_distance(hash1, hash2)
        return 1.0 - (distance / len(hash1))
class TemplateMatcher:
    """Template matching for icons."""

    @staticmethod
    def match(template: np.ndarray, image: np.ndarray,
              methods: List[int] = None) -> float:
        """Score how well `image` matches `template`.

        The image is resized to the template's dimensions first, so each
        method yields a single response value. Returns the best confidence
        (0-1) across the requested methods.
        """
        chosen = methods if methods is not None else [
            cv2.TM_CCOEFF_NORMED,
            cv2.TM_CCORR_NORMED,
            cv2.TM_SQDIFF_NORMED,
        ]

        # Compare at identical dimensions so the response map is one score.
        th, tw = template.shape[:2]
        resized = cv2.resize(image, (tw, th), interpolation=cv2.INTER_AREA)

        best = 0.0
        for method in chosen:
            try:
                _, peak, _, _ = cv2.minMaxLoc(cv2.matchTemplate(resized, template, method))
            except Exception as e:
                logger.debug(f"Template matching failed: {e}")
                continue
            if method == cv2.TM_SQDIFF_NORMED:
                peak = 1.0 - peak  # SQDIFF: lower is better, so invert
            best = max(best, peak)

        return best
# NOTE: the functions below are IconDatabase methods (reformatted from a
# patch chunk); `self` is the database wrapper holding `db_path`.

def add_icon(self, item_name: str, image: np.ndarray,
             item_id: Optional[str] = None,
             category: Optional[str] = None,
             metadata: Optional[Dict] = None) -> bool:
    """Hash `image`, extract its ORB features and insert a row for `item_name`.

    Returns:
        True on success, False on any failure (logged, never raised).
    """
    try:
        avg_hash = PerceptualHash.average_hash(image)
        diff_hash = PerceptualHash.difference_hash(image)
        wavelet_hash = PerceptualHash.wavelet_hash(image)

        feature_matcher = FeatureMatcher()
        _, features = feature_matcher.extract_features(image)
        features_blob = pickle.dumps(features) if features is not None else None

        # FIX: close the connection even when execute() raises; the `with conn`
        # block commits on success / rolls back on error, and the finally
        # guarantees the handle is released (sqlite3 does not close on __exit__).
        conn = sqlite3.connect(str(self.db_path))
        try:
            with conn:
                conn.execute('''
                    INSERT INTO icons
                    (item_name, item_id, category, avg_hash, diff_hash, wavelet_hash, features, metadata)
                    VALUES (?, ?, ?, ?, ?, ?, ?, ?)
                ''', (
                    item_name, item_id, category,
                    avg_hash, diff_hash, wavelet_hash,
                    features_blob,
                    json.dumps(metadata) if metadata else None
                ))
        finally:
            conn.close()

        logger.debug(f"Added icon to database: {item_name}")
        return True

    except Exception as e:
        logger.error(f"Failed to add icon: {e}")
        return False


def find_by_hash(self, avg_hash: str, max_distance: int = 10) -> List[Tuple[str, float, Dict]]:
    """Find stored icons whose average hash is within `max_distance` bits.

    Returns:
        (item_name, similarity, info) tuples, best match first.
    """
    conn = sqlite3.connect(str(self.db_path))
    try:
        cursor = conn.execute(
            'SELECT item_name, avg_hash, diff_hash, item_id, category, metadata FROM icons'
        )
        results = []
        for item_name, db_avg_hash, _db_diff_hash, item_id, category, metadata_json in cursor.fetchall():
            # FIX: rows hashed with a different hash_size (different bit length)
            # cannot be compared — skip them instead of letting
            # hamming_distance() raise ValueError for the whole query.
            if not db_avg_hash or len(db_avg_hash) != len(avg_hash):
                continue

            distance = PerceptualHash.hamming_distance(avg_hash, db_avg_hash)
            if distance <= max_distance:
                similarity = 1.0 - (distance / len(avg_hash))
                results.append((item_name, similarity, {
                    'item_id': item_id,
                    'category': category,
                    'metadata': json.loads(metadata_json) if metadata_json else {}
                }))
    finally:
        conn.close()

    results.sort(key=lambda entry: entry[1], reverse=True)
    return results


def get_all_icons(self) -> List[Dict]:
    """Return every stored icon as a plain dict (identity + avg hash only)."""
    conn = sqlite3.connect(str(self.db_path))
    try:
        rows = conn.execute(
            'SELECT item_name, item_id, category, avg_hash, metadata FROM icons'
        ).fetchall()
    finally:
        conn.close()

    return [{
        'item_name': name,
        'item_id': item_id,
        'category': category,
        'avg_hash': avg_hash,
        'metadata': json.loads(meta) if meta else {}
    } for name, item_id, category, avg_hash, meta in rows]


def get_icon_count(self) -> int:
    """Total number of icon rows in the database."""
    conn = sqlite3.connect(str(self.db_path))
    try:
        return conn.execute('SELECT COUNT(*) FROM icons').fetchone()[0]
    finally:
        conn.close()


def delete_icon(self, item_name: str) -> bool:
    """Delete all rows for `item_name`; True if anything was removed."""
    conn = sqlite3.connect(str(self.db_path))
    try:
        with conn:
            cursor = conn.execute('DELETE FROM icons WHERE item_name = ?', (item_name,))
        return cursor.rowcount > 0
    finally:
        conn.close()
# NOTE: IconMatcher method, reformatted from a patch chunk.

def _match_by_hash(self, image: np.ndarray) -> Optional["MatchResult"]:
    """Look the icon up by perceptual (average) hash.

    Returns:
        The best database hit as a MatchResult, or None when nothing is
        close enough (similarity below CONFIDENCE_LOW).
    """
    query_hash = PerceptualHash.average_hash(image)

    candidates = self.database.find_by_hash(query_hash, max_distance=15)
    if not candidates:
        return None

    top_name, top_similarity, info = candidates[0]
    if top_similarity < self.CONFIDENCE_LOW:
        return None

    return MatchResult(
        item_name=top_name,
        confidence=top_similarity,
        match_method='hash',
        item_id=info.get('item_id'),
        category=info.get('category'),
        metadata=info.get('metadata', {})
    )
None + best_score = 0.0 + best_meta = {} + + for row in cursor.fetchall(): + item_name, features_blob, item_id, category, metadata_json = row + db_desc = pickle.loads(features_blob) + + score = self.feature_matcher.match_features(query_desc, db_desc) + + if score > best_score: + best_score = score + best_match = item_name + best_meta = { + 'item_id': item_id, + 'category': category, + 'metadata': json.loads(metadata_json) if metadata_json else {} + } + + conn.close() + + if best_match and best_score >= self.CONFIDENCE_LOW: + return MatchResult( + item_name=best_match, + confidence=best_score, + match_method='feature', + item_id=best_meta.get('item_id'), + category=best_meta.get('category'), + metadata=best_meta.get('metadata', {}) + ) + + return None + + def _match_by_template(self, image: np.ndarray) -> Optional[MatchResult]: + """Match using template matching against icon files.""" + if not self.icons_dir.exists(): + return None + + # Resize query to standard size + standard_size = (64, 64) + query_resized = cv2.resize(image, standard_size, interpolation=cv2.INTER_AREA) + + best_match = None + best_score = 0.0 + + for icon_file in self.icons_dir.glob("**/*.png"): + try: + template = cv2.imread(str(icon_file), cv2.IMREAD_COLOR) + if template is None: + continue + + template_resized = cv2.resize(template, standard_size, interpolation=cv2.INTER_AREA) + + score = TemplateMatcher.match(query_resized, template_resized) + + if score > best_score: + best_score = score + best_match = icon_file.stem + + except Exception as e: + logger.debug(f"Template matching failed for {icon_file}: {e}") + continue + + if best_match and best_score >= self.CONFIDENCE_MEDIUM: + return MatchResult( + item_name=best_match, + confidence=best_score, + match_method='template' + ) + + return None + + def add_icon_to_database(self, item_name: str, image: np.ndarray, + item_id: Optional[str] = None, + category: Optional[str] = None, + metadata: Optional[Dict] = None) -> bool: + """Add a new icon 
to the database.""" + return self.database.add_icon(item_name, image, item_id, category, metadata) + + def batch_add_icons(self, icons_dir: Path, + category: Optional[str] = None) -> Tuple[int, int]: + """ + Batch add icons from directory. + + Returns: + Tuple of (success_count, fail_count) + """ + success = 0 + failed = 0 + + for icon_file in icons_dir.glob("**/*.png"): + try: + image = cv2.imread(str(icon_file), cv2.IMREAD_COLOR) + if image is None: + failed += 1 + continue + + item_name = icon_file.stem.replace('_', ' ').title() + + if self.add_icon_to_database(item_name, image, category=category): + success += 1 + else: + failed += 1 + + except Exception as e: + logger.error(f"Failed to add icon {icon_file}: {e}") + failed += 1 + + logger.info(f"Batch add complete: {success} success, {failed} failed") + return success, failed + + def get_database_stats(self) -> Dict[str, Any]: + """Get database statistics.""" + return { + 'total_icons': self.database.get_icon_count(), + 'database_path': str(self.database.db_path), + 'icons_directory': str(self.icons_dir) + } + + def find_similar_icons(self, image: np.ndarray, + top_k: int = 5) -> List[MatchResult]: + """Find top-k similar icons.""" + avg_hash = PerceptualHash.average_hash(image) + + # Get all matches + matches = self.database.find_by_hash(avg_hash, max_distance=20) + + results = [] + for item_name, similarity, meta in matches[:top_k]: + results.append(MatchResult( + item_name=item_name, + confidence=similarity, + match_method='hash', + item_id=meta.get('item_id'), + category=meta.get('category'), + metadata=meta.get('metadata', {}) + )) + + return results + + +# Export main classes +__all__ = [ + 'IconMatcher', + 'MatchResult', + 'PerceptualHash', + 'FeatureMatcher', + 'TemplateMatcher', + 'IconDatabase' +] diff --git a/requirements.txt b/requirements.txt index 0559fc3..9a27251 100644 --- a/requirements.txt +++ b/requirements.txt @@ -11,13 +11,33 @@ PyQt6>=6.4.0 pytest>=7.0.0 pytest-asyncio>=0.21.0 -# GUI 
Framework -PyQt6>=6.4.0 - -# OCR Engines -paddleocr>=2.6.0 +# OCR Engines - PaddleOCR for GPU-accelerated text recognition +paddlepaddle-gpu>=2.5.0; sys_platform != 'darwin' # CUDA version for Linux/Windows +paddlepaddle>=2.5.0; sys_platform == 'darwin' # CPU version for macOS +paddleocr>=2.7.0 pytesseract>=0.3.10 +# Computer Vision +opencv-python>=4.8.0 +numpy>=1.24.0 +Pillow>=10.0.0 + +# Deep Learning Framework (for GPU detection and YOLO support) +torch>=2.0.0 +torchvision>=0.15.0 + +# Windows DirectML support (optional) +# torch-directml>=0.3.0; sys_platform == 'win32' + +# Screen capture +mss>=9.0.0 + +# Image hashing and processing +imagehash>=4.3.1 + +# Wavelet transforms (for wHash) +PyWavelets>=1.4.0 + # Async support aiofiles>=23.0.0 diff --git a/ui/vision_calibration_dialog.py b/ui/vision_calibration_dialog.py new file mode 100644 index 0000000..0ed2f62 --- /dev/null +++ b/ui/vision_calibration_dialog.py @@ -0,0 +1,628 @@ +""" +Lemontropia Suite - Vision Calibration Dialog +Wizard for calibrating Game Vision AI to user's game setup. 
+""" + +import sys +import time +from pathlib import Path +from typing import Optional, List, Dict, Any + +from PyQt6.QtWidgets import ( + QWizard, QWizardPage, QVBoxLayout, QHBoxLayout, QLabel, QLineEdit, + QPushButton, QComboBox, QCheckBox, QProgressBar, QGroupBox, + QFormLayout, QTextEdit, QMessageBox, QFileDialog, QListWidget, + QListWidgetItem, QSpinBox, QDoubleSpinBox, QWidget +) +from PyQt6.QtCore import Qt, QSettings, QThread, pyqtSignal +from PyQt6.QtGui import QFont, QPixmap, QImage +import numpy as np + +import logging + +logger = logging.getLogger(__name__) + + +class CalibrationWorker(QThread): + """Background worker for calibration processing.""" + + progress = pyqtSignal(int, str) # percentage, message + calibration_complete = pyqtSignal(dict) + error_occurred = pyqtSignal(str) + + def __init__(self, screenshot_paths: List[Path], settings: Dict[str, Any]): + super().__init__() + self.screenshot_paths = screenshot_paths + self.settings = settings + self._cancelled = False + + def run(self): + try: + from modules.game_vision_ai import GameVisionAI + + self.progress.emit(0, "Initializing Game Vision AI...") + + vision = GameVisionAI( + use_gpu=self.settings.get('use_gpu', True), + ocr_lang=self.settings.get('ocr_lang', 'en') + ) + + results = { + 'screenshots_processed': 0, + 'text_regions_detected': 0, + 'icons_detected': 0, + 'processing_times': [], + 'errors': [], + 'detected_regions': {}, + 'sample_extractions': [] + } + + total = len(self.screenshot_paths) + + for i, screenshot_path in enumerate(self.screenshot_paths): + if self._cancelled: + self.error_occurred.emit("Calibration cancelled") + return + + progress = int((i / total) * 100) + self.progress.emit(progress, f"Processing {screenshot_path.name}...") + + try: + start_time = time.time() + result = vision.process_screenshot( + screenshot_path, + extract_text=self.settings.get('extract_text', True), + extract_icons=self.settings.get('extract_icons', True) + ) + processing_time = (time.time() - 
start_time) * 1000 + + results['screenshots_processed'] += 1 + results['text_regions_detected'] += len(result.text_regions) + results['icons_detected'] += len(result.icon_regions) + results['processing_times'].append(processing_time) + + # Store sample extractions + if i < 3: # Store first 3 as samples + sample = { + 'screenshot': str(screenshot_path), + 'text_count': len(result.text_regions), + 'icon_count': len(result.icon_regions), + 'processing_time_ms': result.processing_time_ms, + 'text_samples': [ + {'text': t.text, 'confidence': t.confidence} + for t in result.text_regions[:5] # First 5 texts + ] + } + results['sample_extractions'].append(sample) + + except Exception as e: + results['errors'].append(f"{screenshot_path.name}: {str(e)}") + logger.error(f"Failed to process {screenshot_path}: {e}") + + # Calculate statistics + if results['processing_times']: + results['avg_processing_time'] = np.mean(results['processing_times']) + results['min_processing_time'] = np.min(results['processing_times']) + results['max_processing_time'] = np.max(results['processing_times']) + + self.progress.emit(100, "Calibration complete!") + self.calibration_complete.emit(results) + + except Exception as e: + self.error_occurred.emit(str(e)) + + def cancel(self): + self._cancelled = True + + +class WelcomePage(QWizardPage): + """Welcome page of calibration wizard.""" + + def __init__(self, parent=None): + super().__init__(parent) + self.setTitle("Vision Calibration Wizard") + self.setSubTitle("Calibrate Game Vision AI for your game setup") + self.setup_ui() + + def setup_ui(self): + layout = QVBoxLayout(self) + + welcome_label = QLabel( + "

<h3>Welcome to Vision Calibration</h3>"
            "<p>This wizard will help you calibrate the Game Vision AI system "
            "for optimal performance with your Entropia Universe setup.</p>"
            "<p>You will need:</p>"
            "<ul>"
            "<li>Sample screenshots from your game (loot windows, inventory, chat)</li>"
            "<li>A few minutes for the calibration run</li>"
            "</ul>
" + "" + ) + welcome_label.setWordWrap(True) + layout.addWidget(welcome_label) + + # Info box + info_group = QGroupBox("What will be calibrated?") + info_layout = QVBoxLayout(info_group) + + info_text = QLabel( + "" + ) + info_text.setWordWrap(True) + info_layout.addWidget(info_text) + + layout.addWidget(info_group) + layout.addStretch() + + +class ScreenshotSelectionPage(QWizardPage): + """Page for selecting sample screenshots.""" + + def __init__(self, parent=None): + super().__init__(parent) + self.setTitle("Select Sample Screenshots") + self.setSubTitle("Choose screenshots from your game for calibration") + self.screenshot_paths: List[Path] = [] + self.setup_ui() + + def setup_ui(self): + layout = QVBoxLayout(self) + + # Instructions + instructions = QLabel( + "Select 3-10 screenshots that represent typical game situations:\n" + "• Loot windows with items\n" + "• Inventory screens\n" + "• Chat windows with text\n" + "• HUD with gear equipped" + ) + instructions.setWordWrap(True) + layout.addWidget(instructions) + + # File list + list_group = QGroupBox("Selected Screenshots") + list_layout = QVBoxLayout(list_group) + + self.file_list = QListWidget() + list_layout.addWidget(self.file_list) + + # Buttons + btn_layout = QHBoxLayout() + + self.add_btn = QPushButton("Add Screenshots...") + self.add_btn.clicked.connect(self.add_screenshots) + btn_layout.addWidget(self.add_btn) + + self.add_dir_btn = QPushButton("Add Directory...") + self.add_dir_btn.clicked.connect(self.add_directory) + btn_layout.addWidget(self.add_dir_btn) + + self.remove_btn = QPushButton("Remove Selected") + self.remove_btn.clicked.connect(self.remove_selected) + btn_layout.addWidget(self.remove_btn) + + self.clear_btn = QPushButton("Clear All") + self.clear_btn.clicked.connect(self.clear_all) + btn_layout.addWidget(self.clear_btn) + + btn_layout.addStretch() + list_layout.addLayout(btn_layout) + + layout.addWidget(list_group) + + # Status + self.status_label = QLabel("No screenshots selected") + 
layout.addWidget(self.status_label) + + def add_screenshots(self): + """Add individual screenshot files.""" + files, _ = QFileDialog.getOpenFileNames( + self, "Select Screenshots", + str(Path.home()), + "Images (*.png *.jpg *.jpeg *.bmp)" + ) + + for file_path in files: + path = Path(file_path) + if path not in self.screenshot_paths: + self.screenshot_paths.append(path) + self.file_list.addItem(path.name) + + self.update_status() + + def add_directory(self): + """Add all images from a directory.""" + dir_path = QFileDialog.getExistingDirectory( + self, "Select Screenshot Directory", + str(Path.home()) + ) + + if dir_path: + path = Path(dir_path) + for ext in ['*.png', '*.jpg', '*.jpeg', '*.bmp']: + for file_path in path.glob(ext): + if file_path not in self.screenshot_paths: + self.screenshot_paths.append(file_path) + self.file_list.addItem(file_path.name) + + self.update_status() + + def remove_selected(self): + """Remove selected screenshots.""" + selected = self.file_list.currentRow() + if selected >= 0: + self.file_list.takeItem(selected) + del self.screenshot_paths[selected] + self.update_status() + + def clear_all(self): + """Clear all screenshots.""" + self.file_list.clear() + self.screenshot_paths.clear() + self.update_status() + + def update_status(self): + """Update status label.""" + count = len(self.screenshot_paths) + if count == 0: + self.status_label.setText("No screenshots selected") + elif count < 3: + self.status_label.setText(f"⚠️ {count} screenshot(s) selected (recommend at least 3)") + else: + self.status_label.setText(f"✅ {count} screenshot(s) selected") + + def validatePage(self) -> bool: + """Validate page before proceeding.""" + if len(self.screenshot_paths) < 1: + QMessageBox.warning(self, "No Screenshots", + "Please select at least one screenshot.") + return False + return True + + def get_screenshot_paths(self) -> List[Path]: + """Get selected screenshot paths.""" + return self.screenshot_paths + + +class SettingsPage(QWizardPage): + 
"""Page for configuring calibration settings.""" + + def __init__(self, parent=None): + super().__init__(parent) + self.setTitle("Calibration Settings") + self.setSubTitle("Configure vision processing options") + self.setup_ui() + + def setup_ui(self): + layout = QVBoxLayout(self) + + # GPU Settings + gpu_group = QGroupBox("GPU Acceleration") + gpu_layout = QFormLayout(gpu_group) + + self.use_gpu_cb = QCheckBox("Use GPU for processing") + self.use_gpu_cb.setChecked(True) + self.use_gpu_cb.setToolTip( + "Enable GPU acceleration for faster processing" + ) + gpu_layout.addRow(self.use_gpu_cb) + + self.gpu_info_label = QLabel("GPU info will be detected during calibration") + gpu_layout.addRow("GPU:", self.gpu_info_label) + + layout.addWidget(gpu_group) + + # OCR Settings + ocr_group = QGroupBox("OCR (Text Recognition)") + ocr_layout = QFormLayout(ocr_group) + + self.extract_text_cb = QCheckBox("Enable text extraction") + self.extract_text_cb.setChecked(True) + ocr_layout.addRow(self.extract_text_cb) + + self.ocr_lang_combo = QComboBox() + self.ocr_lang_combo.addItem("English", "en") + self.ocr_lang_combo.addItem("Swedish", "sv") + ocr_layout.addRow("Language:", self.ocr_lang_combo) + + layout.addWidget(ocr_group) + + # Icon Settings + icon_group = QGroupBox("Icon Detection") + icon_layout = QFormLayout(icon_group) + + self.extract_icons_cb = QCheckBox("Enable icon extraction") + self.extract_icons_cb.setChecked(True) + icon_layout.addRow(self.extract_icons_cb) + + self.icon_size_combo = QComboBox() + self.icon_size_combo.addItem("Small (32x32)", "small") + self.icon_size_combo.addItem("Medium (48x48)", "medium") + self.icon_size_combo.addItem("Large (64x64)", "large") + icon_layout.addRow("Icon Size:", self.icon_size_combo) + + self.auto_detect_window_cb = QCheckBox("Auto-detect loot windows") + self.auto_detect_window_cb.setChecked(True) + icon_layout.addRow(self.auto_detect_window_cb) + + layout.addWidget(icon_group) + layout.addStretch() + + def get_settings(self) 
-> Dict[str, Any]: + """Get calibration settings.""" + return { + 'use_gpu': self.use_gpu_cb.isChecked(), + 'extract_text': self.extract_text_cb.isChecked(), + 'extract_icons': self.extract_icons_cb.isChecked(), + 'ocr_lang': self.ocr_lang_combo.currentData(), + 'icon_size': self.icon_size_combo.currentData(), + 'auto_detect_window': self.auto_detect_window_cb.isChecked() + } + + +class ProcessingPage(QWizardPage): + """Page for running calibration processing.""" + + def __init__(self, parent=None): + super().__init__(parent) + self.setTitle("Processing") + self.setSubTitle("Running calibration...") + self.is_complete = False + self.calibration_results: Optional[Dict] = None + self.setup_ui() + + def setup_ui(self): + layout = QVBoxLayout(self) + + # Progress + self.status_label = QLabel("Ready to start calibration") + layout.addWidget(self.status_label) + + self.progress_bar = QProgressBar() + self.progress_bar.setRange(0, 100) + self.progress_bar.setValue(0) + layout.addWidget(self.progress_bar) + + # Results area + self.results_text = QTextEdit() + self.results_text.setReadOnly(True) + self.results_text.setPlaceholderText("Calibration results will appear here...") + layout.addWidget(self.results_text) + + # Buttons + btn_layout = QHBoxLayout() + + self.start_btn = QPushButton("Start Calibration") + self.start_btn.clicked.connect(self.start_calibration) + btn_layout.addWidget(self.start_btn) + + self.cancel_btn = QPushButton("Cancel") + self.cancel_btn.clicked.connect(self.cancel_calibration) + self.cancel_btn.setEnabled(False) + btn_layout.addWidget(self.cancel_btn) + + btn_layout.addStretch() + layout.addLayout(btn_layout) + + def initializePage(self): + """Called when page is shown.""" + self.results_text.clear() + self.progress_bar.setValue(0) + self.status_label.setText("Ready to start calibration") + self.is_complete = False + self.start_btn.setEnabled(True) + + def start_calibration(self): + """Start calibration processing.""" + wizard = self.wizard() + 
screenshot_page = wizard.page(1) # ScreenshotSelectionPage + settings_page = wizard.page(2) # SettingsPage + + screenshot_paths = screenshot_page.get_screenshot_paths() + settings = settings_page.get_settings() + + if not screenshot_paths: + QMessageBox.warning(self, "No Screenshots", + "No screenshots selected!") + return + + self.start_btn.setEnabled(False) + self.cancel_btn.setEnabled(True) + self.status_label.setText("Starting calibration...") + + # Start worker thread + self.worker = CalibrationWorker(screenshot_paths, settings) + self.worker.progress.connect(self.on_progress) + self.worker.calibration_complete.connect(self.on_complete) + self.worker.error_occurred.connect(self.on_error) + self.worker.start() + + def on_progress(self, percentage: int, message: str): + """Handle progress update.""" + self.progress_bar.setValue(percentage) + self.status_label.setText(message) + self.results_text.append(message) + + def on_complete(self, results: Dict): + """Handle calibration completion.""" + self.calibration_results = results + self.is_complete = True + self.cancel_btn.setEnabled(False) + + # Display results + summary = f""" +Calibration Complete! 
+ +Screenshots processed: {results['screenshots_processed']} +Text regions detected: {results['text_regions_detected']} +Icons detected: {results['icons_detected']} +""" + if 'avg_processing_time' in results: + summary += f"Average processing time: {results['avg_processing_time']:.1f}ms\n" + + if results.get('errors'): + summary += f"\nErrors: {len(results['errors'])}" + + self.results_text.append(summary) + + # Enable next button + self.completeChanged.emit() + + def on_error(self, error: str): + """Handle calibration error.""" + self.status_label.setText(f"Error: {error}") + self.results_text.append(f"❌ Error: {error}") + self.start_btn.setEnabled(True) + self.cancel_btn.setEnabled(False) + + def cancel_calibration(self): + """Cancel calibration.""" + if hasattr(self, 'worker'): + self.worker.cancel() + self.status_label.setText("Cancelling...") + + def isComplete(self) -> bool: + return self.is_complete + + def get_results(self) -> Optional[Dict]: + """Get calibration results.""" + return self.calibration_results + + +class ResultsPage(QWizardPage): + """Final page showing calibration results.""" + + def __init__(self, parent=None): + super().__init__(parent) + self.setTitle("Calibration Results") + self.setSubTitle("Review and save calibration results") + self.setup_ui() + + def setup_ui(self): + layout = QVBoxLayout(self) + + self.results_label = QLabel("Processing results will appear here...") + self.results_label.setWordWrap(True) + layout.addWidget(self.results_label) + + # Recommendations + self.recommendations_label = QLabel("") + self.recommendations_label.setWordWrap(True) + layout.addWidget(self.recommendations_label) + + layout.addStretch() + + def initializePage(self): + """Called when page is shown.""" + wizard = self.wizard() + processing_page = wizard.page(3) # ProcessingPage + results = processing_page.get_results() + + if results: + # Format results + text = f""" +

<h2>Calibration Results</h2>

<b>Processing Summary:</b>
<ul>
<li>Screenshots processed: {results['screenshots_processed']}</li>
<li>Text regions detected: {results['text_regions_detected']}</li>
<li>Icons detected: {results['icons_detected']}</li>
</ul>
"""
            if 'avg_processing_time' in results:
                text += f"""

<b>Performance:</b>
<ul>
<li>Average processing time: {results['avg_processing_time']:.1f} ms</li>
<li>Fastest: {results['min_processing_time']:.1f} ms &middot; Slowest: {results['max_processing_time']:.1f} ms</li>
</ul>
+ +""" + self.results_label.setText(text) + + # Generate recommendations + recommendations = self._generate_recommendations(results) + self.recommendations_label.setText(recommendations) + + # Save results to settings + self._save_calibration_results(results) + + def _generate_recommendations(self, results: Dict) -> str: + """Generate calibration recommendations.""" + recs = ["

<h3>Recommendations</h3>"]

        if results.get('errors'):
            recs.append("<p>⚠️ Some screenshots failed to process — "
                        "review the errors before saving this calibration.</p>
") + return "".join(recs) + + def _save_calibration_results(self, results: Dict): + """Save calibration results to settings.""" + settings = QSettings("Lemontropia", "GameVision") + settings.setValue("calibration/last_run", time.time()) + settings.setValue("calibration/screenshots_processed", results['screenshots_processed']) + settings.setValue("calibration/avg_processing_time", results.get('avg_processing_time', 0)) + settings.setValue("calibration/text_detection_rate", results['text_regions_detected']) + settings.setValue("calibration/icon_detection_rate", results['icons_detected']) + settings.sync() + + +class VisionCalibrationWizard(QWizard): + """ + Wizard for calibrating Game Vision AI. + """ + + calibration_complete = pyqtSignal(dict) + + def __init__(self, parent=None): + super().__init__(parent) + self.setWindowTitle("Vision Calibration Wizard") + self.setMinimumSize(700, 550) + + # Add pages + self.addPage(WelcomePage()) + self.addPage(ScreenshotSelectionPage()) + self.addPage(SettingsPage()) + self.addPage(ProcessingPage()) + self.addPage(ResultsPage()) + + self.setWizardStyle(QWizard.WizardStyle.ModernStyle) + + def accept(self): + """Handle wizard completion.""" + processing_page = self.page(3) + results = processing_page.get_results() + + if results: + self.calibration_complete.emit(results) + + super().accept() + + +# Export +__all__ = ['VisionCalibrationWizard', 'CalibrationWorker'] diff --git a/ui/vision_settings_dialog.py b/ui/vision_settings_dialog.py new file mode 100644 index 0000000..25c18b1 --- /dev/null +++ b/ui/vision_settings_dialog.py @@ -0,0 +1,645 @@ +""" +Lemontropia Suite - Vision Settings Dialog +Settings panel for configuring Game Vision AI. 
+""" + +import sys +from pathlib import Path +from typing import Optional + +from PyQt6.QtWidgets import ( + QDialog, QVBoxLayout, QHBoxLayout, QLabel, QLineEdit, + QPushButton, QComboBox, QCheckBox, QGroupBox, QFormLayout, + QMessageBox, QSpinBox, QDoubleSpinBox, QTabWidget, + QFileDialog, QTextEdit, QProgressBar, QWidget, QSlider +) +from PyQt6.QtCore import Qt, QSettings, QThread, pyqtSignal +from PyQt6.QtGui import QFont, QPixmap + +import logging + +logger = logging.getLogger(__name__) + + +class GPUInfoThread(QThread): + """Thread to gather GPU information.""" + + info_ready = pyqtSignal(dict) + error_occurred = pyqtSignal(str) + + def run(self): + try: + from modules.game_vision_ai import GPUDetector + info = GPUDetector.get_gpu_info() + self.info_ready.emit(info) + except Exception as e: + self.error_occurred.emit(str(e)) + + +class VisionSettingsDialog(QDialog): + """ + Settings dialog for Game Vision AI configuration. + """ + + settings_saved = pyqtSignal() + + def __init__(self, parent=None): + super().__init__(parent) + self.setWindowTitle("Game Vision Settings") + self.setMinimumSize(600, 500) + + self.settings = QSettings("Lemontropia", "GameVision") + self.gpu_info = {} + + self.setup_ui() + self.load_settings() + self.refresh_gpu_info() + + def setup_ui(self): + """Setup the dialog UI.""" + layout = QVBoxLayout(self) + layout.setSpacing(15) + + # Title + title_label = QLabel("🎮 Game Vision AI Settings") + title_font = QFont() + title_font.setPointSize(14) + title_font.setBold(True) + title_label.setFont(title_font) + layout.addWidget(title_label) + + # Description + desc_label = QLabel( + "Configure AI-powered computer vision for automatic game UI analysis." 
+ ) + desc_label.setWordWrap(True) + layout.addWidget(desc_label) + + # Tabs + self.tabs = QTabWidget() + layout.addWidget(self.tabs) + + # General tab + self.tabs.addTab(self._create_general_tab(), "General") + + # GPU tab + self.tabs.addTab(self._create_gpu_tab(), "GPU & Performance") + + # OCR tab + self.tabs.addTab(self._create_ocr_tab(), "OCR Settings") + + # Icon Detection tab + self.tabs.addTab(self._create_icon_tab(), "Icon Detection") + + # Buttons + button_layout = QHBoxLayout() + button_layout.addStretch() + + self.reset_btn = QPushButton("Reset to Defaults") + self.reset_btn.clicked.connect(self.reset_settings) + button_layout.addWidget(self.reset_btn) + + self.test_btn = QPushButton("Test Vision...") + self.test_btn.clicked.connect(self.open_test_dialog) + button_layout.addWidget(self.test_btn) + + self.save_btn = QPushButton("Save") + self.save_btn.clicked.connect(self.save_settings) + self.save_btn.setDefault(True) + button_layout.addWidget(self.save_btn) + + self.cancel_btn = QPushButton("Cancel") + self.cancel_btn.clicked.connect(self.reject) + button_layout.addWidget(self.cancel_btn) + + layout.addLayout(button_layout) + + def _create_general_tab(self) -> QWidget: + """Create general settings tab.""" + tab = QWidget() + layout = QVBoxLayout(tab) + layout.setSpacing(15) + + # Enable Vision + self.enable_vision_cb = QCheckBox("Enable Game Vision AI") + self.enable_vision_cb.setToolTip( + "Enable automatic screenshot analysis using AI" + ) + layout.addWidget(self.enable_vision_cb) + + # Auto Processing + self.auto_process_cb = QCheckBox("Auto-process screenshots") + self.auto_process_cb.setToolTip( + "Automatically analyze screenshots when captured" + ) + layout.addWidget(self.auto_process_cb) + + # Data Directory + dir_group = QGroupBox("Data Directories") + dir_layout = QFormLayout(dir_group) + + # Extracted icons directory + icons_dir_layout = QHBoxLayout() + self.icons_dir_input = QLineEdit() + self.icons_dir_input.setReadOnly(True) + 
icons_dir_layout.addWidget(self.icons_dir_input) + + self.icons_dir_btn = QPushButton("Browse...") + self.icons_dir_btn.clicked.connect(self.browse_icons_dir) + icons_dir_layout.addWidget(self.icons_dir_btn) + + dir_layout.addRow("Extracted Icons:", icons_dir_layout) + + # Icon database directory + db_dir_layout = QHBoxLayout() + self.db_dir_input = QLineEdit() + self.db_dir_input.setReadOnly(True) + db_dir_layout.addWidget(self.db_dir_input) + + self.db_dir_btn = QPushButton("Browse...") + self.db_dir_btn.clicked.connect(self.browse_db_dir) + db_dir_layout.addWidget(self.db_dir_btn) + + dir_layout.addRow("Icon Database:", db_dir_layout) + + layout.addWidget(dir_group) + + # Processing Options + options_group = QGroupBox("Processing Options") + options_layout = QFormLayout(options_group) + + self.extract_text_cb = QCheckBox("Extract text (OCR)") + self.extract_text_cb.setChecked(True) + options_layout.addRow(self.extract_text_cb) + + self.extract_icons_cb = QCheckBox("Extract icons") + self.extract_icons_cb.setChecked(True) + options_layout.addRow(self.extract_icons_cb) + + self.save_icons_cb = QCheckBox("Save extracted icons to disk") + self.save_icons_cb.setChecked(True) + options_layout.addRow(self.save_icons_cb) + + self.match_icons_cb = QCheckBox("Match icons to database") + self.match_icons_cb.setChecked(True) + options_layout.addRow(self.match_icons_cb) + + layout.addWidget(options_group) + layout.addStretch() + + return tab + + def _create_gpu_tab(self) -> QWidget: + """Create GPU settings tab.""" + tab = QWidget() + layout = QVBoxLayout(tab) + layout.setSpacing(15) + + # GPU Info Group + gpu_group = QGroupBox("GPU Information") + gpu_layout = QVBoxLayout(gpu_group) + + self.gpu_info_label = QLabel("Detecting GPU...") + self.gpu_info_label.setWordWrap(True) + gpu_layout.addWidget(self.gpu_info_label) + + self.gpu_details = QTextEdit() + self.gpu_details.setReadOnly(True) + self.gpu_details.setMaximumHeight(100) + gpu_layout.addWidget(self.gpu_details) + + 
self.refresh_gpu_btn = QPushButton("Refresh GPU Info") + self.refresh_gpu_btn.clicked.connect(self.refresh_gpu_info) + gpu_layout.addWidget(self.refresh_gpu_btn) + + layout.addWidget(gpu_group) + + # GPU Settings + settings_group = QGroupBox("GPU Acceleration") + settings_layout = QFormLayout(settings_group) + + self.use_gpu_cb = QCheckBox("Use GPU acceleration") + self.use_gpu_cb.setToolTip( + "Enable GPU acceleration for OCR and vision processing" + ) + settings_layout.addRow(self.use_gpu_cb) + + # GPU Backend selection + self.backend_combo = QComboBox() + self.backend_combo.addItem("Auto-detect", "auto") + self.backend_combo.addItem("CUDA (NVIDIA)", "cuda") + self.backend_combo.addItem("MPS (Apple Silicon)", "mps") + self.backend_combo.addItem("DirectML (Windows)", "directml") + self.backend_combo.addItem("CPU only", "cpu") + settings_layout.addRow("Preferred Backend:", self.backend_combo) + + layout.addWidget(settings_group) + + # Performance Settings + perf_group = QGroupBox("Performance") + perf_layout = QFormLayout(perf_group) + + self.batch_size_spin = QSpinBox() + self.batch_size_spin.setRange(1, 16) + self.batch_size_spin.setValue(1) + self.batch_size_spin.setToolTip( + "Number of images to process in parallel (higher = faster but more VRAM)" + ) + perf_layout.addRow("Batch Size:", self.batch_size_spin) + + self.threads_spin = QSpinBox() + self.threads_spin.setRange(1, 8) + self.threads_spin.setValue(2) + perf_layout.addRow("Processing Threads:", self.threads_spin) + + layout.addWidget(perf_group) + layout.addStretch() + + return tab + + def _create_ocr_tab(self) -> QWidget: + """Create OCR settings tab.""" + tab = QWidget() + layout = QVBoxLayout(tab) + layout.setSpacing(15) + + # Language Settings + lang_group = QGroupBox("Language Settings") + lang_layout = QFormLayout(lang_group) + + self.ocr_lang_combo = QComboBox() + self.ocr_lang_combo.addItem("English", "en") + self.ocr_lang_combo.addItem("Swedish", "sv") + self.ocr_lang_combo.addItem("Latin 
Script (Generic)", "latin") + lang_layout.addRow("OCR Language:", self.ocr_lang_combo) + + self.multi_lang_cb = QCheckBox("Enable multi-language detection") + lang_layout.addRow(self.multi_lang_cb) + + layout.addWidget(lang_group) + + # OCR Parameters + params_group = QGroupBox("OCR Parameters") + params_layout = QFormLayout(params_group) + + self.det_thresh_spin = QDoubleSpinBox() + self.det_thresh_spin.setRange(0.1, 0.9) + self.det_thresh_spin.setValue(0.3) + self.det_thresh_spin.setSingleStep(0.05) + self.det_thresh_spin.setToolTip( + "Text detection threshold (lower = more sensitive)" + ) + params_layout.addRow("Detection Threshold:", self.det_thresh_spin) + + self.rec_thresh_spin = QDoubleSpinBox() + self.rec_thresh_spin.setRange(0.1, 0.9) + self.rec_thresh_spin.setValue(0.5) + self.rec_thresh_spin.setSingleStep(0.05) + self.rec_thresh_spin.setToolTip( + "Text recognition confidence threshold" + ) + params_layout.addRow("Recognition Threshold:", self.rec_thresh_spin) + + self.use_angle_cls_cb = QCheckBox("Use angle classifier") + self.use_angle_cls_cb.setChecked(True) + self.use_angle_cls_cb.setToolTip( + "Detect and correct rotated text (slower but more accurate)" + ) + params_layout.addRow(self.use_angle_cls_cb) + + layout.addWidget(params_group) + + # Preprocessing + preprocess_group = QGroupBox("Preprocessing") + preprocess_layout = QFormLayout(preprocess_group) + + self.denoise_cb = QCheckBox("Apply denoising") + self.denoise_cb.setChecked(True) + preprocess_layout.addRow(self.denoise_cb) + + self.contrast_enhance_cb = QCheckBox("Enhance contrast") + self.contrast_enhance_cb.setChecked(True) + preprocess_layout.addRow(self.contrast_enhance_cb) + + layout.addWidget(preprocess_group) + layout.addStretch() + + return tab + + def _create_icon_tab(self) -> QWidget: + """Create icon detection settings tab.""" + tab = QWidget() + layout = QVBoxLayout(tab) + layout.setSpacing(15) + + # Detection Settings + detect_group = QGroupBox("Detection Settings") + 
detect_layout = QFormLayout(detect_group) + + self.auto_detect_window_cb = QCheckBox("Auto-detect loot windows") + self.auto_detect_window_cb.setChecked(True) + self.auto_detect_window_cb.setToolTip( + "Automatically detect loot windows in screenshots" + ) + detect_layout.addRow(self.auto_detect_window_cb) + + self.icon_size_combo = QComboBox() + self.icon_size_combo.addItem("Small (32x32)", "small") + self.icon_size_combo.addItem("Medium (48x48)", "medium") + self.icon_size_combo.addItem("Large (64x64)", "large") + self.icon_size_combo.addItem("HUD (40x40)", "hud") + detect_layout.addRow("Icon Size:", self.icon_size_combo) + + self.confidence_thresh_spin = QDoubleSpinBox() + self.confidence_thresh_spin.setRange(0.1, 1.0) + self.confidence_thresh_spin.setValue(0.7) + self.confidence_thresh_spin.setSingleStep(0.05) + detect_layout.addRow("Detection Confidence:", self.confidence_thresh_spin) + + layout.addWidget(detect_group) + + # Matching Settings + match_group = QGroupBox("Icon Matching") + match_layout = QFormLayout(match_group) + + self.hash_match_cb = QCheckBox("Use perceptual hashing") + self.hash_match_cb.setChecked(True) + match_layout.addRow(self.hash_match_cb) + + self.feature_match_cb = QCheckBox("Use feature matching (ORB)") + self.feature_match_cb.setChecked(True) + match_layout.addRow(self.feature_match_cb) + + self.template_match_cb = QCheckBox("Use template matching") + self.template_match_cb.setChecked(True) + match_layout.addRow(self.template_match_cb) + + self.match_thresh_spin = QDoubleSpinBox() + self.match_thresh_spin.setRange(0.1, 1.0) + self.match_thresh_spin.setValue(0.70) + self.match_thresh_spin.setSingleStep(0.05) + self.match_thresh_spin.setToolTip( + "Minimum confidence for icon match" + ) + match_layout.addRow("Match Threshold:", self.match_thresh_spin) + + layout.addWidget(match_group) + + # Template Directory + template_group = QGroupBox("Template Directory") + template_layout = QHBoxLayout(template_group) + + self.template_dir_input 
= QLineEdit() + self.template_dir_input.setReadOnly(True) + template_layout.addWidget(self.template_dir_input) + + self.template_dir_btn = QPushButton("Browse...") + self.template_dir_btn.clicked.connect(self.browse_template_dir) + template_layout.addWidget(self.template_dir_btn) + + layout.addWidget(template_group) + layout.addStretch() + + return tab + + def refresh_gpu_info(self): + """Refresh GPU information display.""" + self.gpu_info_label.setText("Detecting GPU...") + self.refresh_gpu_btn.setEnabled(False) + + self.gpu_thread = GPUInfoThread() + self.gpu_thread.info_ready.connect(self.on_gpu_info_ready) + self.gpu_thread.error_occurred.connect(self.on_gpu_error) + self.gpu_thread.start() + + def on_gpu_info_ready(self, info: dict): + """Handle GPU info received.""" + self.gpu_info = info + + backend = info.get('backend', 'cpu') + cuda_available = info.get('cuda_available', False) + mps_available = info.get('mps_available', False) + + # Update label + if backend == 'cuda': + devices = info.get('devices', []) + if devices: + device_name = devices[0].get('name', 'Unknown') + memory_gb = devices[0].get('memory_total', 0) / (1024**3) + self.gpu_info_label.setText( + f"✅ GPU Detected: {device_name} ({memory_gb:.1f} GB)" + ) + else: + self.gpu_info_label.setText("✅ CUDA Available") + elif backend == 'mps': + self.gpu_info_label.setText("✅ Apple MPS (Metal) Available") + elif backend == 'directml': + self.gpu_info_label.setText("✅ DirectML Available") + else: + self.gpu_info_label.setText("⚠️ No GPU detected - CPU only") + + # Update details + details = f"Backend: {backend}\n" + details += f"CUDA Available: {cuda_available}\n" + details += f"MPS Available: {mps_available}\n" + + if info.get('devices'): + for dev in info['devices']: + details += f"\nDevice {dev['id']}: {dev['name']}" + + self.gpu_details.setText(details) + self.refresh_gpu_btn.setEnabled(True) + + def on_gpu_error(self, error: str): + """Handle GPU detection error.""" + 
self.gpu_info_label.setText(f"❌ Error detecting GPU: {error}") + self.refresh_gpu_btn.setEnabled(True) + + def browse_icons_dir(self): + """Browse for extracted icons directory.""" + dir_path = QFileDialog.getExistingDirectory( + self, "Select Extracted Icons Directory", + self.icons_dir_input.text() or str(Path.home()) + ) + if dir_path: + self.icons_dir_input.setText(dir_path) + + def browse_db_dir(self): + """Browse for database directory.""" + dir_path = QFileDialog.getExistingDirectory( + self, "Select Database Directory", + self.db_dir_input.text() or str(Path.home()) + ) + if dir_path: + self.db_dir_input.setText(dir_path) + + def browse_template_dir(self): + """Browse for template directory.""" + dir_path = QFileDialog.getExistingDirectory( + self, "Select Template Directory", + self.template_dir_input.text() or str(Path.home()) + ) + if dir_path: + self.template_dir_input.setText(dir_path) + + def load_settings(self): + """Load settings from QSettings.""" + # General + self.enable_vision_cb.setChecked( + self.settings.value("vision/enabled", True, bool) + ) + self.auto_process_cb.setChecked( + self.settings.value("vision/auto_process", False, bool) + ) + self.icons_dir_input.setText( + self.settings.value("vision/icons_dir", "", str) + ) + self.db_dir_input.setText( + self.settings.value("vision/db_dir", "", str) + ) + self.extract_text_cb.setChecked( + self.settings.value("vision/extract_text", True, bool) + ) + self.extract_icons_cb.setChecked( + self.settings.value("vision/extract_icons", True, bool) + ) + self.save_icons_cb.setChecked( + self.settings.value("vision/save_icons", True, bool) + ) + self.match_icons_cb.setChecked( + self.settings.value("vision/match_icons", True, bool) + ) + + # GPU + self.use_gpu_cb.setChecked( + self.settings.value("vision/use_gpu", True, bool) + ) + backend = self.settings.value("vision/gpu_backend", "auto", str) + index = self.backend_combo.findData(backend) + if index >= 0: + self.backend_combo.setCurrentIndex(index) 
+ self.batch_size_spin.setValue( + self.settings.value("vision/batch_size", 1, int) + ) + self.threads_spin.setValue( + self.settings.value("vision/threads", 2, int) + ) + + # OCR + lang = self.settings.value("vision/ocr_lang", "en", str) + index = self.ocr_lang_combo.findData(lang) + if index >= 0: + self.ocr_lang_combo.setCurrentIndex(index) + self.multi_lang_cb.setChecked( + self.settings.value("vision/multi_lang", False, bool) + ) + self.det_thresh_spin.setValue( + self.settings.value("vision/det_thresh", 0.3, float) + ) + self.rec_thresh_spin.setValue( + self.settings.value("vision/rec_thresh", 0.5, float) + ) + self.use_angle_cls_cb.setChecked( + self.settings.value("vision/use_angle_cls", True, bool) + ) + self.denoise_cb.setChecked( + self.settings.value("vision/denoise", True, bool) + ) + self.contrast_enhance_cb.setChecked( + self.settings.value("vision/contrast_enhance", True, bool) + ) + + # Icon Detection + self.auto_detect_window_cb.setChecked( + self.settings.value("vision/auto_detect_window", True, bool) + ) + icon_size = self.settings.value("vision/icon_size", "medium", str) + index = self.icon_size_combo.findData(icon_size) + if index >= 0: + self.icon_size_combo.setCurrentIndex(index) + self.confidence_thresh_spin.setValue( + self.settings.value("vision/confidence_thresh", 0.7, float) + ) + self.hash_match_cb.setChecked( + self.settings.value("vision/hash_match", True, bool) + ) + self.feature_match_cb.setChecked( + self.settings.value("vision/feature_match", True, bool) + ) + self.template_match_cb.setChecked( + self.settings.value("vision/template_match", True, bool) + ) + self.match_thresh_spin.setValue( + self.settings.value("vision/match_thresh", 0.70, float) + ) + self.template_dir_input.setText( + self.settings.value("vision/template_dir", "", str) + ) + + def save_settings(self): + """Save settings to QSettings.""" + # General + self.settings.setValue("vision/enabled", self.enable_vision_cb.isChecked()) + 
self.settings.setValue("vision/auto_process", self.auto_process_cb.isChecked()) + self.settings.setValue("vision/icons_dir", self.icons_dir_input.text()) + self.settings.setValue("vision/db_dir", self.db_dir_input.text()) + self.settings.setValue("vision/extract_text", self.extract_text_cb.isChecked()) + self.settings.setValue("vision/extract_icons", self.extract_icons_cb.isChecked()) + self.settings.setValue("vision/save_icons", self.save_icons_cb.isChecked()) + self.settings.setValue("vision/match_icons", self.match_icons_cb.isChecked()) + + # GPU + self.settings.setValue("vision/use_gpu", self.use_gpu_cb.isChecked()) + self.settings.setValue("vision/gpu_backend", self.backend_combo.currentData()) + self.settings.setValue("vision/batch_size", self.batch_size_spin.value()) + self.settings.setValue("vision/threads", self.threads_spin.value()) + + # OCR + self.settings.setValue("vision/ocr_lang", self.ocr_lang_combo.currentData()) + self.settings.setValue("vision/multi_lang", self.multi_lang_cb.isChecked()) + self.settings.setValue("vision/det_thresh", self.det_thresh_spin.value()) + self.settings.setValue("vision/rec_thresh", self.rec_thresh_spin.value()) + self.settings.setValue("vision/use_angle_cls", self.use_angle_cls_cb.isChecked()) + self.settings.setValue("vision/denoise", self.denoise_cb.isChecked()) + self.settings.setValue("vision/contrast_enhance", self.contrast_enhance_cb.isChecked()) + + # Icon Detection + self.settings.setValue("vision/auto_detect_window", self.auto_detect_window_cb.isChecked()) + self.settings.setValue("vision/icon_size", self.icon_size_combo.currentData()) + self.settings.setValue("vision/confidence_thresh", self.confidence_thresh_spin.value()) + self.settings.setValue("vision/hash_match", self.hash_match_cb.isChecked()) + self.settings.setValue("vision/feature_match", self.feature_match_cb.isChecked()) + self.settings.setValue("vision/template_match", self.template_match_cb.isChecked()) + 
self.settings.setValue("vision/match_thresh", self.match_thresh_spin.value()) + self.settings.setValue("vision/template_dir", self.template_dir_input.text()) + + self.settings.sync() + + self.settings_saved.emit() + self.accept() + + logger.info("Vision settings saved") + + def reset_settings(self): + """Reset settings to defaults.""" + reply = QMessageBox.question( + self, "Reset Settings", + "Are you sure you want to reset all vision settings to defaults?", + QMessageBox.StandardButton.Yes | QMessageBox.StandardButton.No + ) + + if reply == QMessageBox.StandardButton.Yes: + self.settings.clear() + self.load_settings() + QMessageBox.information(self, "Reset Complete", + "Settings have been reset to defaults.") + + def open_test_dialog(self): + """Open vision test dialog.""" + from .vision_test_dialog import VisionTestDialog + dialog = VisionTestDialog(self) + dialog.exec() + + +# Export +__all__ = ['VisionSettingsDialog'] diff --git a/ui/vision_test_dialog.py b/ui/vision_test_dialog.py new file mode 100644 index 0000000..e839330 --- /dev/null +++ b/ui/vision_test_dialog.py @@ -0,0 +1,470 @@ +""" +Lemontropia Suite - Vision Test Dialog +Test and debug Game Vision AI functionality. 
+""" + +import time +from pathlib import Path +from typing import Optional + +from PyQt6.QtWidgets import ( + QDialog, QVBoxLayout, QHBoxLayout, QLabel, QLineEdit, + QPushButton, QComboBox, QCheckBox, QGroupBox, QFormLayout, + QMessageBox, QFileDialog, QTextEdit, QProgressBar, + QListWidget, QListWidgetItem, QSplitter, QWidget, + QTableWidget, QTableWidgetItem, QHeaderView +) +from PyQt6.QtCore import Qt, QThread, pyqtSignal +from PyQt6.QtGui import QPixmap, QImage, QFont +import numpy as np + +import logging + +logger = logging.getLogger(__name__) + + +class VisionTestWorker(QThread): + """Worker thread for vision testing.""" + + test_complete = pyqtSignal(dict) + progress = pyqtSignal(str) + error_occurred = pyqtSignal(str) + + def __init__(self, image_path: Path, settings: dict): + super().__init__() + self.image_path = image_path + self.settings = settings + + def run(self): + try: + from modules.game_vision_ai import GameVisionAI + + self.progress.emit("Initializing Game Vision AI...") + + vision = GameVisionAI( + use_gpu=self.settings.get('use_gpu', True), + ocr_lang=self.settings.get('ocr_lang', 'en') + ) + + self.progress.emit("Processing image...") + + start_time = time.time() + result = vision.process_screenshot( + self.image_path, + extract_text=self.settings.get('extract_text', True), + extract_icons=self.settings.get('extract_icons', True) + ) + processing_time = (time.time() - start_time) * 1000 + + # Prepare results + test_results = { + 'success': True, + 'processing_time_ms': processing_time, + 'gpu_backend': result.gpu_backend, + 'text_regions': [ + { + 'text': t.text, + 'confidence': t.confidence, + 'bbox': t.bbox, + 'language': t.language + } + for t in result.text_regions + ], + 'icon_regions': [ + { + 'bbox': i.bbox, + 'confidence': i.confidence, + 'hash': i.icon_hash[:16] # Truncated hash + } + for i in result.icon_regions + ], + 'text_count': len(result.text_regions), + 'icon_count': len(result.icon_regions) + } + + 
self.test_complete.emit(test_results) + + except Exception as e: + self.error_occurred.emit(str(e)) + + +class VisionTestDialog(QDialog): + """ + Dialog for testing and debugging Game Vision AI. + """ + + def __init__(self, parent=None): + super().__init__(parent) + self.setWindowTitle("Test Game Vision") + self.setMinimumSize(900, 700) + + self.current_image_path: Optional[Path] = None + self.current_results: Optional[dict] = None + + self.setup_ui() + + def setup_ui(self): + """Setup dialog UI.""" + layout = QVBoxLayout(self) + layout.setSpacing(10) + + # Title + title_label = QLabel("🧪 Game Vision Test & Debug") + title_font = QFont() + title_font.setPointSize(14) + title_font.setBold(True) + title_label.setFont(title_font) + layout.addWidget(title_label) + + # Main splitter + splitter = QSplitter(Qt.Orientation.Horizontal) + layout.addWidget(splitter) + + # Left panel - Controls + left_panel = QWidget() + left_layout = QVBoxLayout(left_panel) + left_layout.setContentsMargins(5, 5, 5, 5) + + # Image selection + image_group = QGroupBox("Test Image") + image_layout = QVBoxLayout(image_group) + + self.image_path_label = QLabel("No image selected") + self.image_path_label.setWordWrap(True) + image_layout.addWidget(self.image_path_label) + + image_btn_layout = QHBoxLayout() + + self.browse_btn = QPushButton("Browse...") + self.browse_btn.clicked.connect(self.browse_image) + image_btn_layout.addWidget(self.browse_btn) + + self.capture_btn = QPushButton("Capture Screen") + self.capture_btn.clicked.connect(self.capture_screen) + image_btn_layout.addWidget(self.capture_btn) + + image_btn_layout.addStretch() + image_layout.addLayout(image_btn_layout) + + left_layout.addWidget(image_group) + + # Test settings + settings_group = QGroupBox("Test Settings") + settings_layout = QFormLayout(settings_group) + + self.use_gpu_cb = QCheckBox("Use GPU acceleration") + self.use_gpu_cb.setChecked(True) + settings_layout.addRow(self.use_gpu_cb) + + self.extract_text_cb = 
QCheckBox("Extract text (OCR)") + self.extract_text_cb.setChecked(True) + settings_layout.addRow(self.extract_text_cb) + + self.extract_icons_cb = QCheckBox("Extract icons") + self.extract_icons_cb.setChecked(True) + settings_layout.addRow(self.extract_icons_cb) + + self.ocr_lang_combo = QComboBox() + self.ocr_lang_combo.addItem("English", "en") + self.ocr_lang_combo.addItem("Swedish", "sv") + settings_layout.addRow("OCR Language:", self.ocr_lang_combo) + + left_layout.addWidget(settings_group) + + # Run test button + self.test_btn = QPushButton("▶ Run Vision Test") + self.test_btn.setStyleSheet(""" + QPushButton { + background-color: #4CAF50; + color: white; + font-weight: bold; + padding: 10px; + } + QPushButton:hover { + background-color: #45a049; + } + QPushButton:disabled { + background-color: #cccccc; + } + """) + self.test_btn.clicked.connect(self.run_test) + self.test_btn.setEnabled(False) + left_layout.addWidget(self.test_btn) + + # Progress + self.progress_label = QLabel("") + left_layout.addWidget(self.progress_label) + + self.progress_bar = QProgressBar() + self.progress_bar.setRange(0, 0) # Indeterminate + self.progress_bar.setVisible(False) + left_layout.addWidget(self.progress_bar) + + # GPU Info + gpu_group = QGroupBox("GPU Information") + gpu_layout = QVBoxLayout(gpu_group) + + self.gpu_info_label = QLabel("Click 'Check GPU' to detect") + self.gpu_info_label.setWordWrap(True) + gpu_layout.addWidget(self.gpu_info_label) + + self.check_gpu_btn = QPushButton("Check GPU") + self.check_gpu_btn.clicked.connect(self.check_gpu) + gpu_layout.addWidget(self.check_gpu_btn) + + left_layout.addWidget(gpu_group) + left_layout.addStretch() + + splitter.addWidget(left_panel) + + # Right panel - Results + right_panel = QWidget() + right_layout = QVBoxLayout(right_panel) + right_layout.setContentsMargins(5, 5, 5, 5) + + # Image preview + preview_group = QGroupBox("Image Preview") + preview_layout = QVBoxLayout(preview_group) + + self.preview_label = QLabel("No image 
loaded") + self.preview_label.setAlignment(Qt.AlignmentFlag.AlignCenter) + self.preview_label.setMinimumHeight(200) + self.preview_label.setStyleSheet("background-color: #f0f0f0; border: 1px solid #ccc;") + preview_layout.addWidget(self.preview_label) + + right_layout.addWidget(preview_group) + + # Results tabs + from PyQt6.QtWidgets import QTabWidget + self.results_tabs = QTabWidget() + right_layout.addWidget(self.results_tabs) + + # Summary tab + self.summary_tab = QTextEdit() + self.summary_tab.setReadOnly(True) + self.results_tabs.addTab(self.summary_tab, "Summary") + + # Text regions tab + self.text_table = QTableWidget() + self.text_table.setColumnCount(4) + self.text_table.setHorizontalHeaderLabels(["Text", "Confidence", "Position", "Language"]) + self.text_table.horizontalHeader().setSectionResizeMode(0, QHeaderView.ResizeMode.Stretch) + self.results_tabs.addTab(self.text_table, "Text Regions") + + # Icon regions tab + self.icon_table = QTableWidget() + self.icon_table.setColumnCount(3) + self.icon_table.setHorizontalHeaderLabels(["Position", "Confidence", "Hash"]) + self.icon_table.horizontalHeader().setSectionResizeMode(0, QHeaderView.ResizeMode.Stretch) + self.results_tabs.addTab(self.icon_table, "Icon Regions") + + # Log tab + self.log_text = QTextEdit() + self.log_text.setReadOnly(True) + self.results_tabs.addTab(self.log_text, "Log") + + splitter.addWidget(right_panel) + splitter.setSizes([300, 600]) + + # Close button + btn_layout = QHBoxLayout() + btn_layout.addStretch() + + self.close_btn = QPushButton("Close") + self.close_btn.clicked.connect(self.accept) + btn_layout.addWidget(self.close_btn) + + layout.addLayout(btn_layout) + + def browse_image(self): + """Browse for test image.""" + file_path, _ = QFileDialog.getOpenFileName( + self, "Select Test Image", + str(Path.home()), + "Images (*.png *.jpg *.jpeg *.bmp)" + ) + + if file_path: + self.load_image(Path(file_path)) + + def capture_screen(self): + """Capture screen for testing.""" + try: + 
import mss + import numpy as np + import cv2 + from PyQt6.QtGui import QImage, QPixmap + + self.progress_label.setText("Capturing screen...") + + with mss.mss() as sct: + monitor = sct.monitors[1] # Primary monitor + screenshot = sct.grab(monitor) + + # Convert to numpy array + img = np.array(screenshot) + img = cv2.cvtColor(img, cv2.COLOR_BGRA2BGR) + + # Save temporarily + temp_path = Path.home() / ".lemontropia" / "temp_capture.png" + temp_path.parent.mkdir(parents=True, exist_ok=True) + cv2.imwrite(str(temp_path), img) + + self.load_image(temp_path) + self.progress_label.setText("Screen captured") + + except Exception as e: + QMessageBox.critical(self, "Capture Failed", f"Failed to capture screen: {e}") + self.progress_label.setText("") + + def load_image(self, image_path: Path): + """Load and display image.""" + self.current_image_path = image_path + self.image_path_label.setText(str(image_path)) + + # Load and display preview + pixmap = QPixmap(str(image_path)) + if not pixmap.isNull(): + # Scale to fit + scaled = pixmap.scaled( + self.preview_label.size(), + Qt.AspectRatioMode.KeepAspectRatio, + Qt.TransformationMode.SmoothTransformation + ) + self.preview_label.setPixmap(scaled) + self.test_btn.setEnabled(True) + else: + self.preview_label.setText("Failed to load image") + self.test_btn.setEnabled(False) + + def run_test(self): + """Run vision test.""" + if not self.current_image_path: + QMessageBox.warning(self, "No Image", "Please select an image first.") + return + + # Collect settings + settings = { + 'use_gpu': self.use_gpu_cb.isChecked(), + 'extract_text': self.extract_text_cb.isChecked(), + 'extract_icons': self.extract_icons_cb.isChecked(), + 'ocr_lang': self.ocr_lang_combo.currentData() + } + + # Disable controls + self.test_btn.setEnabled(False) + self.browse_btn.setEnabled(False) + self.capture_btn.setEnabled(False) + self.progress_bar.setVisible(True) + self.progress_label.setText("Running vision test...") + + # Clear previous results + 
self.summary_tab.clear() + self.text_table.setRowCount(0) + self.icon_table.setRowCount(0) + + # Start worker + self.worker = VisionTestWorker(self.current_image_path, settings) + self.worker.test_complete.connect(self.on_test_complete) + self.worker.progress.connect(self.on_test_progress) + self.worker.error_occurred.connect(self.on_test_error) + self.worker.start() + + def on_test_progress(self, message: str): + """Handle test progress.""" + self.progress_label.setText(message) + self.log_text.append(f"[{time.strftime('%H:%M:%S')}] {message}") + + def on_test_complete(self, results: dict): + """Handle test completion.""" + self.current_results = results + + # Re-enable controls + self.test_btn.setEnabled(True) + self.browse_btn.setEnabled(True) + self.capture_btn.setEnabled(True) + self.progress_bar.setVisible(False) + self.progress_label.setText("Test complete!") + + # Update summary + summary = f""" +

+<h3>Vision Test Results</h3>
+<p>
+<b>Processing Time:</b> {results['processing_time_ms']:.1f} ms<br>
+<b>GPU Backend:</b> {results['gpu_backend']}<br>
+<b>Text Regions Detected:</b> {results['text_count']}<br>
+<b>Icon Regions Detected:</b> {results['icon_count']}<br>
+</p>
+""" + self.summary_tab.setHtml(summary) + + # Update text table + self.text_table.setRowCount(len(results['text_regions'])) + for i, text in enumerate(results['text_regions']): + self.text_table.setItem(i, 0, QTableWidgetItem(text['text'])) + self.text_table.setItem(i, 1, QTableWidgetItem(f"{text['confidence']:.2%}")) + bbox_str = f"({text['bbox'][0]}, {text['bbox'][1]})" + self.text_table.setItem(i, 2, QTableWidgetItem(bbox_str)) + self.text_table.setItem(i, 3, QTableWidgetItem(text['language'])) + + # Update icon table + self.icon_table.setRowCount(len(results['icon_regions'])) + for i, icon in enumerate(results['icon_regions']): + bbox_str = f"({icon['bbox'][0]}, {icon['bbox'][1]}, {icon['bbox'][2]}x{icon['bbox'][3]})" + self.icon_table.setItem(i, 0, QTableWidgetItem(bbox_str)) + self.icon_table.setItem(i, 1, QTableWidgetItem(f"{icon['confidence']:.2%}")) + self.icon_table.setItem(i, 2, QTableWidgetItem(icon['hash'])) + + logger.info(f"Vision test complete: {results['text_count']} texts, {results['icon_count']} icons") + + def on_test_error(self, error: str): + """Handle test error.""" + self.test_btn.setEnabled(True) + self.browse_btn.setEnabled(True) + self.capture_btn.setEnabled(True) + self.progress_bar.setVisible(False) + self.progress_label.setText(f"Error: {error}") + + QMessageBox.critical(self, "Test Failed", f"Vision test failed:\n{error}") + self.log_text.append(f"[ERROR] {error}") + + logger.error(f"Vision test failed: {error}") + + def check_gpu(self): + """Check GPU availability.""" + try: + from modules.game_vision_ai import GPUDetector + + info = GPUDetector.get_gpu_info() + + text = f""" +GPU Information
+Backend: {info['backend']}<br>
+CUDA Available: {info['cuda_available']}<br>
+MPS Available: {info['mps_available']}<br>
+""" + if info.get('devices'): + for dev in info['devices']: + mem_gb = dev.get('memory_total', 0) / (1024**3) + text += f"Device {dev['id']}: {dev['name']} ({mem_gb:.1f} GB)
" + + self.gpu_info_label.setText(text) + + except Exception as e: + self.gpu_info_label.setText(f"Error detecting GPU: {e}") + logger.error(f"GPU detection failed: {e}") + + def resizeEvent(self, event): + """Handle resize to update preview.""" + super().resizeEvent(event) + if self.current_image_path and self.preview_label.pixmap(): + pixmap = QPixmap(str(self.current_image_path)) + scaled = pixmap.scaled( + self.preview_label.size(), + Qt.AspectRatioMode.KeepAspectRatio, + Qt.TransformationMode.SmoothTransformation + ) + self.preview_label.setPixmap(scaled) + + +# Export +__all__ = ['VisionTestDialog'] diff --git a/vision_example.py b/vision_example.py new file mode 100644 index 0000000..53ea52e --- /dev/null +++ b/vision_example.py @@ -0,0 +1,265 @@ +""" +Lemontropia Suite - Game Vision AI Example +Demonstrates usage of the Game Vision AI module. +""" + +import sys +from pathlib import Path +import logging + +# Setup logging +logging.basicConfig(level=logging.INFO, format='%(levelname)s: %(message)s') +logger = logging.getLogger(__name__) + + +def demo_gpu_detection(): + """Demonstrate GPU detection.""" + print("\n" + "="*60) + print("GPU DETECTION DEMO") + print("="*60) + + from modules.game_vision_ai import GPUDetector, GPUBackend + + # Detect GPU + backend = GPUDetector.detect_backend() + print(f"\nDetected GPU Backend: {backend.value}") + + # Get detailed info + info = GPUDetector.get_gpu_info() + print(f"\nGPU Details:") + print(f" Backend: {info['backend']}") + print(f" CUDA Available: {info['cuda_available']}") + print(f" MPS Available: {info['mps_available']}") + + if info.get('devices'): + print(f"\n Devices:") + for dev in info['devices']: + mem_gb = dev.get('memory_total', 0) / (1024**3) + print(f" [{dev['id']}] {dev['name']} ({mem_gb:.1f} GB)") + + print(f"\n PyTorch Device String: {GPUDetector.get_device_string(backend)}") + + +def demo_ocr(image_path: str = None): + """Demonstrate OCR functionality.""" + print("\n" + "="*60) + print("OCR TEXT 
EXTRACTION DEMO") + print("="*60) + + from modules.game_vision_ai import OCRProcessor + + # Initialize OCR + print("\nInitializing OCR (this may take a moment on first run)...") + ocr = OCRProcessor(use_gpu=True, lang='en') + + if image_path and Path(image_path).exists(): + print(f"\nProcessing: {image_path}") + regions = ocr.extract_text(image_path) + + print(f"\nDetected {len(regions)} text regions:") + for i, region in enumerate(regions, 1): + print(f" {i}. '{region.text}' (confidence: {region.confidence:.2%})") + print(f" Position: ({region.bbox[0]}, {region.bbox[1]}) {region.bbox[2]}x{region.bbox[3]}") + else: + print(f"\nNo image provided or file not found: {image_path}") + print("Usage: python vision_example.py --ocr path/to/screenshot.png") + + +def demo_icon_detection(image_path: str = None): + """Demonstrate icon detection.""" + print("\n" + "="*60) + print("ICON DETECTION DEMO") + print("="*60) + + from modules.game_vision_ai import IconDetector + import cv2 + + detector = IconDetector() + + if image_path and Path(image_path).exists(): + print(f"\nProcessing: {image_path}") + image = cv2.imread(image_path) + + # Detect loot window + window = detector.detect_loot_window(image) + if window: + print(f"\nDetected loot window at: {window}") + + # Extract icons + icons = detector.extract_icons_from_region(image, window) + print(f"\nExtracted {len(icons)} icons:") + + for i, icon in enumerate(icons, 1): + print(f" {i}. Position: {icon.bbox}") + print(f" Hash: {icon.icon_hash[:32]}...") + else: + print("\nNo loot window detected. 
Trying full image...") + h, w = image.shape[:2] + icons = detector.extract_icons_from_region(image, (0, 0, w, h)) + print(f"Found {len(icons)} potential icons in full image") + else: + print(f"\nNo image provided or file not found: {image_path}") + + +def demo_full_vision(image_path: str = None): + """Demonstrate full vision processing.""" + print("\n" + "="*60) + print("FULL VISION PROCESSING DEMO") + print("="*60) + + from modules.game_vision_ai import GameVisionAI + + # Initialize vision AI + print("\nInitializing Game Vision AI...") + vision = GameVisionAI(use_gpu=True, ocr_lang='en') + + print(f"GPU Available: {vision.is_gpu_available()}") + print(f"Backend: {vision.backend.value}") + + if image_path and Path(image_path).exists(): + print(f"\nProcessing: {image_path}") + + # Process screenshot + result = vision.process_screenshot(image_path) + + print(f"\n--- Results ---") + print(f"Processing Time: {result.processing_time_ms:.1f}ms") + print(f"GPU Backend: {result.gpu_backend}") + + print(f"\nText Regions ({len(result.text_regions)}):") + for region in result.text_regions: + print(f" • '{region.text}' ({region.confidence:.2%})") + + print(f"\nIcon Regions ({len(result.icon_regions)}):") + for region in result.icon_regions: + print(f" • Position: {region.bbox}") + + print(f"\nExtracted icons saved to: {vision.extracted_icons_dir}") + else: + print(f"\nNo image provided or file not found: {image_path}") + print("Usage: python vision_example.py --full path/to/screenshot.png") + + +def demo_icon_matching(): + """Demonstrate icon matching.""" + print("\n" + "="*60) + print("ICON MATCHING DEMO") + print("="*60) + + from modules.icon_matcher import IconMatcher, PerceptualHash + import cv2 + import numpy as np + + # Create matcher + matcher = IconMatcher() + + print(f"\nIcon Database Stats:") + stats = matcher.get_database_stats() + print(f" Total Icons: {stats['total_icons']}") + print(f" Database Path: {stats['database_path']}") + + # Demonstrate perceptual hashing 
+ print(f"\nPerceptual Hashing:") + # Create a sample image + sample = np.random.randint(0, 255, (64, 64, 3), dtype=np.uint8) + + avg_hash = PerceptualHash.average_hash(sample) + diff_hash = PerceptualHash.difference_hash(sample) + + print(f" Average Hash: {avg_hash[:32]}...") + print(f" Difference Hash: {diff_hash[:32]}...") + + # Show similarity calculation + similar = np.random.randint(0, 255, (64, 64, 3), dtype=np.uint8) + similar[20:40, 20:40] = sample[20:40, 20:40] # Make it somewhat similar + + hash1 = PerceptualHash.average_hash(sample) + hash2 = PerceptualHash.average_hash(similar) + similarity = PerceptualHash.similarity(hash1, hash2) + + print(f" Similarity between two images: {similarity:.2%}") + + +def demo_calibration(): + """Demonstrate calibration.""" + print("\n" + "="*60) + print("CALIBRATION DEMO") + print("="*60) + + from modules.game_vision_ai import GameVisionAI + + vision = GameVisionAI(use_gpu=True) + + print("\nTo calibrate, provide sample screenshots:") + print(" vision.calibrate_for_game([path1, path2, ...])") + print("\nThis will:") + print(" 1. Process each screenshot") + print(" 2. Measure detection accuracy") + print(" 3. Calculate average processing time") + print(" 4. 
Provide recommendations") + + +def main(): + """Main entry point.""" + import argparse + + parser = argparse.ArgumentParser( + description="Game Vision AI Examples", + formatter_class=argparse.RawDescriptionHelpFormatter, + epilog=""" +Examples: + python vision_example.py --gpu # GPU detection demo + python vision_example.py --ocr image.png # OCR demo + python vision_example.py --icons image.png # Icon detection demo + python vision_example.py --full image.png # Full vision demo + python vision_example.py --matching # Icon matching demo + python vision_example.py --all # Run all demos + """ + ) + + parser.add_argument('--gpu', action='store_true', help='GPU detection demo') + parser.add_argument('--ocr', type=str, metavar='IMAGE', help='OCR demo with image') + parser.add_argument('--icons', type=str, metavar='IMAGE', help='Icon detection demo') + parser.add_argument('--full', type=str, metavar='IMAGE', help='Full vision demo') + parser.add_argument('--matching', action='store_true', help='Icon matching demo') + parser.add_argument('--calibration', action='store_true', help='Calibration demo') + parser.add_argument('--all', action='store_true', help='Run all demos') + + args = parser.parse_args() + + # If no args, show help + if not any([args.gpu, args.ocr, args.icons, args.full, args.matching, args.calibration, args.all]): + parser.print_help() + return + + try: + if args.all or args.gpu: + demo_gpu_detection() + + if args.all or args.ocr: + demo_ocr(args.ocr) + + if args.all or args.icons: + demo_icon_detection(args.icons) + + if args.all or args.full: + demo_full_vision(args.full) + + if args.all or args.matching: + demo_icon_matching() + + if args.all or args.calibration: + demo_calibration() + + except ImportError as e: + print(f"\n❌ Import Error: {e}") + print("\nMake sure all dependencies are installed:") + print(" pip install -r requirements.txt") + except Exception as e: + print(f"\n❌ Error: {e}") + import traceback + traceback.print_exc() + + +if __name__ 
== "__main__": + main()