diff --git a/modules/game_vision_ai.py b/modules/game_vision_ai.py
new file mode 100644
index 0000000..b611f0f
--- /dev/null
+++ b/modules/game_vision_ai.py
@@ -0,0 +1,722 @@
+"""
+Lemontropia Suite - Game Vision AI Module
+Advanced computer vision with local GPU-accelerated AI models.
+Supports OCR (PaddleOCR) and icon detection for game UI analysis.
+"""
+
+import cv2
+import numpy as np
+import logging
+import torch
+import time
+from pathlib import Path
+from dataclasses import dataclass, field
+from typing import Optional, Tuple, List, Dict, Any, Union
+from enum import Enum
+import json
+import hashlib
+
+logger = logging.getLogger(__name__)
+
+
+class GPUBackend(Enum):
+    """Supported GPU backends, listed in detection-priority order.
+
+    The enum values are the lowercase backend names that appear in logs
+    and in result metadata (see ``VisionResult.gpu_backend``).
+    """
+    CUDA = "cuda"            # NVIDIA CUDA
+    MPS = "mps"              # Apple Metal Performance Shaders
+    DIRECTML = "directml"    # Windows DirectML (needs the torch_directml package)
+    CPU = "cpu"              # Fallback when no GPU backend is usable
+
+
+@dataclass
+class TextRegion:
+ """Detected text region with metadata."""
+ text: str
+ confidence: float
+ bbox: Tuple[int, int, int, int] # x, y, w, h
+ language: str = "en"
+
+ def to_dict(self) -> Dict[str, Any]:
+ return {
+ 'text': self.text,
+ 'confidence': self.confidence,
+ 'bbox': self.bbox,
+ 'language': self.language
+ }
+
+
+@dataclass
+class IconRegion:
+ """Detected icon region with metadata."""
+ image: np.ndarray
+ bbox: Tuple[int, int, int, int] # x, y, w, h
+ confidence: float
+ icon_hash: str = ""
+
+ def __post_init__(self):
+ if not self.icon_hash:
+ self.icon_hash = self._compute_hash()
+
+ def _compute_hash(self) -> str:
+ """Compute perceptual hash of icon."""
+ if self.image is None or self.image.size == 0:
+ return ""
+ # Resize to standard size and compute average hash
+ small = cv2.resize(self.image, (16, 16), interpolation=cv2.INTER_AREA)
+ gray = cv2.cvtColor(small, cv2.COLOR_BGR2GRAY) if len(small.shape) == 3 else small
+ avg = gray.mean()
+ hash_bits = (gray > avg).flatten()
+ return ''.join(['1' if b else '0' for b in hash_bits])
+
+
+@dataclass
+class ItemMatch:
+    """Result of matching an extracted icon against the item database."""
+    name: str                         # matched item name
+    confidence: float                 # match confidence in [0, 1]
+    item_id: Optional[str] = None     # database item id, when known
+    category: Optional[str] = None    # item category, when known
+    matched_hash: str = ""            # presumably the hash of the matching DB entry — not set anywhere in this file; confirm with callers
+
+
+@dataclass
+class VisionResult:
+ """Complete vision processing result."""
+ text_regions: List[TextRegion] = field(default_factory=list)
+ icon_regions: List[IconRegion] = field(default_factory=list)
+ processing_time_ms: float = 0.0
+ gpu_backend: str = "cpu"
+ timestamp: float = field(default_factory=time.time)
+
+ def to_dict(self) -> Dict[str, Any]:
+ return {
+ 'text_regions': [t.to_dict() for t in self.text_regions],
+ 'icon_count': len(self.icon_regions),
+ 'processing_time_ms': self.processing_time_ms,
+ 'gpu_backend': self.gpu_backend,
+ 'timestamp': self.timestamp
+ }
+
+
+class GPUDetector:
+ """Detect and manage GPU availability."""
+
+ @staticmethod
+ def detect_backend() -> GPUBackend:
+ """Detect best available GPU backend."""
+ # Check CUDA first (most common)
+ if torch.cuda.is_available():
+ logger.info(f"CUDA available: {torch.cuda.get_device_name(0)}")
+ return GPUBackend.CUDA
+
+ # Check Apple MPS
+ if hasattr(torch.backends, 'mps') and torch.backends.mps.is_available():
+ logger.info("Apple MPS (Metal) available")
+ return GPUBackend.MPS
+
+ # Check DirectML on Windows
+ try:
+ import torch_directml
+ if torch_directml.is_available():
+ logger.info("DirectML available")
+ return GPUBackend.DIRECTML
+ except ImportError:
+ pass
+
+ logger.info("No GPU backend available, using CPU")
+ return GPUBackend.CPU
+
+ @staticmethod
+ def get_device_string(backend: GPUBackend) -> str:
+ """Get PyTorch device string for backend."""
+ if backend == GPUBackend.CUDA:
+ return "cuda:0"
+ elif backend == GPUBackend.MPS:
+ return "mps"
+ elif backend == GPUBackend.DIRECTML:
+ return "privateuseone:0" # DirectML device
+ return "cpu"
+
+ @staticmethod
+ def get_gpu_info() -> Dict[str, Any]:
+ """Get detailed GPU information."""
+ info = {
+ 'backend': GPUDetector.detect_backend().value,
+ 'cuda_available': torch.cuda.is_available(),
+ 'mps_available': hasattr(torch.backends, 'mps') and torch.backends.mps.is_available(),
+ 'devices': []
+ }
+
+ if torch.cuda.is_available():
+ for i in range(torch.cuda.device_count()):
+ info['devices'].append({
+ 'id': i,
+ 'name': torch.cuda.get_device_name(i),
+ 'memory_total': torch.cuda.get_device_properties(i).total_memory
+ })
+
+ return info
+
+
+class OCRProcessor:
+    """OCR text extraction using PaddleOCR with GPU support.
+
+    Wraps PaddleOCR initialization (GPU detection, language mapping),
+    image preprocessing, and normalization of raw OCR output into
+    ``TextRegion`` objects. If PaddleOCR cannot be initialized, all
+    extraction methods degrade gracefully to returning empty lists.
+    """
+
+    # Accepted ``lang`` values; anything else falls back to English.
+    SUPPORTED_LANGUAGES = ['en', 'sv', 'latin']  # English, Swedish, Latin script
+
+    def __init__(self, use_gpu: bool = True, lang: str = 'en'):
+        """Create the processor and eagerly initialize PaddleOCR.
+
+        Args:
+            use_gpu: Attempt GPU acceleration (falls back to CPU).
+            lang: OCR language; unsupported values fall back to 'en'.
+        """
+        self.use_gpu = use_gpu
+        self.lang = lang if lang in self.SUPPORTED_LANGUAGES else 'en'
+        # PaddleOCR instance; stays None when initialization fails.
+        self.ocr = None
+        # Backend actually detected during _init_ocr (CPU until then).
+        self.backend = GPUBackend.CPU
+        self._init_ocr()
+
+    def _init_ocr(self):
+        """Initialize PaddleOCR with appropriate backend.
+
+        On any failure (missing package, bad kwargs) ``self.ocr`` is left
+        as None and later calls to extract_text() return [].
+        """
+        try:
+            from paddleocr import PaddleOCR
+
+            # Detect GPU
+            if self.use_gpu:
+                self.backend = GPUDetector.detect_backend()
+                use_gpu_flag = self.backend != GPUBackend.CPU
+            else:
+                use_gpu_flag = False
+
+            # Map our language codes to PaddleOCR model names.
+            lang_map = {
+                'en': 'en',
+                'sv': 'latin',  # Swedish uses latin script model
+                'latin': 'latin'
+            }
+            paddle_lang = lang_map.get(self.lang, 'en')
+
+            logger.info(f"Initializing PaddleOCR (lang={paddle_lang}, gpu={use_gpu_flag})")
+
+            # NOTE(review): these keyword arguments (use_gpu, show_log,
+            # rec_thresh, ...) are version-dependent in PaddleOCR — confirm
+            # against the pinned paddleocr release before upgrading.
+            self.ocr = PaddleOCR(
+                lang=paddle_lang,
+                use_gpu=use_gpu_flag,
+                show_log=False,
+                use_angle_cls=True,
+                det_db_thresh=0.3,
+                det_db_box_thresh=0.5,
+                rec_thresh=0.5,
+            )
+
+            logger.info(f"PaddleOCR initialized successfully (backend: {self.backend.value})")
+
+        except ImportError:
+            logger.error("PaddleOCR not installed. Install with: pip install paddleocr")
+            self.ocr = None
+        except Exception as e:
+            logger.error(f"Failed to initialize PaddleOCR: {e}")
+            self.ocr = None
+
+    def preprocess_for_ocr(self, image: np.ndarray) -> np.ndarray:
+        """Preprocess image for better OCR results.
+
+        Pipeline: grayscale -> non-local-means denoise -> adaptive
+        Gaussian threshold. Output has the same dimensions as the input,
+        so OCR bounding boxes remain valid for the original image.
+        """
+        # Convert to grayscale if needed
+        if len(image.shape) == 3:
+            gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
+        else:
+            gray = image
+
+        # Denoise
+        denoised = cv2.fastNlMeansDenoising(gray, None, 10, 7, 21)
+
+        # Adaptive threshold for better text contrast
+        binary = cv2.adaptiveThreshold(
+            denoised, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
+            cv2.THRESH_BINARY, 11, 2
+        )
+
+        return binary
+
+    def extract_text(self, image: Union[str, np.ndarray, Path]) -> List[TextRegion]:
+        """
+        Extract text from image.
+
+        Args:
+            image: Image path or numpy array (BGR). Arrays are copied, so
+                the caller's buffer is never modified.
+
+        Returns:
+            List of detected text regions (empty on load/OCR failure).
+        """
+        if self.ocr is None:
+            logger.warning("OCR not available")
+            return []
+
+        # Load image if path provided
+        if isinstance(image, (str, Path)):
+            img = cv2.imread(str(image))
+            if img is None:
+                logger.error(f"Failed to load image: {image}")
+                return []
+        else:
+            img = image.copy()
+
+        # Preprocess
+        processed = self.preprocess_for_ocr(img)
+
+        try:
+            # Run OCR
+            result = self.ocr.ocr(processed, cls=True)
+
+            detected = []
+            # NOTE(review): this unpacking assumes the PaddleOCR v2 result
+            # shape [[ [quad_points, (text, conf)], ... ]] — confirm for
+            # the pinned version.
+            if result and result[0]:
+                for line in result[0]:
+                    if line is None:
+                        continue
+                    bbox, (text, confidence) = line
+
+                    # Convert the four corner points to an axis-aligned
+                    # x, y, w, h bounding box.
+                    x_coords = [p[0] for p in bbox]
+                    y_coords = [p[1] for p in bbox]
+                    x, y = int(min(x_coords)), int(min(y_coords))
+                    w = int(max(x_coords) - x)
+                    h = int(max(y_coords) - y)
+
+                    detected.append(TextRegion(
+                        text=text.strip(),
+                        confidence=float(confidence),
+                        bbox=(x, y, w, h),
+                        language=self.lang
+                    ))
+
+            return detected
+
+        except Exception as e:
+            logger.error(f"OCR processing failed: {e}")
+            return []
+
+    def extract_text_from_region(self, image: np.ndarray,
+                                 region: Tuple[int, int, int, int]) -> List[TextRegion]:
+        """Extract text from one (x, y, w, h) region of ``image``.
+
+        Detected bboxes are translated back into full-image coordinates.
+        NOTE(review): the region is not clipped to the image bounds;
+        negative coordinates would slice unexpectedly — confirm callers
+        always pass in-bounds regions.
+        """
+        x, y, w, h = region
+        roi = image[y:y+h, x:x+w]
+
+        if roi.size == 0:
+            return []
+
+        regions = self.extract_text(roi)
+
+        # Adjust coordinates back to original image
+        for r in regions:
+            rx, ry, rw, rh = r.bbox
+            r.bbox = (x + rx, y + ry, rw, rh)
+
+        return regions
+
+
+class IconDetector:
+ """Detect and extract item icons from game UI."""
+
+ # Typical Entropia Universe loot window icon sizes
+ ICON_SIZES = {
+ 'small': (32, 32),
+ 'medium': (48, 48),
+ 'large': (64, 64),
+ 'hud': (40, 40)
+ }
+
+ def __init__(self, template_dir: Optional[Path] = None):
+ self.template_dir = template_dir or Path(__file__).parent / "templates" / "icons"
+ self.templates: Dict[str, np.ndarray] = {}
+ self._load_templates()
+
+ def _load_templates(self):
+ """Load icon templates for matching."""
+ if not self.template_dir.exists():
+ logger.warning(f"Template directory not found: {self.template_dir}")
+ return
+
+ for template_file in self.template_dir.glob("*.png"):
+ try:
+ name = template_file.stem
+ template = cv2.imread(str(template_file), cv2.IMREAD_COLOR)
+ if template is not None:
+ self.templates[name] = template
+ logger.debug(f"Loaded icon template: {name}")
+ except Exception as e:
+ logger.error(f"Failed to load template {template_file}: {e}")
+
+ def detect_loot_window(self, image: np.ndarray) -> Optional[Tuple[int, int, int, int]]:
+ """
+ Detect loot window in screenshot.
+
+ Returns bounding box of loot window or None if not found.
+ """
+ # Look for common loot window indicators
+ # Method 1: Template matching for "Loot" text or window frame
+ if 'loot_window' in self.templates:
+ result = cv2.matchTemplate(
+ image, self.templates['loot_window'], cv2.TM_CCOEFF_NORMED
+ )
+ _, max_val, _, max_loc = cv2.minMaxLoc(result)
+ if max_val > 0.7:
+ h, w = self.templates['loot_window'].shape[:2]
+ return (*max_loc, w, h)
+
+ # Method 2: Detect based on typical loot window characteristics
+ # Loot windows usually have a grid of items with consistent spacing
+ gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
+
+ # Look for high-contrast regions that could be icons
+ _, thresh = cv2.threshold(gray, 200, 255, cv2.THRESH_BINARY)
+
+ # Find contours
+ contours, _ = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
+
+ # Filter for icon-sized squares
+ potential_icons = []
+ for cnt in contours:
+ x, y, w, h = cv2.boundingRect(cnt)
+ aspect = w / h if h > 0 else 0
+
+ # Check if dimensions match typical icon sizes
+ for size_name, (sw, sh) in self.ICON_SIZES.items():
+ if abs(w - sw) < 5 and abs(h - sh) < 5 and 0.8 < aspect < 1.2:
+ potential_icons.append((x, y, w, h))
+ break
+
+ # If we found multiple icons in a grid pattern, assume loot window
+ if len(potential_icons) >= 2:
+ # Calculate bounding box of all icons
+ xs = [p[0] for p in potential_icons]
+ ys = [p[1] for p in potential_icons]
+ ws = [p[2] for p in potential_icons]
+ hs = [p[3] for p in potential_icons]
+
+ min_x, max_x = min(xs), max(xs) + max(ws)
+ min_y, max_y = min(ys), max(ys) + max(hs)
+
+ # Add padding
+ padding = 20
+ return (
+ max(0, min_x - padding),
+ max(0, min_y - padding),
+ max_x - min_x + padding * 2,
+ max_y - min_y + padding * 2
+ )
+
+ return None
+
+ def extract_icons_from_region(self, image: np.ndarray,
+ region: Tuple[int, int, int, int],
+ icon_size: str = 'medium') -> List[IconRegion]:
+ """
+ Extract icons from a specific region (e.g., loot window).
+
+ Args:
+ image: Full screenshot
+ region: Bounding box (x, y, w, h)
+ icon_size: Size preset ('small', 'medium', 'large')
+
+ Returns:
+ List of detected icon regions
+ """
+ x, y, w, h = region
+ roi = image[y:y+h, x:x+w]
+
+ if roi.size == 0:
+ return []
+
+ target_size = self.ICON_SIZES.get(icon_size, (48, 48))
+ gray = cv2.cvtColor(roi, cv2.COLOR_BGR2GRAY)
+
+ # Multiple threshold attempts for different icon styles
+ icons = []
+ thresholds = [(200, 255), (180, 255), (150, 255)]
+
+ for thresh_low, thresh_high in thresholds:
+ _, thresh = cv2.threshold(gray, thresh_low, thresh_high, cv2.THRESH_BINARY)
+ contours, _ = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
+
+ for cnt in contours:
+ cx, cy, cw, ch = cv2.boundingRect(cnt)
+ aspect = cw / ch if ch > 0 else 0
+
+ # Match icon size with tolerance
+ if (abs(cw - target_size[0]) < 8 and
+ abs(ch - target_size[1]) < 8 and
+ 0.7 < aspect < 1.3):
+
+ # Extract icon image
+ icon_img = roi[cy:cy+ch, cx:cx+cw]
+
+ # Resize to standard size
+ icon_img = cv2.resize(icon_img, target_size, interpolation=cv2.INTER_AREA)
+
+ icons.append(IconRegion(
+ image=icon_img,
+ bbox=(x + cx, y + cy, cw, ch),
+ confidence=0.8 # Placeholder confidence
+ ))
+
+ # Remove duplicates (icons that overlap significantly)
+ unique_icons = self._remove_duplicate_icons(icons)
+
+ return unique_icons
+
+ def _remove_duplicate_icons(self, icons: List[IconRegion],
+ iou_threshold: float = 0.5) -> List[IconRegion]:
+ """Remove duplicate icons based on IoU."""
+ if not icons:
+ return []
+
+ # Sort by confidence
+ sorted_icons = sorted(icons, key=lambda x: x.confidence, reverse=True)
+
+ kept = []
+ for icon in sorted_icons:
+ is_duplicate = False
+ for kept_icon in kept:
+ if self._calculate_iou(icon.bbox, kept_icon.bbox) > iou_threshold:
+ is_duplicate = True
+ break
+ if not is_duplicate:
+ kept.append(icon)
+
+ return kept
+
+ def _calculate_iou(self, box1: Tuple[int, int, int, int],
+ box2: Tuple[int, int, int, int]) -> float:
+ """Calculate Intersection over Union of two bounding boxes."""
+ x1, y1, w1, h1 = box1
+ x2, y2, w2, h2 = box2
+
+ xi1 = max(x1, x2)
+ yi1 = max(y1, y2)
+ xi2 = min(x1 + w1, x2 + w2)
+ yi2 = min(y1 + h1, y2 + h2)
+
+ inter_area = max(0, xi2 - xi1) * max(0, yi2 - yi1)
+ box1_area = w1 * h1
+ box2_area = w2 * h2
+
+ union_area = box1_area + box2_area - inter_area
+
+ return inter_area / union_area if union_area > 0 else 0
+
+ def detect_icons_yolo(self, image: np.ndarray,
+ model_path: Optional[str] = None) -> List[IconRegion]:
+ """
+ Detect icons using YOLO model (if available).
+
+ This is a placeholder for future YOLO integration.
+ """
+ # TODO: Implement YOLO detection when model is trained
+ logger.debug("YOLO detection not yet implemented")
+ return []
+
+
+class GameVisionAI:
+ """
+ Main AI vision interface for game screenshot analysis.
+ Combines OCR and icon detection with GPU acceleration.
+ """
+
+ def __init__(self, use_gpu: bool = True, ocr_lang: str = 'en',
+ data_dir: Optional[Path] = None):
+ """
+ Initialize Game Vision AI.
+
+ Args:
+ use_gpu: Enable GPU acceleration if available
+ ocr_lang: Language for OCR ('en', 'sv', 'latin')
+ data_dir: Directory for storing extracted data
+ """
+ self.use_gpu = use_gpu
+ self.data_dir = data_dir or Path.home() / ".lemontropia"
+ self.extracted_icons_dir = self.data_dir / "extracted_icons"
+ self.extracted_icons_dir.mkdir(parents=True, exist_ok=True)
+
+ # Detect GPU
+ self.backend = GPUDetector.detect_backend() if use_gpu else GPUBackend.CPU
+
+ # Initialize processors
+ self.ocr = OCRProcessor(use_gpu=use_gpu, lang=ocr_lang)
+ self.icon_detector = IconDetector()
+
+ # Icon matching cache
+ self.icon_cache: Dict[str, ItemMatch] = {}
+
+ logger.info(f"GameVisionAI initialized (GPU: {self.backend.value})")
+
+ def extract_text_from_image(self, image_path: Union[str, Path]) -> List[TextRegion]:
+ """
+ Extract all text from an image.
+
+ Args:
+ image_path: Path to screenshot image
+
+ Returns:
+ List of detected text regions
+ """
+ return self.ocr.extract_text(image_path)
+
+ def extract_icons_from_image(self, image_path: Union[str, Path],
+ auto_detect_window: bool = True) -> List[IconRegion]:
+ """
+ Extract item icons from image.
+
+ Args:
+ image_path: Path to screenshot image
+ auto_detect_window: Automatically detect loot window
+
+ Returns:
+ List of detected icon regions
+ """
+ image = cv2.imread(str(image_path))
+ if image is None:
+ logger.error(f"Failed to load image: {image_path}")
+ return []
+
+ if auto_detect_window:
+ window_region = self.icon_detector.detect_loot_window(image)
+ if window_region:
+ logger.debug(f"Detected loot window: {window_region}")
+ return self.icon_detector.extract_icons_from_region(
+ image, window_region
+ )
+ else:
+ logger.debug("No loot window detected, scanning full image")
+ # Scan full image
+ h, w = image.shape[:2]
+ return self.icon_detector.extract_icons_from_region(
+ image, (0, 0, w, h)
+ )
+ else:
+ h, w = image.shape[:2]
+ return self.icon_detector.extract_icons_from_region(
+ image, (0, 0, w, h)
+ )
+
+ def match_icon_to_database(self, icon_image: np.ndarray,
+ database_path: Optional[Path] = None) -> Optional[ItemMatch]:
+ """
+ Match extracted icon to item database.
+
+ Args:
+ icon_image: Icon image (numpy array)
+ database_path: Path to icon database directory
+
+ Returns:
+ ItemMatch if found, None otherwise
+ """
+ from .icon_matcher import IconMatcher
+
+ # Lazy load matcher
+ if not hasattr(self, '_icon_matcher'):
+ self._icon_matcher = IconMatcher(database_path)
+
+ return self._icon_matcher.match_icon(icon_image)
+
+ def process_screenshot(self, image_path: Union[str, Path],
+ extract_text: bool = True,
+ extract_icons: bool = True) -> VisionResult:
+ """
+ Process screenshot with all vision capabilities.
+
+ Args:
+ image_path: Path to screenshot
+ extract_text: Enable text extraction
+ extract_icons: Enable icon extraction
+
+ Returns:
+ VisionResult with all detections
+ """
+ start_time = time.time()
+
+ result = VisionResult(gpu_backend=self.backend.value)
+
+ # Load image once
+ image = cv2.imread(str(image_path))
+ if image is None:
+ logger.error(f"Failed to load image: {image_path}")
+ return result
+
+ # Extract text
+ if extract_text:
+ result.text_regions = self.ocr.extract_text(image)
+ logger.debug(f"Extracted {len(result.text_regions)} text regions")
+
+ # Extract icons
+ if extract_icons:
+ result.icon_regions = self.extract_icons_from_image(image_path)
+ logger.debug(f"Extracted {len(result.icon_regions)} icons")
+
+ # Save extracted icons
+ self._save_extracted_icons(result.icon_regions)
+
+ result.processing_time_ms = (time.time() - start_time) * 1000
+
+ return result
+
+ def _save_extracted_icons(self, icons: List[IconRegion]):
+ """Save extracted icons to disk."""
+ for i, icon in enumerate(icons):
+ filename = f"icon_{icon.icon_hash[:16]}_{int(time.time())}_{i}.png"
+ filepath = self.extracted_icons_dir / filename
+ cv2.imwrite(str(filepath), icon.image)
+ logger.debug(f"Saved icon: {filepath}")
+
+ def get_gpu_info(self) -> Dict[str, Any]:
+ """Get GPU information."""
+ return GPUDetector.get_gpu_info()
+
+ def is_gpu_available(self) -> bool:
+ """Check if GPU acceleration is available."""
+ return self.backend != GPUBackend.CPU
+
+ def calibrate_for_game(self, sample_screenshots: List[Path]) -> Dict[str, Any]:
+ """
+ Calibrate vision system using sample screenshots.
+
+ Args:
+ sample_screenshots: List of sample game screenshots
+
+ Returns:
+ Calibration results
+ """
+ calibration = {
+ 'screenshots_processed': 0,
+ 'text_regions_detected': 0,
+ 'icons_detected': 0,
+ 'average_processing_time_ms': 0,
+ 'detected_regions': {}
+ }
+
+ total_time = 0
+
+ for screenshot_path in sample_screenshots:
+ try:
+ start = time.time()
+ result = self.process_screenshot(screenshot_path)
+ elapsed = (time.time() - start) * 1000
+
+ calibration['screenshots_processed'] += 1
+ calibration['text_regions_detected'] += len(result.text_regions)
+ calibration['icons_detected'] += len(result.icon_regions)
+ total_time += elapsed
+
+ except Exception as e:
+ logger.error(f"Failed to process {screenshot_path}: {e}")
+
+ if calibration['screenshots_processed'] > 0:
+ calibration['average_processing_time_ms'] = (
+ total_time / calibration['screenshots_processed']
+ )
+
+ return calibration
+
+
+# Public API of this module (imported via `from game_vision_ai import *`).
+__all__ = [
+    'GameVisionAI',
+    'TextRegion',
+    'IconRegion',
+    'ItemMatch',
+    'VisionResult',
+    'GPUBackend',
+    'GPUDetector',
+    'OCRProcessor',
+    'IconDetector'
+]
diff --git a/modules/icon_matcher.py b/modules/icon_matcher.py
new file mode 100644
index 0000000..9cdac1c
--- /dev/null
+++ b/modules/icon_matcher.py
@@ -0,0 +1,614 @@
+"""
+Lemontropia Suite - Icon Matcher Module
+Icon similarity matching using multiple algorithms.
+Supports perceptual hashing, template matching, and feature-based matching.
+"""
+
+import cv2
+import numpy as np
+import logging
+import json
+from pathlib import Path
+from dataclasses import dataclass, asdict
+from typing import Optional, List, Dict, Tuple, Any
+import sqlite3
+import pickle
+
+logger = logging.getLogger(__name__)
+
+
+@dataclass
+class MatchResult:
+    """Outcome of matching one icon against the item database."""
+    item_name: str         # name of the matched item
+    confidence: float      # score in [0, 1] produced by the matching method
+    match_method: str      # strategy that matched: 'hash', 'feature', or 'template'
+    item_id: Optional[str] = None
+    category: Optional[str] = None
+    # Annotation corrected to Optional: the field defaults to None and is
+    # normalized to {} in __post_init__.
+    metadata: Optional[Dict[str, Any]] = None
+
+    def __post_init__(self):
+        # Normalize so callers can always treat metadata as a dict.
+        if self.metadata is None:
+            self.metadata = {}
+
+
+class PerceptualHash:
+ """Perceptual hash implementation for icon matching."""
+
+ @staticmethod
+ def average_hash(image: np.ndarray, hash_size: int = 16) -> str:
+ """Compute average hash (aHash)."""
+ # Convert to grayscale
+ if len(image.shape) == 3:
+ gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
+ else:
+ gray = image
+
+ # Resize
+ resized = cv2.resize(gray, (hash_size, hash_size), interpolation=cv2.INTER_AREA)
+
+ # Compute average
+ avg = resized.mean()
+
+ # Create hash
+ hash_bits = (resized > avg).flatten()
+ return ''.join(['1' if b else '0' for b in hash_bits])
+
+ @staticmethod
+ def difference_hash(image: np.ndarray, hash_size: int = 16) -> str:
+ """Compute difference hash (dHash)."""
+ if len(image.shape) == 3:
+ gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
+ else:
+ gray = image
+
+ # Resize (hash_size+1 for horizontal differences)
+ resized = cv2.resize(gray, (hash_size + 1, hash_size), interpolation=cv2.INTER_AREA)
+
+ # Compute differences
+ diff = resized[:, 1:] > resized[:, :-1]
+ return ''.join(['1' if b else '0' for b in diff.flatten()])
+
+ @staticmethod
+ def wavelet_hash(image: np.ndarray, hash_size: int = 16) -> str:
+ """Compute wavelet hash (wHash) using Haar wavelet."""
+ try:
+ import pywt
+ except ImportError:
+ logger.debug("PyWavelets not available, falling back to average hash")
+ return PerceptualHash.average_hash(image, hash_size)
+
+ if len(image.shape) == 3:
+ gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
+ else:
+ gray = image
+
+ # Resize to power of 2
+ size = 2 ** (hash_size - 1).bit_length()
+ resized = cv2.resize(gray, (size, size), interpolation=cv2.INTER_AREA)
+
+ # Apply Haar wavelet transform
+ coeffs = pywt.dwt2(resized, 'haar')
+ cA, (cH, cV, cD) = coeffs
+
+ # Use approximation coefficients
+ avg = cA.mean()
+ hash_bits = (cA > avg).flatten()
+ return ''.join(['1' if b else '0' for b in hash_bits])
+
+ @staticmethod
+ def hamming_distance(hash1: str, hash2: str) -> int:
+ """Calculate Hamming distance between two hashes."""
+ if len(hash1) != len(hash2):
+ raise ValueError("Hashes must be same length")
+ return sum(c1 != c2 for c1, c2 in zip(hash1, hash2))
+
+ @staticmethod
+ def similarity(hash1: str, hash2: str) -> float:
+ """Calculate similarity between 0 and 1."""
+ distance = PerceptualHash.hamming_distance(hash1, hash2)
+ max_distance = len(hash1)
+ return 1.0 - (distance / max_distance)
+
+
+class FeatureMatcher:
+ """Feature-based icon matching using ORB/SIFT."""
+
+ def __init__(self):
+ self.orb = cv2.ORB_create(nfeatures=500)
+ self.matcher = cv2.BFMatcher(cv2.NORM_HAMMING, crossCheck=True)
+
+ def extract_features(self, image: np.ndarray) -> Tuple[List, np.ndarray]:
+ """Extract ORB features from image."""
+ if len(image.shape) == 3:
+ gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
+ else:
+ gray = image
+
+ keypoints, descriptors = self.orb.detectAndCompute(gray, None)
+ return keypoints, descriptors
+
+ def match_features(self, desc1: np.ndarray, desc2: np.ndarray,
+ threshold: float = 0.7) -> float:
+ """
+ Match features between two descriptors.
+
+ Returns confidence score (0-1).
+ """
+ if desc1 is None or desc2 is None:
+ return 0.0
+
+ try:
+ matches = self.matcher.match(desc1, desc2)
+ matches = sorted(matches, key=lambda x: x.distance)
+
+ # Calculate match ratio
+ if len(matches) < 4:
+ return 0.0
+
+ # Good matches have distance below threshold
+ good_matches = [m for m in matches if m.distance < 50]
+
+ if not good_matches:
+ return 0.0
+
+ # Score based on number of good matches vs minimum needed
+ score = min(len(good_matches) / 20, 1.0) # Normalize to 20 matches
+ return score
+
+ except Exception as e:
+ logger.debug(f"Feature matching failed: {e}")
+ return 0.0
+
+
+class TemplateMatcher:
+ """Template matching for icons."""
+
+ @staticmethod
+ def match(template: np.ndarray, image: np.ndarray,
+ methods: List[int] = None) -> float:
+ """
+ Match template to image using multiple methods.
+
+ Returns best confidence score.
+ """
+ if methods is None:
+ methods = [
+ cv2.TM_CCOEFF_NORMED,
+ cv2.TM_CCORR_NORMED,
+ cv2.TM_SQDIFF_NORMED
+ ]
+
+ # Ensure same size
+ h, w = template.shape[:2]
+ image = cv2.resize(image, (w, h), interpolation=cv2.INTER_AREA)
+
+ best_score = 0.0
+
+ for method in methods:
+ try:
+ result = cv2.matchTemplate(image, template, method)
+ _, max_val, _, _ = cv2.minMaxLoc(result)
+
+ # Normalize SQDIFF (lower is better)
+ if method == cv2.TM_SQDIFF_NORMED:
+ max_val = 1.0 - max_val
+
+ best_score = max(best_score, max_val)
+ except Exception as e:
+ logger.debug(f"Template matching failed: {e}")
+ continue
+
+ return best_score
+
+
+class IconDatabase:
+ """Database for storing and retrieving icon hashes."""
+
+ def __init__(self, db_path: Optional[Path] = None):
+ self.db_path = db_path or Path.home() / ".lemontropia" / "icon_database.db"
+ self.db_path.parent.mkdir(parents=True, exist_ok=True)
+ self._init_database()
+
+ def _init_database(self):
+ """Initialize SQLite database."""
+ conn = sqlite3.connect(str(self.db_path))
+ cursor = conn.cursor()
+
+ cursor.execute('''
+ CREATE TABLE IF NOT EXISTS icons (
+ id INTEGER PRIMARY KEY AUTOINCREMENT,
+ item_name TEXT NOT NULL,
+ item_id TEXT,
+ category TEXT,
+ avg_hash TEXT,
+ diff_hash TEXT,
+ wavelet_hash TEXT,
+ features BLOB,
+ metadata TEXT,
+ created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
+ )
+ ''')
+
+ cursor.execute('''
+ CREATE INDEX IF NOT EXISTS idx_avg_hash ON icons(avg_hash)
+ ''')
+
+ cursor.execute('''
+ CREATE INDEX IF NOT EXISTS idx_item_name ON icons(item_name)
+ ''')
+
+ conn.commit()
+ conn.close()
+
+ def add_icon(self, item_name: str, image: np.ndarray,
+ item_id: Optional[str] = None,
+ category: Optional[str] = None,
+ metadata: Optional[Dict] = None) -> bool:
+ """Add icon to database."""
+ try:
+ # Compute hashes
+ avg_hash = PerceptualHash.average_hash(image)
+ diff_hash = PerceptualHash.difference_hash(image)
+ wavelet_hash = PerceptualHash.wavelet_hash(image)
+
+ # Extract features
+ feature_matcher = FeatureMatcher()
+ _, features = feature_matcher.extract_features(image)
+ features_blob = pickle.dumps(features) if features is not None else None
+
+ conn = sqlite3.connect(str(self.db_path))
+ cursor = conn.cursor()
+
+ cursor.execute('''
+ INSERT INTO icons
+ (item_name, item_id, category, avg_hash, diff_hash, wavelet_hash, features, metadata)
+ VALUES (?, ?, ?, ?, ?, ?, ?, ?)
+ ''', (
+ item_name, item_id, category,
+ avg_hash, diff_hash, wavelet_hash,
+ features_blob,
+ json.dumps(metadata) if metadata else None
+ ))
+
+ conn.commit()
+ conn.close()
+
+ logger.debug(f"Added icon to database: {item_name}")
+ return True
+
+ except Exception as e:
+ logger.error(f"Failed to add icon: {e}")
+ return False
+
+ def find_by_hash(self, avg_hash: str, max_distance: int = 10) -> List[Tuple[str, float, Dict]]:
+ """Find icons by hash similarity."""
+ conn = sqlite3.connect(str(self.db_path))
+ cursor = conn.cursor()
+
+ cursor.execute('SELECT item_name, avg_hash, diff_hash, item_id, category, metadata FROM icons')
+ results = []
+
+ for row in cursor.fetchall():
+ item_name, db_avg_hash, db_diff_hash, item_id, category, metadata_json = row
+
+ # Check average hash similarity
+ distance = PerceptualHash.hamming_distance(avg_hash, db_avg_hash)
+
+ if distance <= max_distance:
+ similarity = 1.0 - (distance / len(avg_hash))
+ metadata = json.loads(metadata_json) if metadata_json else {}
+ results.append((item_name, similarity, {
+ 'item_id': item_id,
+ 'category': category,
+ 'metadata': metadata
+ }))
+
+ conn.close()
+
+ # Sort by similarity
+ results.sort(key=lambda x: x[1], reverse=True)
+ return results
+
+ def get_all_icons(self) -> List[Dict]:
+ """Get all icons from database."""
+ conn = sqlite3.connect(str(self.db_path))
+ cursor = conn.cursor()
+
+ cursor.execute('''
+ SELECT item_name, item_id, category, avg_hash, metadata
+ FROM icons
+ ''')
+
+ results = []
+ for row in cursor.fetchall():
+ results.append({
+ 'item_name': row[0],
+ 'item_id': row[1],
+ 'category': row[2],
+ 'avg_hash': row[3],
+ 'metadata': json.loads(row[4]) if row[4] else {}
+ })
+
+ conn.close()
+ return results
+
+ def get_icon_count(self) -> int:
+ """Get total number of icons in database."""
+ conn = sqlite3.connect(str(self.db_path))
+ cursor = conn.cursor()
+ cursor.execute('SELECT COUNT(*) FROM icons')
+ count = cursor.fetchone()[0]
+ conn.close()
+ return count
+
+ def delete_icon(self, item_name: str) -> bool:
+ """Delete icon from database."""
+ conn = sqlite3.connect(str(self.db_path))
+ cursor = conn.cursor()
+ cursor.execute('DELETE FROM icons WHERE item_name = ?', (item_name,))
+ conn.commit()
+ deleted = cursor.rowcount > 0
+ conn.close()
+ return deleted
+
+
+class IconMatcher:
+ """
+ Main icon matching interface.
+ Combines multiple matching algorithms for best results.
+ """
+
+ # Confidence thresholds
+ CONFIDENCE_HIGH = 0.85
+ CONFIDENCE_MEDIUM = 0.70
+ CONFIDENCE_LOW = 0.50
+
+    def __init__(self, database_path: Optional[Path] = None,
+                 icons_dir: Optional[Path] = None):
+        """
+        Initialize icon matcher.
+
+        Args:
+            database_path: Path to icon database file (defaults to
+                ~/.lemontropia/icon_database.db via IconDatabase).
+            icons_dir: Directory containing icon images used for template
+                matching (defaults to ~/.lemontropia/icons).
+        """
+        self.database = IconDatabase(database_path)
+        self.icons_dir = icons_dir or Path.home() / ".lemontropia" / "icons"
+        self.feature_matcher = FeatureMatcher()
+
+        # Cache for loaded icon images, keyed by name.
+        # NOTE(review): nothing in the visible code populates this yet.
+        self._icon_cache: Dict[str, np.ndarray] = {}
+
+ def match_icon(self, image: np.ndarray,
+ match_methods: List[str] = None) -> Optional[MatchResult]:
+ """
+ Match an icon image against the database.
+
+ Args:
+ image: Icon image (numpy array)
+ match_methods: List of methods to use ('hash', 'feature', 'template')
+
+ Returns:
+ MatchResult if match found, None otherwise
+ """
+ if match_methods is None:
+ match_methods = ['hash', 'feature', 'template']
+
+ results = []
+
+ # Method 1: Perceptual Hash Matching
+ if 'hash' in match_methods:
+ hash_result = self._match_by_hash(image)
+ if hash_result:
+ results.append(hash_result)
+
+ # Method 2: Feature Matching
+ if 'feature' in match_methods:
+ feature_result = self._match_by_features(image)
+ if feature_result:
+ results.append(feature_result)
+
+ # Method 3: Template Matching
+ if 'template' in match_methods:
+ template_result = self._match_by_template(image)
+ if template_result:
+ results.append(template_result)
+
+ if not results:
+ return None
+
+ # Return best match
+ best = max(results, key=lambda x: x.confidence)
+ return best
+
+ def _match_by_hash(self, image: np.ndarray) -> Optional[MatchResult]:
+ """Match using perceptual hashing."""
+ avg_hash = PerceptualHash.average_hash(image)
+
+ # Query database
+ matches = self.database.find_by_hash(avg_hash, max_distance=15)
+
+ if not matches:
+ return None
+
+ best_match = matches[0]
+ item_name, similarity, meta = best_match
+
+ if similarity >= self.CONFIDENCE_LOW:
+ return MatchResult(
+ item_name=item_name,
+ confidence=similarity,
+ match_method='hash',
+ item_id=meta.get('item_id'),
+ category=meta.get('category'),
+ metadata=meta.get('metadata', {})
+ )
+
+ return None
+
+ def _match_by_features(self, image: np.ndarray) -> Optional[MatchResult]:
+ """Match using ORB features."""
+ _, query_desc = self.feature_matcher.extract_features(image)
+
+ if query_desc is None:
+ return None
+
+ # Get all icons with features from database
+ conn = sqlite3.connect(str(self.database.db_path))
+ cursor = conn.cursor()
+ cursor.execute('''
+ SELECT item_name, features, item_id, category, metadata
+ FROM icons WHERE features IS NOT NULL
+ ''')
+
+ best_match = None
+ best_score = 0.0
+ best_meta = {}
+
+ for row in cursor.fetchall():
+ item_name, features_blob, item_id, category, metadata_json = row
+ db_desc = pickle.loads(features_blob)
+
+ score = self.feature_matcher.match_features(query_desc, db_desc)
+
+ if score > best_score:
+ best_score = score
+ best_match = item_name
+ best_meta = {
+ 'item_id': item_id,
+ 'category': category,
+ 'metadata': json.loads(metadata_json) if metadata_json else {}
+ }
+
+ conn.close()
+
+ if best_match and best_score >= self.CONFIDENCE_LOW:
+ return MatchResult(
+ item_name=best_match,
+ confidence=best_score,
+ match_method='feature',
+ item_id=best_meta.get('item_id'),
+ category=best_meta.get('category'),
+ metadata=best_meta.get('metadata', {})
+ )
+
+ return None
+
+ def _match_by_template(self, image: np.ndarray) -> Optional[MatchResult]:
+ """Match using template matching against icon files."""
+ if not self.icons_dir.exists():
+ return None
+
+ # Resize query to standard size
+ standard_size = (64, 64)
+ query_resized = cv2.resize(image, standard_size, interpolation=cv2.INTER_AREA)
+
+ best_match = None
+ best_score = 0.0
+
+ for icon_file in self.icons_dir.glob("**/*.png"):
+ try:
+ template = cv2.imread(str(icon_file), cv2.IMREAD_COLOR)
+ if template is None:
+ continue
+
+ template_resized = cv2.resize(template, standard_size, interpolation=cv2.INTER_AREA)
+
+ score = TemplateMatcher.match(query_resized, template_resized)
+
+ if score > best_score:
+ best_score = score
+ best_match = icon_file.stem
+
+ except Exception as e:
+ logger.debug(f"Template matching failed for {icon_file}: {e}")
+ continue
+
+ if best_match and best_score >= self.CONFIDENCE_MEDIUM:
+ return MatchResult(
+ item_name=best_match,
+ confidence=best_score,
+ match_method='template'
+ )
+
+ return None
+
+ def add_icon_to_database(self, item_name: str, image: np.ndarray,
+ item_id: Optional[str] = None,
+ category: Optional[str] = None,
+ metadata: Optional[Dict] = None) -> bool:
+ """Add a new icon to the database."""
+ return self.database.add_icon(item_name, image, item_id, category, metadata)
+
+ def batch_add_icons(self, icons_dir: Path,
+ category: Optional[str] = None) -> Tuple[int, int]:
+ """
+ Batch add icons from directory.
+
+ Returns:
+ Tuple of (success_count, fail_count)
+ """
+ success = 0
+ failed = 0
+
+ for icon_file in icons_dir.glob("**/*.png"):
+ try:
+ image = cv2.imread(str(icon_file), cv2.IMREAD_COLOR)
+ if image is None:
+ failed += 1
+ continue
+
+ item_name = icon_file.stem.replace('_', ' ').title()
+
+ if self.add_icon_to_database(item_name, image, category=category):
+ success += 1
+ else:
+ failed += 1
+
+ except Exception as e:
+ logger.error(f"Failed to add icon {icon_file}: {e}")
+ failed += 1
+
+ logger.info(f"Batch add complete: {success} success, {failed} failed")
+ return success, failed
+
+ def get_database_stats(self) -> Dict[str, Any]:
+ """Get database statistics."""
+ return {
+ 'total_icons': self.database.get_icon_count(),
+ 'database_path': str(self.database.db_path),
+ 'icons_directory': str(self.icons_dir)
+ }
+
+ def find_similar_icons(self, image: np.ndarray,
+ top_k: int = 5) -> List[MatchResult]:
+ """Find top-k similar icons."""
+ avg_hash = PerceptualHash.average_hash(image)
+
+ # Get all matches
+ matches = self.database.find_by_hash(avg_hash, max_distance=20)
+
+ results = []
+ for item_name, similarity, meta in matches[:top_k]:
+ results.append(MatchResult(
+ item_name=item_name,
+ confidence=similarity,
+ match_method='hash',
+ item_id=meta.get('item_id'),
+ category=meta.get('category'),
+ metadata=meta.get('metadata', {})
+ ))
+
+ return results
+
+
# Export main classes
# NOTE(review): GameVisionAI / GPUDetector are imported from this module by
# the UI dialogs but are not listed here, so `import *` will not expose them
# — confirm whether that is intentional.
__all__ = [
    'IconMatcher',
    'MatchResult',
    'PerceptualHash',
    'FeatureMatcher',
    'TemplateMatcher',
    'IconDatabase'
]
diff --git a/requirements.txt b/requirements.txt
index 0559fc3..9a27251 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -11,13 +11,33 @@ PyQt6>=6.4.0
pytest>=7.0.0
pytest-asyncio>=0.21.0
-# GUI Framework
-PyQt6>=6.4.0
-
-# OCR Engines
-paddleocr>=2.6.0
+# OCR Engines - PaddleOCR for GPU-accelerated text recognition
+paddlepaddle-gpu>=2.5.0; sys_platform != 'darwin' # CUDA version for Linux/Windows
+paddlepaddle>=2.5.0; sys_platform == 'darwin' # CPU version for macOS
+paddleocr>=2.7.0
pytesseract>=0.3.10
+# Computer Vision
+opencv-python>=4.8.0
+numpy>=1.24.0
+Pillow>=10.0.0
+
+# Deep Learning Framework (for GPU detection and YOLO support)
+torch>=2.0.0
+torchvision>=0.15.0
+
+# Windows DirectML support (optional)
+# torch-directml>=0.3.0; sys_platform == 'win32'
+
+# Screen capture
+mss>=9.0.0
+
+# Image hashing and processing
+imagehash>=4.3.1
+
+# Wavelet transforms (for wHash)
+PyWavelets>=1.4.0
+
# Async support
aiofiles>=23.0.0
diff --git a/ui/vision_calibration_dialog.py b/ui/vision_calibration_dialog.py
new file mode 100644
index 0000000..0ed2f62
--- /dev/null
+++ b/ui/vision_calibration_dialog.py
@@ -0,0 +1,628 @@
+"""
+Lemontropia Suite - Vision Calibration Dialog
+Wizard for calibrating Game Vision AI to user's game setup.
+"""
+
+import sys
+import time
+from pathlib import Path
+from typing import Optional, List, Dict, Any
+
+from PyQt6.QtWidgets import (
+ QWizard, QWizardPage, QVBoxLayout, QHBoxLayout, QLabel, QLineEdit,
+ QPushButton, QComboBox, QCheckBox, QProgressBar, QGroupBox,
+ QFormLayout, QTextEdit, QMessageBox, QFileDialog, QListWidget,
+ QListWidgetItem, QSpinBox, QDoubleSpinBox, QWidget
+)
+from PyQt6.QtCore import Qt, QSettings, QThread, pyqtSignal
+from PyQt6.QtGui import QFont, QPixmap, QImage
+import numpy as np
+
+import logging
+
+logger = logging.getLogger(__name__)
+
+
class CalibrationWorker(QThread):
    """Background worker that runs vision calibration off the UI thread.

    Emits progress/completion/error via Qt signals so the wizard can update
    its widgets from the main thread.
    """

    progress = pyqtSignal(int, str)  # percentage, message
    calibration_complete = pyqtSignal(dict)
    error_occurred = pyqtSignal(str)

    def __init__(self, screenshot_paths: List[Path], settings: Dict[str, Any]):
        super().__init__()
        self.screenshot_paths = screenshot_paths
        self.settings = settings
        self._cancelled = False

    def run(self):
        try:
            # Deferred import keeps dialog import cheap when vision is unused.
            from modules.game_vision_ai import GameVisionAI

            self.progress.emit(0, "Initializing Game Vision AI...")

            engine = GameVisionAI(
                use_gpu=self.settings.get('use_gpu', True),
                ocr_lang=self.settings.get('ocr_lang', 'en')
            )

            report = {
                'screenshots_processed': 0,
                'text_regions_detected': 0,
                'icons_detected': 0,
                'processing_times': [],
                'errors': [],
                'detected_regions': {},
                'sample_extractions': []
            }

            count = len(self.screenshot_paths)

            for index, shot in enumerate(self.screenshot_paths):
                # Cooperative cancellation, checked between screenshots.
                if self._cancelled:
                    self.error_occurred.emit("Calibration cancelled")
                    return

                self.progress.emit(int((index / count) * 100),
                                   f"Processing {shot.name}...")

                try:
                    started = time.time()
                    outcome = engine.process_screenshot(
                        shot,
                        extract_text=self.settings.get('extract_text', True),
                        extract_icons=self.settings.get('extract_icons', True)
                    )
                    elapsed_ms = (time.time() - started) * 1000

                    report['screenshots_processed'] += 1
                    report['text_regions_detected'] += len(outcome.text_regions)
                    report['icons_detected'] += len(outcome.icon_regions)
                    report['processing_times'].append(elapsed_ms)

                    # Keep the first three screenshots as inspectable samples.
                    if index < 3:
                        report['sample_extractions'].append({
                            'screenshot': str(shot),
                            'text_count': len(outcome.text_regions),
                            'icon_count': len(outcome.icon_regions),
                            'processing_time_ms': outcome.processing_time_ms,
                            'text_samples': [
                                {'text': region.text, 'confidence': region.confidence}
                                for region in outcome.text_regions[:5]
                            ]
                        })

                except Exception as e:
                    report['errors'].append(f"{shot.name}: {str(e)}")
                    logger.error(f"Failed to process {shot}: {e}")

            # Aggregate timing statistics (only when at least one succeeded).
            if report['processing_times']:
                report['avg_processing_time'] = np.mean(report['processing_times'])
                report['min_processing_time'] = np.min(report['processing_times'])
                report['max_processing_time'] = np.max(report['processing_times'])

            self.progress.emit(100, "Calibration complete!")
            self.calibration_complete.emit(report)

        except Exception as e:
            self.error_occurred.emit(str(e))

    def cancel(self):
        """Request cooperative cancellation; honored between screenshots."""
        self._cancelled = True
+
+
class WelcomePage(QWizardPage):
    """Welcome page of calibration wizard."""

    def __init__(self, parent=None):
        super().__init__(parent)
        self.setTitle("Vision Calibration Wizard")
        self.setSubTitle("Calibrate Game Vision AI for your game setup")
        self.setup_ui()

    def setup_ui(self):
        """Build the static welcome text.

        NOTE(review): the original rich-text literals were corrupted (markup
        stripped mid-string, leaving unterminated literals); reconstructed
        here as Qt rich text with the same visible wording — confirm layout.
        """
        layout = QVBoxLayout(self)

        welcome_label = QLabel(
            "<h3>Welcome to Vision Calibration</h3>"
            "<p>This wizard will help you calibrate the Game Vision AI system "
            "for optimal performance with your Entropia Universe setup.</p>"
            "<p>You will need:</p>"
            "<ul>"
            "<li>A few sample screenshots from the game</li>"
            "<li>Screenshots should include: loot windows, inventory, chat</li>"
            "<li>About 2-5 minutes to complete</li>"
            "</ul>"
        )
        welcome_label.setWordWrap(True)
        layout.addWidget(welcome_label)

        # Info box
        info_group = QGroupBox("What will be calibrated?")
        info_layout = QVBoxLayout(info_group)

        info_text = QLabel(
            "<ul>"
            "<li>OCR Accuracy: Text detection confidence and parameters</li>"
            "<li>Icon Detection: Loot window and item icon recognition</li>"
            "<li>Performance: Processing time optimization</li>"
            "<li>GPU Setup: Verify GPU acceleration is working</li>"
            "</ul>"
        )
        info_text.setWordWrap(True)
        info_layout.addWidget(info_text)

        layout.addWidget(info_group)
        layout.addStretch()
+
+
class ScreenshotSelectionPage(QWizardPage):
    """Page for selecting sample screenshots."""

    def __init__(self, parent=None):
        super().__init__(parent)
        self.setTitle("Select Sample Screenshots")
        self.setSubTitle("Choose screenshots from your game for calibration")
        self.screenshot_paths: List[Path] = []
        self.setup_ui()

    def setup_ui(self):
        """Build instruction text, the file list, and its control buttons."""
        layout = QVBoxLayout(self)

        # Guidance on what makes a useful calibration set.
        instructions = QLabel(
            "Select 3-10 screenshots that represent typical game situations:\n"
            "• Loot windows with items\n"
            "• Inventory screens\n"
            "• Chat windows with text\n"
            "• HUD with gear equipped"
        )
        instructions.setWordWrap(True)
        layout.addWidget(instructions)

        selection_group = QGroupBox("Selected Screenshots")
        group_layout = QVBoxLayout(selection_group)

        self.file_list = QListWidget()
        group_layout.addWidget(self.file_list)

        buttons_row = QHBoxLayout()

        self.add_btn = QPushButton("Add Screenshots...")
        self.add_btn.clicked.connect(self.add_screenshots)
        buttons_row.addWidget(self.add_btn)

        self.add_dir_btn = QPushButton("Add Directory...")
        self.add_dir_btn.clicked.connect(self.add_directory)
        buttons_row.addWidget(self.add_dir_btn)

        self.remove_btn = QPushButton("Remove Selected")
        self.remove_btn.clicked.connect(self.remove_selected)
        buttons_row.addWidget(self.remove_btn)

        self.clear_btn = QPushButton("Clear All")
        self.clear_btn.clicked.connect(self.clear_all)
        buttons_row.addWidget(self.clear_btn)

        buttons_row.addStretch()
        group_layout.addLayout(buttons_row)

        layout.addWidget(selection_group)

        self.status_label = QLabel("No screenshots selected")
        layout.addWidget(self.status_label)

    def _append_path(self, path: Path):
        """Add one path to both the model and the visible list, skipping duplicates."""
        if path not in self.screenshot_paths:
            self.screenshot_paths.append(path)
            self.file_list.addItem(path.name)

    def add_screenshots(self):
        """Add individual screenshot files via a file dialog."""
        chosen, _ = QFileDialog.getOpenFileNames(
            self, "Select Screenshots",
            str(Path.home()),
            "Images (*.png *.jpg *.jpeg *.bmp)"
        )

        for file_name in chosen:
            self._append_path(Path(file_name))

        self.update_status()

    def add_directory(self):
        """Add all images found in a chosen directory (non-recursive)."""
        chosen = QFileDialog.getExistingDirectory(
            self, "Select Screenshot Directory",
            str(Path.home())
        )

        if chosen:
            root = Path(chosen)
            for pattern in ('*.png', '*.jpg', '*.jpeg', '*.bmp'):
                for candidate in root.glob(pattern):
                    self._append_path(candidate)

        self.update_status()

    def remove_selected(self):
        """Remove the currently highlighted screenshot, if any."""
        row = self.file_list.currentRow()
        if row >= 0:
            # List widget rows and screenshot_paths indices stay in lockstep.
            self.file_list.takeItem(row)
            del self.screenshot_paths[row]
            self.update_status()

    def clear_all(self):
        """Remove every screenshot from the selection."""
        self.file_list.clear()
        self.screenshot_paths.clear()
        self.update_status()

    def update_status(self):
        """Refresh the status line below the list."""
        count = len(self.screenshot_paths)
        if count == 0:
            self.status_label.setText("No screenshots selected")
        elif count < 3:
            self.status_label.setText(f"⚠️ {count} screenshot(s) selected (recommend at least 3)")
        else:
            self.status_label.setText(f"✅ {count} screenshot(s) selected")

    def validatePage(self) -> bool:
        """QWizardPage hook: block Next until at least one screenshot exists."""
        if not self.screenshot_paths:
            QMessageBox.warning(self, "No Screenshots",
                                "Please select at least one screenshot.")
            return False
        return True

    def get_screenshot_paths(self) -> List[Path]:
        """Get selected screenshot paths."""
        return self.screenshot_paths
+
+
class SettingsPage(QWizardPage):
    """Page for configuring calibration settings."""

    def __init__(self, parent=None):
        super().__init__(parent)
        self.setTitle("Calibration Settings")
        self.setSubTitle("Configure vision processing options")
        self.setup_ui()

    def setup_ui(self):
        """Build GPU, OCR and icon option groups (order defines layout)."""
        root = QVBoxLayout(self)

        # --- GPU acceleration ---
        gpu_box = QGroupBox("GPU Acceleration")
        gpu_form = QFormLayout(gpu_box)

        self.use_gpu_cb = QCheckBox("Use GPU for processing")
        self.use_gpu_cb.setChecked(True)
        self.use_gpu_cb.setToolTip(
            "Enable GPU acceleration for faster processing"
        )
        gpu_form.addRow(self.use_gpu_cb)

        self.gpu_info_label = QLabel("GPU info will be detected during calibration")
        gpu_form.addRow("GPU:", self.gpu_info_label)

        root.addWidget(gpu_box)

        # --- OCR ---
        ocr_box = QGroupBox("OCR (Text Recognition)")
        ocr_form = QFormLayout(ocr_box)

        self.extract_text_cb = QCheckBox("Enable text extraction")
        self.extract_text_cb.setChecked(True)
        ocr_form.addRow(self.extract_text_cb)

        self.ocr_lang_combo = QComboBox()
        self.ocr_lang_combo.addItem("English", "en")
        self.ocr_lang_combo.addItem("Swedish", "sv")
        ocr_form.addRow("Language:", self.ocr_lang_combo)

        root.addWidget(ocr_box)

        # --- Icon detection ---
        icon_box = QGroupBox("Icon Detection")
        icon_form = QFormLayout(icon_box)

        self.extract_icons_cb = QCheckBox("Enable icon extraction")
        self.extract_icons_cb.setChecked(True)
        icon_form.addRow(self.extract_icons_cb)

        self.icon_size_combo = QComboBox()
        for caption, key in (("Small (32x32)", "small"),
                             ("Medium (48x48)", "medium"),
                             ("Large (64x64)", "large")):
            self.icon_size_combo.addItem(caption, key)
        icon_form.addRow("Icon Size:", self.icon_size_combo)

        self.auto_detect_window_cb = QCheckBox("Auto-detect loot windows")
        self.auto_detect_window_cb.setChecked(True)
        icon_form.addRow(self.auto_detect_window_cb)

        root.addWidget(icon_box)
        root.addStretch()

    def get_settings(self) -> Dict[str, Any]:
        """Collect the chosen options as a plain dict for CalibrationWorker."""
        return {
            'use_gpu': self.use_gpu_cb.isChecked(),
            'extract_text': self.extract_text_cb.isChecked(),
            'extract_icons': self.extract_icons_cb.isChecked(),
            'ocr_lang': self.ocr_lang_combo.currentData(),
            'icon_size': self.icon_size_combo.currentData(),
            'auto_detect_window': self.auto_detect_window_cb.isChecked()
        }
+
+
class ProcessingPage(QWizardPage):
    """Page for running calibration processing.

    Owns a CalibrationWorker thread and mirrors its signals into the
    progress bar and log area; the wizard's Next button stays disabled
    until a run completes (see isComplete()).
    """

    def __init__(self, parent=None):
        super().__init__(parent)
        self.setTitle("Processing")
        self.setSubTitle("Running calibration...")
        # Gates the wizard's Next button via isComplete().
        self.is_complete = False
        # Filled by on_complete(); read by ResultsPage via get_results().
        self.calibration_results: Optional[Dict] = None
        self.setup_ui()

    def setup_ui(self):
        """Build the progress display, results log, and start/cancel buttons."""
        layout = QVBoxLayout(self)

        # Progress
        self.status_label = QLabel("Ready to start calibration")
        layout.addWidget(self.status_label)

        self.progress_bar = QProgressBar()
        self.progress_bar.setRange(0, 100)
        self.progress_bar.setValue(0)
        layout.addWidget(self.progress_bar)

        # Results area
        self.results_text = QTextEdit()
        self.results_text.setReadOnly(True)
        self.results_text.setPlaceholderText("Calibration results will appear here...")
        layout.addWidget(self.results_text)

        # Buttons
        btn_layout = QHBoxLayout()

        self.start_btn = QPushButton("Start Calibration")
        self.start_btn.clicked.connect(self.start_calibration)
        btn_layout.addWidget(self.start_btn)

        self.cancel_btn = QPushButton("Cancel")
        self.cancel_btn.clicked.connect(self.cancel_calibration)
        self.cancel_btn.setEnabled(False)
        btn_layout.addWidget(self.cancel_btn)

        btn_layout.addStretch()
        layout.addLayout(btn_layout)

    def initializePage(self):
        """Called when page is shown; resets the UI for a fresh run."""
        self.results_text.clear()
        self.progress_bar.setValue(0)
        self.status_label.setText("Ready to start calibration")
        self.is_complete = False
        self.start_btn.setEnabled(True)

    def start_calibration(self):
        """Start calibration processing in a background worker thread."""
        wizard = self.wizard()
        # NOTE(review): page ids are hard-coded to the addPage() order in
        # VisionCalibrationWizard — keep in sync if pages are reordered.
        screenshot_page = wizard.page(1)  # ScreenshotSelectionPage
        settings_page = wizard.page(2)  # SettingsPage

        screenshot_paths = screenshot_page.get_screenshot_paths()
        settings = settings_page.get_settings()

        if not screenshot_paths:
            QMessageBox.warning(self, "No Screenshots",
                               "No screenshots selected!")
            return

        self.start_btn.setEnabled(False)
        self.cancel_btn.setEnabled(True)
        self.status_label.setText("Starting calibration...")

        # Start worker thread; its signals marshal results back to the UI thread.
        self.worker = CalibrationWorker(screenshot_paths, settings)
        self.worker.progress.connect(self.on_progress)
        self.worker.calibration_complete.connect(self.on_complete)
        self.worker.error_occurred.connect(self.on_error)
        self.worker.start()

    def on_progress(self, percentage: int, message: str):
        """Handle a progress update from the worker."""
        self.progress_bar.setValue(percentage)
        self.status_label.setText(message)
        self.results_text.append(message)

    def on_complete(self, results: Dict):
        """Handle calibration completion: store results and unlock Next."""
        self.calibration_results = results
        self.is_complete = True
        self.cancel_btn.setEnabled(False)

        # Display results
        summary = f"""
Calibration Complete!

Screenshots processed: {results['screenshots_processed']}
Text regions detected: {results['text_regions_detected']}
Icons detected: {results['icons_detected']}
"""
        if 'avg_processing_time' in results:
            summary += f"Average processing time: {results['avg_processing_time']:.1f}ms\n"

        if results.get('errors'):
            summary += f"\nErrors: {len(results['errors'])}"

        self.results_text.append(summary)

        # Enable next button (isComplete() now returns True)
        self.completeChanged.emit()

    def on_error(self, error: str):
        """Handle a calibration error; re-enables Start for another attempt."""
        self.status_label.setText(f"Error: {error}")
        self.results_text.append(f"❌ Error: {error}")
        self.start_btn.setEnabled(True)
        self.cancel_btn.setEnabled(False)

    def cancel_calibration(self):
        """Request cooperative cancellation of the running worker."""
        if hasattr(self, 'worker'):
            self.worker.cancel()
        self.status_label.setText("Cancelling...")

    def isComplete(self) -> bool:
        # QWizardPage hook: controls whether the wizard's Next button is enabled.
        return self.is_complete

    def get_results(self) -> Optional[Dict]:
        """Get calibration results."""
        return self.calibration_results
+
+
class ResultsPage(QWizardPage):
    """Final page showing calibration results."""

    def __init__(self, parent=None):
        super().__init__(parent)
        self.setTitle("Calibration Results")
        self.setSubTitle("Review and save calibration results")
        self.setup_ui()

    def setup_ui(self):
        """Build the results summary and recommendations labels."""
        layout = QVBoxLayout(self)

        self.results_label = QLabel("Processing results will appear here...")
        self.results_label.setWordWrap(True)
        layout.addWidget(self.results_label)

        # Recommendations
        self.recommendations_label = QLabel("")
        self.recommendations_label.setWordWrap(True)
        layout.addWidget(self.recommendations_label)

        layout.addStretch()

    def initializePage(self):
        """Called when page is shown; populates labels from ProcessingPage."""
        wizard = self.wizard()
        # NOTE(review): page id hard-coded to the wizard's addPage() order.
        processing_page = wizard.page(3)  # ProcessingPage
        results = processing_page.get_results()

        if results:
            # Format results
            text = f"""
Calibration Results

Processing Summary:

- Screenshots processed: {results['screenshots_processed']}
- Text regions detected: {results['text_regions_detected']}
- Icons detected: {results['icons_detected']}

"""
            if 'avg_processing_time' in results:
                text += f"""
Performance:

- Average processing time: {results['avg_processing_time']:.1f}ms
- Min processing time: {results['min_processing_time']:.1f}ms
- Max processing time: {results['max_processing_time']:.1f}ms

"""
            self.results_label.setText(text)

            # Generate recommendations
            recommendations = self._generate_recommendations(results)
            self.recommendations_label.setText(recommendations)

            # Save results to settings
            self._save_calibration_results(results)

    def _generate_recommendations(self, results: Dict) -> str:
        """Generate human-readable calibration recommendations.

        NOTE(review): the original string literals were corrupted (markup
        stripped mid-literal, leaving unterminated strings); reconstructed
        here as plain text with newline separators — confirm wording.
        """
        recs = ["Recommendations\n"]

        # Performance recommendations
        if 'avg_processing_time' in results:
            avg_time = results['avg_processing_time']
            if avg_time < 100:
                recs.append("- ✅ Excellent performance! GPU acceleration is working well.\n")
            elif avg_time < 500:
                recs.append("- ✅ Good performance. Processing is reasonably fast.\n")
            else:
                recs.append("- ⚠️ Processing is slow. Consider enabling GPU or reducing screenshot resolution.\n")

        # Detection recommendations
        total_regions = results['text_regions_detected'] + results['icons_detected']
        if total_regions == 0:
            recs.append("- ⚠️ No text or icons detected. Check screenshot quality and game UI visibility.\n")
        elif results['text_regions_detected'] == 0:
            recs.append("- ⚠️ No text detected. Try adjusting OCR thresholds or check image clarity.\n")
        elif results['icons_detected'] == 0:
            recs.append("- ⚠️ No icons detected. Ensure screenshots include loot windows.\n")
        else:
            recs.append("- ✅ Detection is working. Text and icons are being recognized.\n")

        recs.append("\n")
        return "".join(recs)

    def _save_calibration_results(self, results: Dict):
        """Persist headline calibration numbers to QSettings."""
        settings = QSettings("Lemontropia", "GameVision")
        settings.setValue("calibration/last_run", time.time())
        settings.setValue("calibration/screenshots_processed", results['screenshots_processed'])
        settings.setValue("calibration/avg_processing_time", results.get('avg_processing_time', 0))
        settings.setValue("calibration/text_detection_rate", results['text_regions_detected'])
        settings.setValue("calibration/icon_detection_rate", results['icons_detected'])
        settings.sync()
+
+
class VisionCalibrationWizard(QWizard):
    """
    Wizard for calibrating Game Vision AI.
    """

    calibration_complete = pyqtSignal(dict)

    def __init__(self, parent=None):
        super().__init__(parent)
        self.setWindowTitle("Vision Calibration Wizard")
        self.setMinimumSize(700, 550)

        # Page order matters: ProcessingPage/ResultsPage look pages up by id.
        for page in (WelcomePage(),
                     ScreenshotSelectionPage(),
                     SettingsPage(),
                     ProcessingPage(),
                     ResultsPage()):
            self.addPage(page)

        self.setWizardStyle(QWizard.WizardStyle.ModernStyle)

    def accept(self):
        """Emit results (when a run completed) before closing the wizard."""
        results = self.page(3).get_results()  # ProcessingPage
        if results:
            self.calibration_complete.emit(results)
        super().accept()
+
+
# Export
# Public wizard entry point plus the worker thread (exposed for testing).
__all__ = ['VisionCalibrationWizard', 'CalibrationWorker']
diff --git a/ui/vision_settings_dialog.py b/ui/vision_settings_dialog.py
new file mode 100644
index 0000000..25c18b1
--- /dev/null
+++ b/ui/vision_settings_dialog.py
@@ -0,0 +1,645 @@
+"""
+Lemontropia Suite - Vision Settings Dialog
+Settings panel for configuring Game Vision AI.
+"""
+
+import sys
+from pathlib import Path
+from typing import Optional
+
+from PyQt6.QtWidgets import (
+ QDialog, QVBoxLayout, QHBoxLayout, QLabel, QLineEdit,
+ QPushButton, QComboBox, QCheckBox, QGroupBox, QFormLayout,
+ QMessageBox, QSpinBox, QDoubleSpinBox, QTabWidget,
+ QFileDialog, QTextEdit, QProgressBar, QWidget, QSlider
+)
+from PyQt6.QtCore import Qt, QSettings, QThread, pyqtSignal
+from PyQt6.QtGui import QFont, QPixmap
+
+import logging
+
+logger = logging.getLogger(__name__)
+
+
class GPUInfoThread(QThread):
    """Thread to gather GPU information without blocking the UI."""

    info_ready = pyqtSignal(dict)
    error_occurred = pyqtSignal(str)

    def run(self):
        """Probe GPU details and report the outcome via signals."""
        try:
            # Deferred import: vision module may pull in heavy dependencies.
            from modules.game_vision_ai import GPUDetector
            details = GPUDetector.get_gpu_info()
        except Exception as e:
            self.error_occurred.emit(str(e))
        else:
            self.info_ready.emit(details)
+
+
+class VisionSettingsDialog(QDialog):
+ """
+ Settings dialog for Game Vision AI configuration.
+ """
+
+ settings_saved = pyqtSignal()
+
    def __init__(self, parent=None):
        """Create the dialog, restore persisted settings, and start GPU probing."""
        super().__init__(parent)
        self.setWindowTitle("Game Vision Settings")
        self.setMinimumSize(600, 500)

        # Persistent storage shared with the calibration wizard.
        self.settings = QSettings("Lemontropia", "GameVision")
        # Filled asynchronously once the GPU info thread reports back.
        self.gpu_info = {}

        self.setup_ui()  # must run before load_settings() touches widgets
        self.load_settings()
        self.refresh_gpu_info()
+
+ def setup_ui(self):
+ """Setup the dialog UI."""
+ layout = QVBoxLayout(self)
+ layout.setSpacing(15)
+
+ # Title
+ title_label = QLabel("🎮 Game Vision AI Settings")
+ title_font = QFont()
+ title_font.setPointSize(14)
+ title_font.setBold(True)
+ title_label.setFont(title_font)
+ layout.addWidget(title_label)
+
+ # Description
+ desc_label = QLabel(
+ "Configure AI-powered computer vision for automatic game UI analysis."
+ )
+ desc_label.setWordWrap(True)
+ layout.addWidget(desc_label)
+
+ # Tabs
+ self.tabs = QTabWidget()
+ layout.addWidget(self.tabs)
+
+ # General tab
+ self.tabs.addTab(self._create_general_tab(), "General")
+
+ # GPU tab
+ self.tabs.addTab(self._create_gpu_tab(), "GPU & Performance")
+
+ # OCR tab
+ self.tabs.addTab(self._create_ocr_tab(), "OCR Settings")
+
+ # Icon Detection tab
+ self.tabs.addTab(self._create_icon_tab(), "Icon Detection")
+
+ # Buttons
+ button_layout = QHBoxLayout()
+ button_layout.addStretch()
+
+ self.reset_btn = QPushButton("Reset to Defaults")
+ self.reset_btn.clicked.connect(self.reset_settings)
+ button_layout.addWidget(self.reset_btn)
+
+ self.test_btn = QPushButton("Test Vision...")
+ self.test_btn.clicked.connect(self.open_test_dialog)
+ button_layout.addWidget(self.test_btn)
+
+ self.save_btn = QPushButton("Save")
+ self.save_btn.clicked.connect(self.save_settings)
+ self.save_btn.setDefault(True)
+ button_layout.addWidget(self.save_btn)
+
+ self.cancel_btn = QPushButton("Cancel")
+ self.cancel_btn.clicked.connect(self.reject)
+ button_layout.addWidget(self.cancel_btn)
+
+ layout.addLayout(button_layout)
+
+ def _create_general_tab(self) -> QWidget:
+ """Create general settings tab."""
+ tab = QWidget()
+ layout = QVBoxLayout(tab)
+ layout.setSpacing(15)
+
+ # Enable Vision
+ self.enable_vision_cb = QCheckBox("Enable Game Vision AI")
+ self.enable_vision_cb.setToolTip(
+ "Enable automatic screenshot analysis using AI"
+ )
+ layout.addWidget(self.enable_vision_cb)
+
+ # Auto Processing
+ self.auto_process_cb = QCheckBox("Auto-process screenshots")
+ self.auto_process_cb.setToolTip(
+ "Automatically analyze screenshots when captured"
+ )
+ layout.addWidget(self.auto_process_cb)
+
+ # Data Directory
+ dir_group = QGroupBox("Data Directories")
+ dir_layout = QFormLayout(dir_group)
+
+ # Extracted icons directory
+ icons_dir_layout = QHBoxLayout()
+ self.icons_dir_input = QLineEdit()
+ self.icons_dir_input.setReadOnly(True)
+ icons_dir_layout.addWidget(self.icons_dir_input)
+
+ self.icons_dir_btn = QPushButton("Browse...")
+ self.icons_dir_btn.clicked.connect(self.browse_icons_dir)
+ icons_dir_layout.addWidget(self.icons_dir_btn)
+
+ dir_layout.addRow("Extracted Icons:", icons_dir_layout)
+
+ # Icon database directory
+ db_dir_layout = QHBoxLayout()
+ self.db_dir_input = QLineEdit()
+ self.db_dir_input.setReadOnly(True)
+ db_dir_layout.addWidget(self.db_dir_input)
+
+ self.db_dir_btn = QPushButton("Browse...")
+ self.db_dir_btn.clicked.connect(self.browse_db_dir)
+ db_dir_layout.addWidget(self.db_dir_btn)
+
+ dir_layout.addRow("Icon Database:", db_dir_layout)
+
+ layout.addWidget(dir_group)
+
+ # Processing Options
+ options_group = QGroupBox("Processing Options")
+ options_layout = QFormLayout(options_group)
+
+ self.extract_text_cb = QCheckBox("Extract text (OCR)")
+ self.extract_text_cb.setChecked(True)
+ options_layout.addRow(self.extract_text_cb)
+
+ self.extract_icons_cb = QCheckBox("Extract icons")
+ self.extract_icons_cb.setChecked(True)
+ options_layout.addRow(self.extract_icons_cb)
+
+ self.save_icons_cb = QCheckBox("Save extracted icons to disk")
+ self.save_icons_cb.setChecked(True)
+ options_layout.addRow(self.save_icons_cb)
+
+ self.match_icons_cb = QCheckBox("Match icons to database")
+ self.match_icons_cb.setChecked(True)
+ options_layout.addRow(self.match_icons_cb)
+
+ layout.addWidget(options_group)
+ layout.addStretch()
+
+ return tab
+
+ def _create_gpu_tab(self) -> QWidget:
+ """Create GPU settings tab."""
+ tab = QWidget()
+ layout = QVBoxLayout(tab)
+ layout.setSpacing(15)
+
+ # GPU Info Group
+ gpu_group = QGroupBox("GPU Information")
+ gpu_layout = QVBoxLayout(gpu_group)
+
+ self.gpu_info_label = QLabel("Detecting GPU...")
+ self.gpu_info_label.setWordWrap(True)
+ gpu_layout.addWidget(self.gpu_info_label)
+
+ self.gpu_details = QTextEdit()
+ self.gpu_details.setReadOnly(True)
+ self.gpu_details.setMaximumHeight(100)
+ gpu_layout.addWidget(self.gpu_details)
+
+ self.refresh_gpu_btn = QPushButton("Refresh GPU Info")
+ self.refresh_gpu_btn.clicked.connect(self.refresh_gpu_info)
+ gpu_layout.addWidget(self.refresh_gpu_btn)
+
+ layout.addWidget(gpu_group)
+
+ # GPU Settings
+ settings_group = QGroupBox("GPU Acceleration")
+ settings_layout = QFormLayout(settings_group)
+
+ self.use_gpu_cb = QCheckBox("Use GPU acceleration")
+ self.use_gpu_cb.setToolTip(
+ "Enable GPU acceleration for OCR and vision processing"
+ )
+ settings_layout.addRow(self.use_gpu_cb)
+
+ # GPU Backend selection
+ self.backend_combo = QComboBox()
+ self.backend_combo.addItem("Auto-detect", "auto")
+ self.backend_combo.addItem("CUDA (NVIDIA)", "cuda")
+ self.backend_combo.addItem("MPS (Apple Silicon)", "mps")
+ self.backend_combo.addItem("DirectML (Windows)", "directml")
+ self.backend_combo.addItem("CPU only", "cpu")
+ settings_layout.addRow("Preferred Backend:", self.backend_combo)
+
+ layout.addWidget(settings_group)
+
+ # Performance Settings
+ perf_group = QGroupBox("Performance")
+ perf_layout = QFormLayout(perf_group)
+
+ self.batch_size_spin = QSpinBox()
+ self.batch_size_spin.setRange(1, 16)
+ self.batch_size_spin.setValue(1)
+ self.batch_size_spin.setToolTip(
+ "Number of images to process in parallel (higher = faster but more VRAM)"
+ )
+ perf_layout.addRow("Batch Size:", self.batch_size_spin)
+
+ self.threads_spin = QSpinBox()
+ self.threads_spin.setRange(1, 8)
+ self.threads_spin.setValue(2)
+ perf_layout.addRow("Processing Threads:", self.threads_spin)
+
+ layout.addWidget(perf_group)
+ layout.addStretch()
+
+ return tab
+
+ def _create_ocr_tab(self) -> QWidget:
+ """Create OCR settings tab."""
+ tab = QWidget()
+ layout = QVBoxLayout(tab)
+ layout.setSpacing(15)
+
+ # Language Settings
+ lang_group = QGroupBox("Language Settings")
+ lang_layout = QFormLayout(lang_group)
+
+ self.ocr_lang_combo = QComboBox()
+ self.ocr_lang_combo.addItem("English", "en")
+ self.ocr_lang_combo.addItem("Swedish", "sv")
+ self.ocr_lang_combo.addItem("Latin Script (Generic)", "latin")
+ lang_layout.addRow("OCR Language:", self.ocr_lang_combo)
+
+ self.multi_lang_cb = QCheckBox("Enable multi-language detection")
+ lang_layout.addRow(self.multi_lang_cb)
+
+ layout.addWidget(lang_group)
+
+ # OCR Parameters
+ params_group = QGroupBox("OCR Parameters")
+ params_layout = QFormLayout(params_group)
+
+ self.det_thresh_spin = QDoubleSpinBox()
+ self.det_thresh_spin.setRange(0.1, 0.9)
+ self.det_thresh_spin.setValue(0.3)
+ self.det_thresh_spin.setSingleStep(0.05)
+ self.det_thresh_spin.setToolTip(
+ "Text detection threshold (lower = more sensitive)"
+ )
+ params_layout.addRow("Detection Threshold:", self.det_thresh_spin)
+
+ self.rec_thresh_spin = QDoubleSpinBox()
+ self.rec_thresh_spin.setRange(0.1, 0.9)
+ self.rec_thresh_spin.setValue(0.5)
+ self.rec_thresh_spin.setSingleStep(0.05)
+ self.rec_thresh_spin.setToolTip(
+ "Text recognition confidence threshold"
+ )
+ params_layout.addRow("Recognition Threshold:", self.rec_thresh_spin)
+
+ self.use_angle_cls_cb = QCheckBox("Use angle classifier")
+ self.use_angle_cls_cb.setChecked(True)
+ self.use_angle_cls_cb.setToolTip(
+ "Detect and correct rotated text (slower but more accurate)"
+ )
+ params_layout.addRow(self.use_angle_cls_cb)
+
+ layout.addWidget(params_group)
+
+ # Preprocessing
+ preprocess_group = QGroupBox("Preprocessing")
+ preprocess_layout = QFormLayout(preprocess_group)
+
+ self.denoise_cb = QCheckBox("Apply denoising")
+ self.denoise_cb.setChecked(True)
+ preprocess_layout.addRow(self.denoise_cb)
+
+ self.contrast_enhance_cb = QCheckBox("Enhance contrast")
+ self.contrast_enhance_cb.setChecked(True)
+ preprocess_layout.addRow(self.contrast_enhance_cb)
+
+ layout.addWidget(preprocess_group)
+ layout.addStretch()
+
+ return tab
+
+ def _create_icon_tab(self) -> QWidget:
+ """Create icon detection settings tab."""
+ tab = QWidget()
+ layout = QVBoxLayout(tab)
+ layout.setSpacing(15)
+
+ # Detection Settings
+ detect_group = QGroupBox("Detection Settings")
+ detect_layout = QFormLayout(detect_group)
+
+ self.auto_detect_window_cb = QCheckBox("Auto-detect loot windows")
+ self.auto_detect_window_cb.setChecked(True)
+ self.auto_detect_window_cb.setToolTip(
+ "Automatically detect loot windows in screenshots"
+ )
+ detect_layout.addRow(self.auto_detect_window_cb)
+
+ self.icon_size_combo = QComboBox()
+ self.icon_size_combo.addItem("Small (32x32)", "small")
+ self.icon_size_combo.addItem("Medium (48x48)", "medium")
+ self.icon_size_combo.addItem("Large (64x64)", "large")
+ self.icon_size_combo.addItem("HUD (40x40)", "hud")
+ detect_layout.addRow("Icon Size:", self.icon_size_combo)
+
+ self.confidence_thresh_spin = QDoubleSpinBox()
+ self.confidence_thresh_spin.setRange(0.1, 1.0)
+ self.confidence_thresh_spin.setValue(0.7)
+ self.confidence_thresh_spin.setSingleStep(0.05)
+ detect_layout.addRow("Detection Confidence:", self.confidence_thresh_spin)
+
+ layout.addWidget(detect_group)
+
+ # Matching Settings
+ match_group = QGroupBox("Icon Matching")
+ match_layout = QFormLayout(match_group)
+
+ self.hash_match_cb = QCheckBox("Use perceptual hashing")
+ self.hash_match_cb.setChecked(True)
+ match_layout.addRow(self.hash_match_cb)
+
+ self.feature_match_cb = QCheckBox("Use feature matching (ORB)")
+ self.feature_match_cb.setChecked(True)
+ match_layout.addRow(self.feature_match_cb)
+
+ self.template_match_cb = QCheckBox("Use template matching")
+ self.template_match_cb.setChecked(True)
+ match_layout.addRow(self.template_match_cb)
+
+ self.match_thresh_spin = QDoubleSpinBox()
+ self.match_thresh_spin.setRange(0.1, 1.0)
+ self.match_thresh_spin.setValue(0.70)
+ self.match_thresh_spin.setSingleStep(0.05)
+ self.match_thresh_spin.setToolTip(
+ "Minimum confidence for icon match"
+ )
+ match_layout.addRow("Match Threshold:", self.match_thresh_spin)
+
+ layout.addWidget(match_group)
+
+ # Template Directory
+ template_group = QGroupBox("Template Directory")
+ template_layout = QHBoxLayout(template_group)
+
+ self.template_dir_input = QLineEdit()
+ self.template_dir_input.setReadOnly(True)
+ template_layout.addWidget(self.template_dir_input)
+
+ self.template_dir_btn = QPushButton("Browse...")
+ self.template_dir_btn.clicked.connect(self.browse_template_dir)
+ template_layout.addWidget(self.template_dir_btn)
+
+ layout.addWidget(template_group)
+ layout.addStretch()
+
+ return tab
+
+ def refresh_gpu_info(self):
+ """Refresh GPU information display."""
+ self.gpu_info_label.setText("Detecting GPU...")
+ self.refresh_gpu_btn.setEnabled(False)
+
+ self.gpu_thread = GPUInfoThread()
+ self.gpu_thread.info_ready.connect(self.on_gpu_info_ready)
+ self.gpu_thread.error_occurred.connect(self.on_gpu_error)
+ self.gpu_thread.start()
+
+ def on_gpu_info_ready(self, info: dict):
+ """Handle GPU info received."""
+ self.gpu_info = info
+
+ backend = info.get('backend', 'cpu')
+ cuda_available = info.get('cuda_available', False)
+ mps_available = info.get('mps_available', False)
+
+ # Update label
+ if backend == 'cuda':
+ devices = info.get('devices', [])
+ if devices:
+ device_name = devices[0].get('name', 'Unknown')
+ memory_gb = devices[0].get('memory_total', 0) / (1024**3)
+ self.gpu_info_label.setText(
+ f"✅ GPU Detected: {device_name} ({memory_gb:.1f} GB)"
+ )
+ else:
+ self.gpu_info_label.setText("✅ CUDA Available")
+ elif backend == 'mps':
+ self.gpu_info_label.setText("✅ Apple MPS (Metal) Available")
+ elif backend == 'directml':
+ self.gpu_info_label.setText("✅ DirectML Available")
+ else:
+ self.gpu_info_label.setText("⚠️ No GPU detected - CPU only")
+
+ # Update details
+ details = f"Backend: {backend}\n"
+ details += f"CUDA Available: {cuda_available}\n"
+ details += f"MPS Available: {mps_available}\n"
+
+ if info.get('devices'):
+ for dev in info['devices']:
+ details += f"\nDevice {dev['id']}: {dev['name']}"
+
+ self.gpu_details.setText(details)
+ self.refresh_gpu_btn.setEnabled(True)
+
+ def on_gpu_error(self, error: str):
+ """Handle GPU detection error."""
+ self.gpu_info_label.setText(f"❌ Error detecting GPU: {error}")
+ self.refresh_gpu_btn.setEnabled(True)
+
+ def browse_icons_dir(self):
+ """Browse for extracted icons directory."""
+ dir_path = QFileDialog.getExistingDirectory(
+ self, "Select Extracted Icons Directory",
+ self.icons_dir_input.text() or str(Path.home())
+ )
+ if dir_path:
+ self.icons_dir_input.setText(dir_path)
+
+ def browse_db_dir(self):
+ """Browse for database directory."""
+ dir_path = QFileDialog.getExistingDirectory(
+ self, "Select Database Directory",
+ self.db_dir_input.text() or str(Path.home())
+ )
+ if dir_path:
+ self.db_dir_input.setText(dir_path)
+
+ def browse_template_dir(self):
+ """Browse for template directory."""
+ dir_path = QFileDialog.getExistingDirectory(
+ self, "Select Template Directory",
+ self.template_dir_input.text() or str(Path.home())
+ )
+ if dir_path:
+ self.template_dir_input.setText(dir_path)
+
+ def load_settings(self):
+ """Load settings from QSettings."""
+ # General
+ self.enable_vision_cb.setChecked(
+ self.settings.value("vision/enabled", True, bool)
+ )
+ self.auto_process_cb.setChecked(
+ self.settings.value("vision/auto_process", False, bool)
+ )
+ self.icons_dir_input.setText(
+ self.settings.value("vision/icons_dir", "", str)
+ )
+ self.db_dir_input.setText(
+ self.settings.value("vision/db_dir", "", str)
+ )
+ self.extract_text_cb.setChecked(
+ self.settings.value("vision/extract_text", True, bool)
+ )
+ self.extract_icons_cb.setChecked(
+ self.settings.value("vision/extract_icons", True, bool)
+ )
+ self.save_icons_cb.setChecked(
+ self.settings.value("vision/save_icons", True, bool)
+ )
+ self.match_icons_cb.setChecked(
+ self.settings.value("vision/match_icons", True, bool)
+ )
+
+ # GPU
+ self.use_gpu_cb.setChecked(
+ self.settings.value("vision/use_gpu", True, bool)
+ )
+ backend = self.settings.value("vision/gpu_backend", "auto", str)
+ index = self.backend_combo.findData(backend)
+ if index >= 0:
+ self.backend_combo.setCurrentIndex(index)
+ self.batch_size_spin.setValue(
+ self.settings.value("vision/batch_size", 1, int)
+ )
+ self.threads_spin.setValue(
+ self.settings.value("vision/threads", 2, int)
+ )
+
+ # OCR
+ lang = self.settings.value("vision/ocr_lang", "en", str)
+ index = self.ocr_lang_combo.findData(lang)
+ if index >= 0:
+ self.ocr_lang_combo.setCurrentIndex(index)
+ self.multi_lang_cb.setChecked(
+ self.settings.value("vision/multi_lang", False, bool)
+ )
+ self.det_thresh_spin.setValue(
+ self.settings.value("vision/det_thresh", 0.3, float)
+ )
+ self.rec_thresh_spin.setValue(
+ self.settings.value("vision/rec_thresh", 0.5, float)
+ )
+ self.use_angle_cls_cb.setChecked(
+ self.settings.value("vision/use_angle_cls", True, bool)
+ )
+ self.denoise_cb.setChecked(
+ self.settings.value("vision/denoise", True, bool)
+ )
+ self.contrast_enhance_cb.setChecked(
+ self.settings.value("vision/contrast_enhance", True, bool)
+ )
+
+ # Icon Detection
+ self.auto_detect_window_cb.setChecked(
+ self.settings.value("vision/auto_detect_window", True, bool)
+ )
+ icon_size = self.settings.value("vision/icon_size", "medium", str)
+ index = self.icon_size_combo.findData(icon_size)
+ if index >= 0:
+ self.icon_size_combo.setCurrentIndex(index)
+ self.confidence_thresh_spin.setValue(
+ self.settings.value("vision/confidence_thresh", 0.7, float)
+ )
+ self.hash_match_cb.setChecked(
+ self.settings.value("vision/hash_match", True, bool)
+ )
+ self.feature_match_cb.setChecked(
+ self.settings.value("vision/feature_match", True, bool)
+ )
+ self.template_match_cb.setChecked(
+ self.settings.value("vision/template_match", True, bool)
+ )
+ self.match_thresh_spin.setValue(
+ self.settings.value("vision/match_thresh", 0.70, float)
+ )
+ self.template_dir_input.setText(
+ self.settings.value("vision/template_dir", "", str)
+ )
+
+ def save_settings(self):
+ """Save settings to QSettings."""
+ # General
+ self.settings.setValue("vision/enabled", self.enable_vision_cb.isChecked())
+ self.settings.setValue("vision/auto_process", self.auto_process_cb.isChecked())
+ self.settings.setValue("vision/icons_dir", self.icons_dir_input.text())
+ self.settings.setValue("vision/db_dir", self.db_dir_input.text())
+ self.settings.setValue("vision/extract_text", self.extract_text_cb.isChecked())
+ self.settings.setValue("vision/extract_icons", self.extract_icons_cb.isChecked())
+ self.settings.setValue("vision/save_icons", self.save_icons_cb.isChecked())
+ self.settings.setValue("vision/match_icons", self.match_icons_cb.isChecked())
+
+ # GPU
+ self.settings.setValue("vision/use_gpu", self.use_gpu_cb.isChecked())
+ self.settings.setValue("vision/gpu_backend", self.backend_combo.currentData())
+ self.settings.setValue("vision/batch_size", self.batch_size_spin.value())
+ self.settings.setValue("vision/threads", self.threads_spin.value())
+
+ # OCR
+ self.settings.setValue("vision/ocr_lang", self.ocr_lang_combo.currentData())
+ self.settings.setValue("vision/multi_lang", self.multi_lang_cb.isChecked())
+ self.settings.setValue("vision/det_thresh", self.det_thresh_spin.value())
+ self.settings.setValue("vision/rec_thresh", self.rec_thresh_spin.value())
+ self.settings.setValue("vision/use_angle_cls", self.use_angle_cls_cb.isChecked())
+ self.settings.setValue("vision/denoise", self.denoise_cb.isChecked())
+ self.settings.setValue("vision/contrast_enhance", self.contrast_enhance_cb.isChecked())
+
+ # Icon Detection
+ self.settings.setValue("vision/auto_detect_window", self.auto_detect_window_cb.isChecked())
+ self.settings.setValue("vision/icon_size", self.icon_size_combo.currentData())
+ self.settings.setValue("vision/confidence_thresh", self.confidence_thresh_spin.value())
+ self.settings.setValue("vision/hash_match", self.hash_match_cb.isChecked())
+ self.settings.setValue("vision/feature_match", self.feature_match_cb.isChecked())
+ self.settings.setValue("vision/template_match", self.template_match_cb.isChecked())
+ self.settings.setValue("vision/match_thresh", self.match_thresh_spin.value())
+ self.settings.setValue("vision/template_dir", self.template_dir_input.text())
+
+ self.settings.sync()
+
+ self.settings_saved.emit()
+ self.accept()
+
+ logger.info("Vision settings saved")
+
+ def reset_settings(self):
+ """Reset settings to defaults."""
+ reply = QMessageBox.question(
+ self, "Reset Settings",
+ "Are you sure you want to reset all vision settings to defaults?",
+ QMessageBox.StandardButton.Yes | QMessageBox.StandardButton.No
+ )
+
+ if reply == QMessageBox.StandardButton.Yes:
+ self.settings.clear()
+ self.load_settings()
+ QMessageBox.information(self, "Reset Complete",
+ "Settings have been reset to defaults.")
+
+ def open_test_dialog(self):
+ """Open vision test dialog."""
+ from .vision_test_dialog import VisionTestDialog
+ dialog = VisionTestDialog(self)
+ dialog.exec()
+
+
# Export: the public API of this module.
__all__ = ['VisionSettingsDialog']
diff --git a/ui/vision_test_dialog.py b/ui/vision_test_dialog.py
new file mode 100644
index 0000000..e839330
--- /dev/null
+++ b/ui/vision_test_dialog.py
@@ -0,0 +1,470 @@
+"""
+Lemontropia Suite - Vision Test Dialog
+Test and debug Game Vision AI functionality.
+"""
+
+import time
+from pathlib import Path
+from typing import Optional
+
+from PyQt6.QtWidgets import (
+ QDialog, QVBoxLayout, QHBoxLayout, QLabel, QLineEdit,
+ QPushButton, QComboBox, QCheckBox, QGroupBox, QFormLayout,
+ QMessageBox, QFileDialog, QTextEdit, QProgressBar,
+ QListWidget, QListWidgetItem, QSplitter, QWidget,
+ QTableWidget, QTableWidgetItem, QHeaderView
+)
+from PyQt6.QtCore import Qt, QThread, pyqtSignal
+from PyQt6.QtGui import QPixmap, QImage, QFont
+import numpy as np
+
+import logging
+
+logger = logging.getLogger(__name__)
+
+
class VisionTestWorker(QThread):
    """Background thread that runs one Game Vision AI pass on an image.

    Emits `progress` while working, then either `test_complete` with a
    results dict or `error_occurred` with the failure message.
    """

    test_complete = pyqtSignal(dict)
    progress = pyqtSignal(str)
    error_occurred = pyqtSignal(str)

    def __init__(self, image_path: Path, settings: dict):
        super().__init__()
        self.image_path = image_path
        self.settings = settings

    def run(self):
        """Thread entry point: process the image and emit the outcome."""
        try:
            # Imported here so module import stays cheap and the heavy
            # vision stack loads on the worker thread.
            from modules.game_vision_ai import GameVisionAI

            opts = self.settings

            self.progress.emit("Initializing Game Vision AI...")
            engine = GameVisionAI(
                use_gpu=opts.get('use_gpu', True),
                ocr_lang=opts.get('ocr_lang', 'en'),
            )

            self.progress.emit("Processing image...")
            started = time.time()
            result = engine.process_screenshot(
                self.image_path,
                extract_text=opts.get('extract_text', True),
                extract_icons=opts.get('extract_icons', True),
            )
            elapsed_ms = (time.time() - started) * 1000

            texts = [
                {
                    'text': region.text,
                    'confidence': region.confidence,
                    'bbox': region.bbox,
                    'language': region.language,
                }
                for region in result.text_regions
            ]
            icons = [
                {
                    'bbox': region.bbox,
                    'confidence': region.confidence,
                    'hash': region.icon_hash[:16],  # Truncated hash
                }
                for region in result.icon_regions
            ]

            self.test_complete.emit({
                'success': True,
                'processing_time_ms': elapsed_ms,
                'gpu_backend': result.gpu_backend,
                'text_regions': texts,
                'icon_regions': icons,
                'text_count': len(result.text_regions),
                'icon_count': len(result.icon_regions),
            })

        except Exception as e:
            # Surface any failure to the UI thread instead of crashing.
            self.error_occurred.emit(str(e))
+
+
class VisionTestDialog(QDialog):
    """
    Dialog for testing and debugging Game Vision AI.

    Lets the user pick (or capture) a screenshot, runs OCR / icon
    detection on it in a background VisionTestWorker thread, and shows
    the results in summary / table / log tabs.

    Fixes over the original draft:
      * check_gpu() contained an unterminated f-string (a literal newline
        inside the quotes) — a SyntaxError; replaced with an explicit \\n.
      * The summary was plain text fed to setHtml(), which collapses line
        breaks; it now uses setPlainText().
      * resizeEvent() re-read the image file from disk on every resize;
        the full-resolution pixmap is now cached.
    """

    def __init__(self, parent=None):
        super().__init__(parent)
        self.setWindowTitle("Test Game Vision")
        self.setMinimumSize(900, 700)

        self.current_image_path: Optional[Path] = None
        self.current_results: Optional[dict] = None
        # Full-resolution pixmap cached by load_image() so resize events
        # rescale in memory instead of re-reading the file.
        self._source_pixmap = None

        self.setup_ui()

    def setup_ui(self):
        """Build the dialog layout: controls on the left, results on the right."""
        layout = QVBoxLayout(self)
        layout.setSpacing(10)

        # Title
        title_label = QLabel("🧪 Game Vision Test & Debug")
        title_font = QFont()
        title_font.setPointSize(14)
        title_font.setBold(True)
        title_label.setFont(title_font)
        layout.addWidget(title_label)

        # Main splitter
        splitter = QSplitter(Qt.Orientation.Horizontal)
        layout.addWidget(splitter)

        # Left panel - Controls
        left_panel = QWidget()
        left_layout = QVBoxLayout(left_panel)
        left_layout.setContentsMargins(5, 5, 5, 5)

        # Image selection
        image_group = QGroupBox("Test Image")
        image_layout = QVBoxLayout(image_group)

        self.image_path_label = QLabel("No image selected")
        self.image_path_label.setWordWrap(True)
        image_layout.addWidget(self.image_path_label)

        image_btn_layout = QHBoxLayout()

        self.browse_btn = QPushButton("Browse...")
        self.browse_btn.clicked.connect(self.browse_image)
        image_btn_layout.addWidget(self.browse_btn)

        self.capture_btn = QPushButton("Capture Screen")
        self.capture_btn.clicked.connect(self.capture_screen)
        image_btn_layout.addWidget(self.capture_btn)

        image_btn_layout.addStretch()
        image_layout.addLayout(image_btn_layout)

        left_layout.addWidget(image_group)

        # Test settings
        settings_group = QGroupBox("Test Settings")
        settings_layout = QFormLayout(settings_group)

        self.use_gpu_cb = QCheckBox("Use GPU acceleration")
        self.use_gpu_cb.setChecked(True)
        settings_layout.addRow(self.use_gpu_cb)

        self.extract_text_cb = QCheckBox("Extract text (OCR)")
        self.extract_text_cb.setChecked(True)
        settings_layout.addRow(self.extract_text_cb)

        self.extract_icons_cb = QCheckBox("Extract icons")
        self.extract_icons_cb.setChecked(True)
        settings_layout.addRow(self.extract_icons_cb)

        self.ocr_lang_combo = QComboBox()
        self.ocr_lang_combo.addItem("English", "en")
        self.ocr_lang_combo.addItem("Swedish", "sv")
        settings_layout.addRow("OCR Language:", self.ocr_lang_combo)

        left_layout.addWidget(settings_group)

        # Run test button (enabled once an image is loaded)
        self.test_btn = QPushButton("▶ Run Vision Test")
        self.test_btn.setStyleSheet("""
            QPushButton {
                background-color: #4CAF50;
                color: white;
                font-weight: bold;
                padding: 10px;
            }
            QPushButton:hover {
                background-color: #45a049;
            }
            QPushButton:disabled {
                background-color: #cccccc;
            }
        """)
        self.test_btn.clicked.connect(self.run_test)
        self.test_btn.setEnabled(False)
        left_layout.addWidget(self.test_btn)

        # Progress indicators
        self.progress_label = QLabel("")
        left_layout.addWidget(self.progress_label)

        self.progress_bar = QProgressBar()
        self.progress_bar.setRange(0, 0)  # Indeterminate
        self.progress_bar.setVisible(False)
        left_layout.addWidget(self.progress_bar)

        # GPU Info
        gpu_group = QGroupBox("GPU Information")
        gpu_layout = QVBoxLayout(gpu_group)

        self.gpu_info_label = QLabel("Click 'Check GPU' to detect")
        self.gpu_info_label.setWordWrap(True)
        gpu_layout.addWidget(self.gpu_info_label)

        self.check_gpu_btn = QPushButton("Check GPU")
        self.check_gpu_btn.clicked.connect(self.check_gpu)
        gpu_layout.addWidget(self.check_gpu_btn)

        left_layout.addWidget(gpu_group)
        left_layout.addStretch()

        splitter.addWidget(left_panel)

        # Right panel - Results
        right_panel = QWidget()
        right_layout = QVBoxLayout(right_panel)
        right_layout.setContentsMargins(5, 5, 5, 5)

        # Image preview
        preview_group = QGroupBox("Image Preview")
        preview_layout = QVBoxLayout(preview_group)

        self.preview_label = QLabel("No image loaded")
        self.preview_label.setAlignment(Qt.AlignmentFlag.AlignCenter)
        self.preview_label.setMinimumHeight(200)
        self.preview_label.setStyleSheet("background-color: #f0f0f0; border: 1px solid #ccc;")
        preview_layout.addWidget(self.preview_label)

        right_layout.addWidget(preview_group)

        # Results tabs
        from PyQt6.QtWidgets import QTabWidget
        self.results_tabs = QTabWidget()
        right_layout.addWidget(self.results_tabs)

        # Summary tab
        self.summary_tab = QTextEdit()
        self.summary_tab.setReadOnly(True)
        self.results_tabs.addTab(self.summary_tab, "Summary")

        # Text regions tab
        self.text_table = QTableWidget()
        self.text_table.setColumnCount(4)
        self.text_table.setHorizontalHeaderLabels(["Text", "Confidence", "Position", "Language"])
        self.text_table.horizontalHeader().setSectionResizeMode(0, QHeaderView.ResizeMode.Stretch)
        self.results_tabs.addTab(self.text_table, "Text Regions")

        # Icon regions tab
        self.icon_table = QTableWidget()
        self.icon_table.setColumnCount(3)
        self.icon_table.setHorizontalHeaderLabels(["Position", "Confidence", "Hash"])
        self.icon_table.horizontalHeader().setSectionResizeMode(0, QHeaderView.ResizeMode.Stretch)
        self.results_tabs.addTab(self.icon_table, "Icon Regions")

        # Log tab
        self.log_text = QTextEdit()
        self.log_text.setReadOnly(True)
        self.results_tabs.addTab(self.log_text, "Log")

        splitter.addWidget(right_panel)
        splitter.setSizes([300, 600])

        # Close button
        btn_layout = QHBoxLayout()
        btn_layout.addStretch()

        self.close_btn = QPushButton("Close")
        self.close_btn.clicked.connect(self.accept)
        btn_layout.addWidget(self.close_btn)

        layout.addLayout(btn_layout)

    def browse_image(self):
        """Let the user pick a test image from disk."""
        file_path, _ = QFileDialog.getOpenFileName(
            self, "Select Test Image",
            str(Path.home()),
            "Images (*.png *.jpg *.jpeg *.bmp)"
        )

        if file_path:
            self.load_image(Path(file_path))

    def capture_screen(self):
        """Grab the primary monitor to a temp file and load it as the test image."""
        try:
            import mss
            import numpy as np
            import cv2

            self.progress_label.setText("Capturing screen...")

            with mss.mss() as sct:
                monitor = sct.monitors[1]  # Primary monitor
                screenshot = sct.grab(monitor)

            # Convert BGRA grab to BGR for cv2.imwrite.
            img = np.array(screenshot)
            img = cv2.cvtColor(img, cv2.COLOR_BGRA2BGR)

            # Save to a per-user temp location so load_image can reuse its path.
            temp_path = Path.home() / ".lemontropia" / "temp_capture.png"
            temp_path.parent.mkdir(parents=True, exist_ok=True)
            cv2.imwrite(str(temp_path), img)

            self.load_image(temp_path)
            self.progress_label.setText("Screen captured")

        except Exception as e:
            QMessageBox.critical(self, "Capture Failed", f"Failed to capture screen: {e}")
            self.progress_label.setText("")

    def load_image(self, image_path: Path):
        """Load *image_path*, cache its pixmap, show the preview, enable testing."""
        self.current_image_path = image_path
        self.image_path_label.setText(str(image_path))

        pixmap = QPixmap(str(image_path))
        if not pixmap.isNull():
            self._source_pixmap = pixmap
            self._update_preview()
            self.test_btn.setEnabled(True)
        else:
            self._source_pixmap = None
            self.preview_label.setText("Failed to load image")
            self.test_btn.setEnabled(False)

    def _update_preview(self):
        """Scale the cached pixmap to fit the preview label."""
        if self._source_pixmap is None or self._source_pixmap.isNull():
            return
        scaled = self._source_pixmap.scaled(
            self.preview_label.size(),
            Qt.AspectRatioMode.KeepAspectRatio,
            Qt.TransformationMode.SmoothTransformation
        )
        self.preview_label.setPixmap(scaled)

    def run_test(self):
        """Collect settings, clear old results, and launch the worker thread."""
        if not self.current_image_path:
            QMessageBox.warning(self, "No Image", "Please select an image first.")
            return

        settings = {
            'use_gpu': self.use_gpu_cb.isChecked(),
            'extract_text': self.extract_text_cb.isChecked(),
            'extract_icons': self.extract_icons_cb.isChecked(),
            'ocr_lang': self.ocr_lang_combo.currentData()
        }

        # Disable controls while the worker runs.
        self.test_btn.setEnabled(False)
        self.browse_btn.setEnabled(False)
        self.capture_btn.setEnabled(False)
        self.progress_bar.setVisible(True)
        self.progress_label.setText("Running vision test...")

        # Clear previous results.
        self.summary_tab.clear()
        self.text_table.setRowCount(0)
        self.icon_table.setRowCount(0)

        # Keep the worker on self so it isn't garbage-collected mid-run.
        self.worker = VisionTestWorker(self.current_image_path, settings)
        self.worker.test_complete.connect(self.on_test_complete)
        self.worker.progress.connect(self.on_test_progress)
        self.worker.error_occurred.connect(self.on_test_error)
        self.worker.start()

    def on_test_progress(self, message: str):
        """Mirror worker progress into the status label and log tab."""
        self.progress_label.setText(message)
        self.log_text.append(f"[{time.strftime('%H:%M:%S')}] {message}")

    def on_test_complete(self, results: dict):
        """Populate the summary and result tables from the worker's results dict."""
        self.current_results = results

        # Re-enable controls.
        self.test_btn.setEnabled(True)
        self.browse_btn.setEnabled(True)
        self.capture_btn.setEnabled(True)
        self.progress_bar.setVisible(False)
        self.progress_label.setText("Test complete!")

        # Update summary.  setPlainText (not setHtml) so the newlines in
        # this plain-text block are preserved.
        summary = f"""
Vision Test Results

Processing Time: {results['processing_time_ms']:.1f}ms
GPU Backend: {results['gpu_backend']}
Text Regions Detected: {results['text_count']}
Icon Regions Detected: {results['icon_count']}
"""
        self.summary_tab.setPlainText(summary)

        # Update text table.
        self.text_table.setRowCount(len(results['text_regions']))
        for i, text in enumerate(results['text_regions']):
            self.text_table.setItem(i, 0, QTableWidgetItem(text['text']))
            self.text_table.setItem(i, 1, QTableWidgetItem(f"{text['confidence']:.2%}"))
            bbox_str = f"({text['bbox'][0]}, {text['bbox'][1]})"
            self.text_table.setItem(i, 2, QTableWidgetItem(bbox_str))
            self.text_table.setItem(i, 3, QTableWidgetItem(text['language']))

        # Update icon table.
        self.icon_table.setRowCount(len(results['icon_regions']))
        for i, icon in enumerate(results['icon_regions']):
            bbox_str = f"({icon['bbox'][0]}, {icon['bbox'][1]}, {icon['bbox'][2]}x{icon['bbox'][3]})"
            self.icon_table.setItem(i, 0, QTableWidgetItem(bbox_str))
            self.icon_table.setItem(i, 1, QTableWidgetItem(f"{icon['confidence']:.2%}"))
            self.icon_table.setItem(i, 2, QTableWidgetItem(icon['hash']))

        logger.info(f"Vision test complete: {results['text_count']} texts, {results['icon_count']} icons")

    def on_test_error(self, error: str):
        """Re-enable controls and surface the worker's failure message."""
        self.test_btn.setEnabled(True)
        self.browse_btn.setEnabled(True)
        self.capture_btn.setEnabled(True)
        self.progress_bar.setVisible(False)
        self.progress_label.setText(f"Error: {error}")

        QMessageBox.critical(self, "Test Failed", f"Vision test failed:\n{error}")
        self.log_text.append(f"[ERROR] {error}")

        logger.error(f"Vision test failed: {error}")

    def check_gpu(self):
        """Detect GPU availability and show a summary in the dialog."""
        try:
            from modules.game_vision_ai import GPUDetector

            info = GPUDetector.get_gpu_info()

            text = f"""
GPU Information
Backend: {info['backend']}
CUDA Available: {info['cuda_available']}
MPS Available: {info['mps_available']}
"""
            if info.get('devices'):
                for dev in info['devices']:
                    mem_gb = dev.get('memory_total', 0) / (1024**3)
                    # Fixed: the original had a literal newline inside the
                    # f-string quotes (unterminated string / SyntaxError).
                    text += f"Device {dev['id']}: {dev['name']} ({mem_gb:.1f} GB)\n"

            self.gpu_info_label.setText(text)

        except Exception as e:
            self.gpu_info_label.setText(f"Error detecting GPU: {e}")
            logger.error(f"GPU detection failed: {e}")

    def resizeEvent(self, event):
        """Rescale the preview from the cached pixmap (no disk re-read)."""
        super().resizeEvent(event)
        self._update_preview()
+
+
+# Export
+__all__ = ['VisionTestDialog']
diff --git a/vision_example.py b/vision_example.py
new file mode 100644
index 0000000..53ea52e
--- /dev/null
+++ b/vision_example.py
@@ -0,0 +1,265 @@
+"""
+Lemontropia Suite - Game Vision AI Example
+Demonstrates usage of the Game Vision AI module.
+"""
+
+import sys
+from pathlib import Path
+import logging
+
# Configure root logging once for this example script; the demos below
# rely on INFO-level output being visible on the console.
logging.basicConfig(level=logging.INFO, format='%(levelname)s: %(message)s')
logger = logging.getLogger(__name__)
+
+
def demo_gpu_detection():
    """Detect the GPU backend and print backend, availability and device info."""
    banner = "=" * 60
    print("\n" + banner)
    print("GPU DETECTION DEMO")
    print(banner)

    from modules.game_vision_ai import GPUDetector, GPUBackend

    # Which backend will the vision module run on?
    backend = GPUDetector.detect_backend()
    print(f"\nDetected GPU Backend: {backend.value}")

    details = GPUDetector.get_gpu_info()
    print("\nGPU Details:")
    for label, key in (
        ("Backend", 'backend'),
        ("CUDA Available", 'cuda_available'),
        ("MPS Available", 'mps_available'),
    ):
        print(f"  {label}: {details[key]}")

    if details.get('devices'):
        print("\n  Devices:")
        for device in details['devices']:
            mem_gb = device.get('memory_total', 0) / (1024**3)
            print(f"      [{device['id']}] {device['name']} ({mem_gb:.1f} GB)")

    print(f"\n  PyTorch Device String: {GPUDetector.get_device_string(backend)}")
+
+
def demo_ocr(image_path: str = None):
    """Run OCR over *image_path* and print every detected text region."""
    print("\n" + "=" * 60)
    print("OCR TEXT EXTRACTION DEMO")
    print("=" * 60)

    from modules.game_vision_ai import OCRProcessor

    # First run may download/compile the OCR model, hence the notice.
    print("\nInitializing OCR (this may take a moment on first run)...")
    ocr = OCRProcessor(use_gpu=True, lang='en')

    # Guard clause: bail out early when no usable image was supplied.
    if not (image_path and Path(image_path).exists()):
        print(f"\nNo image provided or file not found: {image_path}")
        print("Usage: python vision_example.py --ocr path/to/screenshot.png")
        return

    print(f"\nProcessing: {image_path}")
    regions = ocr.extract_text(image_path)

    print(f"\nDetected {len(regions)} text regions:")
    for idx, region in enumerate(regions, 1):
        print(f"  {idx}. '{region.text}' (confidence: {region.confidence:.2%})")
        print(f"      Position: ({region.bbox[0]}, {region.bbox[1]}) {region.bbox[2]}x{region.bbox[3]}")
+
+
def demo_icon_detection(image_path: str = None):
    """Detect a loot window in *image_path* and extract the item icons."""
    print("\n" + "=" * 60)
    print("ICON DETECTION DEMO")
    print("=" * 60)

    from modules.game_vision_ai import IconDetector
    import cv2

    detector = IconDetector()

    # Guard clause: nothing to do without a readable image file.
    if not (image_path and Path(image_path).exists()):
        print(f"\nNo image provided or file not found: {image_path}")
        return

    print(f"\nProcessing: {image_path}")
    image = cv2.imread(image_path)

    window = detector.detect_loot_window(image)
    if not window:
        # No loot window found: fall back to scanning the whole frame.
        print("\nNo loot window detected. Trying full image...")
        height, width = image.shape[:2]
        icons = detector.extract_icons_from_region(image, (0, 0, width, height))
        print(f"Found {len(icons)} potential icons in full image")
        return

    print(f"\nDetected loot window at: {window}")
    icons = detector.extract_icons_from_region(image, window)
    print(f"\nExtracted {len(icons)} icons:")
    for idx, icon in enumerate(icons, 1):
        print(f"  {idx}. Position: {icon.bbox}")
        print(f"      Hash: {icon.icon_hash[:32]}...")
+
+
def demo_full_vision(image_path: str = None):
    """Run the full GameVisionAI pipeline on *image_path* and report results."""
    print("\n" + "=" * 60)
    print("FULL VISION PROCESSING DEMO")
    print("=" * 60)

    from modules.game_vision_ai import GameVisionAI

    print("\nInitializing Game Vision AI...")
    vision = GameVisionAI(use_gpu=True, ocr_lang='en')

    print(f"GPU Available: {vision.is_gpu_available()}")
    print(f"Backend: {vision.backend.value}")

    # Guard clause: skip processing when no usable image was supplied.
    if not (image_path and Path(image_path).exists()):
        print(f"\nNo image provided or file not found: {image_path}")
        print("Usage: python vision_example.py --full path/to/screenshot.png")
        return

    print(f"\nProcessing: {image_path}")
    result = vision.process_screenshot(image_path)

    print("\n--- Results ---")
    print(f"Processing Time: {result.processing_time_ms:.1f}ms")
    print(f"GPU Backend: {result.gpu_backend}")

    print(f"\nText Regions ({len(result.text_regions)}):")
    for region in result.text_regions:
        print(f"  • '{region.text}' ({region.confidence:.2%})")

    print(f"\nIcon Regions ({len(result.icon_regions)}):")
    for region in result.icon_regions:
        print(f"  • Position: {region.bbox}")

    print(f"\nExtracted icons saved to: {vision.extracted_icons_dir}")
+
+
def demo_icon_matching():
    """Show icon-database stats and demonstrate perceptual hashing/similarity.

    Builds two random images sharing a patch, hashes them with
    ``PerceptualHash`` and prints their similarity score.
    """
    print("\n" + "=" * 60)
    print("ICON MATCHING DEMO")
    print("=" * 60)

    # FIX: dropped the unused `import cv2` the original carried here.
    from modules.icon_matcher import IconMatcher, PerceptualHash
    import numpy as np

    matcher = IconMatcher()

    print("\nIcon Database Stats:")
    stats = matcher.get_database_stats()
    print(f"  Total Icons: {stats['total_icons']}")
    print(f"  Database Path: {stats['database_path']}")

    print("\nPerceptual Hashing:")
    # Random 64x64 BGR image stands in for a real item icon.
    sample = np.random.randint(0, 255, (64, 64, 3), dtype=np.uint8)

    avg_hash = PerceptualHash.average_hash(sample)
    diff_hash = PerceptualHash.difference_hash(sample)

    print(f"  Average Hash: {avg_hash[:32]}...")
    print(f"  Difference Hash: {diff_hash[:32]}...")

    # Second image shares a 20x20 patch so the hashes partly agree.
    similar = np.random.randint(0, 255, (64, 64, 3), dtype=np.uint8)
    similar[20:40, 20:40] = sample[20:40, 20:40]  # Make it somewhat similar

    # FIX: reuse avg_hash instead of recomputing average_hash(sample)
    # a second time as the original did.
    similarity = PerceptualHash.similarity(avg_hash, PerceptualHash.average_hash(similar))

    print(f"  Similarity between two images: {similarity:.2%}")
+
+
def demo_calibration():
    """Explain how to calibrate GameVisionAI against sample screenshots."""
    banner = "=" * 60
    print("\n" + banner)
    print("CALIBRATION DEMO")
    print(banner)

    from modules.game_vision_ai import GameVisionAI

    # Constructed to mirror real usage; calibration itself needs screenshots.
    vision = GameVisionAI(use_gpu=True)

    for line in (
        "\nTo calibrate, provide sample screenshots:",
        "  vision.calibrate_for_game([path1, path2, ...])",
        "\nThis will:",
        "  1. Process each screenshot",
        "  2. Measure detection accuracy",
        "  3. Calculate average processing time",
        "  4. Provide recommendations",
    ):
        print(line)
+
+
def main():
    """Parse command-line flags and run the selected demos.

    With no flags the help text is printed and the function returns.
    ImportError from the demos gets a dependency hint; any other
    exception prints a traceback instead of propagating.
    """
    import argparse

    parser = argparse.ArgumentParser(
        description="Game Vision AI Examples",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        # FIX: the original epilog omitted the --calibration flag even
        # though the parser defines it; example list now covers all flags.
        epilog="""
Examples:
  python vision_example.py --gpu              # GPU detection demo
  python vision_example.py --ocr image.png    # OCR demo
  python vision_example.py --icons image.png  # Icon detection demo
  python vision_example.py --full image.png   # Full vision demo
  python vision_example.py --matching         # Icon matching demo
  python vision_example.py --calibration      # Calibration demo
  python vision_example.py --all              # Run all demos
    """
    )

    parser.add_argument('--gpu', action='store_true', help='GPU detection demo')
    parser.add_argument('--ocr', type=str, metavar='IMAGE', help='OCR demo with image')
    parser.add_argument('--icons', type=str, metavar='IMAGE', help='Icon detection demo')
    parser.add_argument('--full', type=str, metavar='IMAGE', help='Full vision demo')
    parser.add_argument('--matching', action='store_true', help='Icon matching demo')
    parser.add_argument('--calibration', action='store_true', help='Calibration demo')
    parser.add_argument('--all', action='store_true', help='Run all demos')

    args = parser.parse_args()

    # No flags at all: show usage instead of silently doing nothing.
    if not any([args.gpu, args.ocr, args.icons, args.full,
                args.matching, args.calibration, args.all]):
        parser.print_help()
        return

    try:
        # Each demo runs when its flag (or --all) is given; the image-taking
        # demos receive their path argument, which doubles as the flag.
        if args.all or args.gpu:
            demo_gpu_detection()

        if args.all or args.ocr:
            demo_ocr(args.ocr)

        if args.all or args.icons:
            demo_icon_detection(args.icons)

        if args.all or args.full:
            demo_full_vision(args.full)

        if args.all or args.matching:
            demo_icon_matching()

        if args.all or args.calibration:
            demo_calibration()

    except ImportError as e:
        print(f"\n❌ Import Error: {e}")
        print("\nMake sure all dependencies are installed:")
        print("  pip install -r requirements.txt")
    except Exception as e:
        print(f"\n❌ Error: {e}")
        import traceback
        traceback.print_exc()
+
+
# Allow running the examples directly: `python vision_example.py --help`.
if __name__ == "__main__":
    main()