feat: add AI Computer Vision with local GPU support

- modules/game_vision_ai.py - Main AI vision engine with OCR and icon detection
- modules/icon_matcher.py - Icon similarity matching using perceptual hashing
- ui/vision_settings_dialog.py - GPU/OCR settings panel
- ui/vision_calibration_dialog.py - Calibration wizard
- ui/vision_test_dialog.py - Test and debug dialog
- vision_example.py - Usage examples
- Update requirements.txt with paddlepaddle, opencv, torch dependencies

Features:
- GPU auto-detection (CUDA, MPS, DirectML)
- PaddleOCR for text extraction (English/Swedish)
- Icon detection from loot windows
- Icon matching against database
- Real-time screenshot processing
This commit is contained in:
LemonNexus 2026-02-11 11:29:10 +00:00
parent 82a7a5fc86
commit 522ee8e719
7 changed files with 3369 additions and 5 deletions

722
modules/game_vision_ai.py Normal file
View File

@ -0,0 +1,722 @@
"""
Lemontropia Suite - Game Vision AI Module
Advanced computer vision with local GPU-accelerated AI models.
Supports OCR (PaddleOCR) and icon detection for game UI analysis.
"""
import cv2
import numpy as np
import logging
import torch
import time
from pathlib import Path
from dataclasses import dataclass, field
from typing import Optional, Tuple, List, Dict, Any, Union
from enum import Enum
import json
import hashlib
logger = logging.getLogger(__name__)
class GPUBackend(Enum):
    """Enumeration of the compute backends the vision stack can run on."""
    CUDA = "cuda"          # NVIDIA GPUs via CUDA
    MPS = "mps"            # Apple Silicon via Metal Performance Shaders
    DIRECTML = "directml"  # Windows GPUs via DirectML
    CPU = "cpu"            # No GPU available; plain CPU fallback
@dataclass
class TextRegion:
    """A single piece of text found by OCR, with its location and score.

    Attributes:
        text: The recognized string.
        confidence: Recognition confidence in [0, 1].
        bbox: Axis-aligned box as (x, y, w, h) in image pixels.
        language: Language code the OCR engine was configured with.
    """
    text: str
    confidence: float
    bbox: Tuple[int, int, int, int]  # x, y, w, h
    language: str = "en"

    def to_dict(self) -> Dict[str, Any]:
        """Serialize this region to a plain dict (e.g. for JSON output)."""
        keys = ('text', 'confidence', 'bbox', 'language')
        return {key: getattr(self, key) for key in keys}
@dataclass
class IconRegion:
    """An icon cropped out of a screenshot, tagged with a perceptual hash.

    Attributes:
        image: BGR (or grayscale) pixel data of the cropped icon.
        bbox: Axis-aligned box as (x, y, w, h) in full-image pixels.
        confidence: Detection confidence in [0, 1].
        icon_hash: 256-bit average-hash string; computed from `image` when
            not supplied by the caller.
    """
    image: np.ndarray
    bbox: Tuple[int, int, int, int]  # x, y, w, h
    confidence: float
    icon_hash: str = ""

    def __post_init__(self):
        # Fill the hash in lazily so callers may supply a precomputed one.
        if not self.icon_hash:
            self.icon_hash = self._compute_hash()

    def _compute_hash(self) -> str:
        """Return an average-hash string ('0'/'1' chars) of the icon image."""
        img = self.image
        if img is None or img.size == 0:
            return ""
        # Shrink to 16x16 and binarize each pixel against the mean intensity.
        small = cv2.resize(img, (16, 16), interpolation=cv2.INTER_AREA)
        gray = cv2.cvtColor(small, cv2.COLOR_BGR2GRAY) if len(small.shape) == 3 else small
        bits = (gray > gray.mean()).flatten()
        return ''.join('1' if bit else '0' for bit in bits)
@dataclass
class ItemMatch:
    """Outcome of looking an extracted icon up in the item database."""
    name: str                      # Matched item name
    confidence: float              # Match confidence in [0, 1]
    item_id: Optional[str] = None  # Database identifier, when known
    category: Optional[str] = None # Item category, when known
    matched_hash: str = ""         # Hash of the database entry that matched
@dataclass
class VisionResult:
    """Aggregated output of one screenshot analysis pass."""
    text_regions: List[TextRegion] = field(default_factory=list)
    icon_regions: List[IconRegion] = field(default_factory=list)
    processing_time_ms: float = 0.0
    gpu_backend: str = "cpu"
    timestamp: float = field(default_factory=time.time)

    def to_dict(self) -> Dict[str, Any]:
        """Summarize as a plain dict; icons are reported by count only
        (the raw image arrays are not serializable)."""
        summary = {
            'text_regions': [region.to_dict() for region in self.text_regions],
            'icon_count': len(self.icon_regions),
            'processing_time_ms': self.processing_time_ms,
            'gpu_backend': self.gpu_backend,
        }
        summary['timestamp'] = self.timestamp
        return summary
class GPUDetector:
    """Probe the machine for usable GPU acceleration backends."""

    @staticmethod
    def detect_backend() -> GPUBackend:
        """Return the best available backend: CUDA > MPS > DirectML > CPU."""
        if torch.cuda.is_available():
            logger.info(f"CUDA available: {torch.cuda.get_device_name(0)}")
            return GPUBackend.CUDA
        mps_backend = getattr(torch.backends, 'mps', None)
        if mps_backend is not None and mps_backend.is_available():
            logger.info("Apple MPS (Metal) available")
            return GPUBackend.MPS
        try:
            # torch-directml is an optional extra; absence means no DirectML.
            import torch_directml
            if torch_directml.is_available():
                logger.info("DirectML available")
                return GPUBackend.DIRECTML
        except ImportError:
            pass
        logger.info("No GPU backend available, using CPU")
        return GPUBackend.CPU

    @staticmethod
    def get_device_string(backend: GPUBackend) -> str:
        """Translate a backend enum into the PyTorch device string to use."""
        device_map = {
            GPUBackend.CUDA: "cuda:0",
            GPUBackend.MPS: "mps",
            GPUBackend.DIRECTML: "privateuseone:0",  # DirectML device
        }
        return device_map.get(backend, "cpu")

    @staticmethod
    def get_gpu_info() -> Dict[str, Any]:
        """Collect backend availability plus per-CUDA-device name/memory."""
        cuda_ok = torch.cuda.is_available()
        info = {
            'backend': GPUDetector.detect_backend().value,
            'cuda_available': cuda_ok,
            'mps_available': hasattr(torch.backends, 'mps') and torch.backends.mps.is_available(),
            'devices': [],
        }
        if cuda_ok:
            info['devices'] = [
                {
                    'id': idx,
                    'name': torch.cuda.get_device_name(idx),
                    'memory_total': torch.cuda.get_device_properties(idx).total_memory,
                }
                for idx in range(torch.cuda.device_count())
            ]
        return info
class OCRProcessor:
    """OCR text extraction using PaddleOCR with GPU support.

    Wraps PaddleOCR initialization (GPU backend selection, language model
    choice) and exposes text extraction over whole images or sub-regions.
    On any initialization failure the processor degrades gracefully:
    extraction methods then return empty lists instead of raising.
    """

    # Languages this wrapper accepts; anything else silently falls back to 'en'.
    SUPPORTED_LANGUAGES = ['en', 'sv', 'latin']  # English, Swedish, Latin script

    def __init__(self, use_gpu: bool = True, lang: str = 'en'):
        """Create a processor.

        Args:
            use_gpu: Try to use a GPU backend if one is detected.
            lang: OCR language code; must be in SUPPORTED_LANGUAGES.
        """
        self.use_gpu = use_gpu
        self.lang = lang if lang in self.SUPPORTED_LANGUAGES else 'en'
        self.ocr = None  # PaddleOCR instance; stays None if init fails
        self.backend = GPUBackend.CPU
        self._init_ocr()

    def _init_ocr(self):
        """Initialize PaddleOCR with appropriate backend.

        Any failure (missing package, model download error, ...) leaves
        self.ocr as None; callers must tolerate that.
        """
        try:
            from paddleocr import PaddleOCR
            # Detect GPU
            if self.use_gpu:
                self.backend = GPUDetector.detect_backend()
                use_gpu_flag = self.backend != GPUBackend.CPU
            else:
                use_gpu_flag = False
            # Map our language codes to PaddleOCR model names.
            lang_map = {
                'en': 'en',
                'sv': 'latin',  # Swedish uses latin script model
                'latin': 'latin'
            }
            paddle_lang = lang_map.get(self.lang, 'en')
            logger.info(f"Initializing PaddleOCR (lang={paddle_lang}, gpu={use_gpu_flag})")
            # NOTE(review): keyword set below targets the classic PaddleOCR API
            # (use_gpu/show_log/rec_thresh); newer PaddleOCR releases renamed
            # some of these — confirm against the pinned paddleocr version.
            self.ocr = PaddleOCR(
                lang=paddle_lang,
                use_gpu=use_gpu_flag,
                show_log=False,
                use_angle_cls=True,
                det_db_thresh=0.3,
                det_db_box_thresh=0.5,
                rec_thresh=0.5,
            )
            logger.info(f"PaddleOCR initialized successfully (backend: {self.backend.value})")
        except ImportError:
            logger.error("PaddleOCR not installed. Install with: pip install paddleocr")
            self.ocr = None
        except Exception as e:
            logger.error(f"Failed to initialize PaddleOCR: {e}")
            self.ocr = None

    def preprocess_for_ocr(self, image: np.ndarray) -> np.ndarray:
        """Preprocess image for better OCR results.

        Grayscale -> denoise -> adaptive threshold. The output binary image
        has the same dimensions as the input, so OCR bboxes remain valid.
        """
        # Convert to grayscale if needed
        if len(image.shape) == 3:
            gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
        else:
            gray = image
        # Denoise
        denoised = cv2.fastNlMeansDenoising(gray, None, 10, 7, 21)
        # Adaptive threshold for better text contrast
        binary = cv2.adaptiveThreshold(
            denoised, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
            cv2.THRESH_BINARY, 11, 2
        )
        return binary

    def extract_text(self, image: Union[str, np.ndarray, Path]) -> List[TextRegion]:
        """
        Extract text from image.

        Args:
            image: Image path or numpy array
        Returns:
            List of detected text regions (empty on any failure)
        """
        if self.ocr is None:
            logger.warning("OCR not available")
            return []
        # Load image if path provided
        if isinstance(image, (str, Path)):
            img = cv2.imread(str(image))
            if img is None:
                logger.error(f"Failed to load image: {image}")
                return []
        else:
            # Copy so preprocessing never mutates the caller's array.
            img = image.copy()
        # Preprocess
        processed = self.preprocess_for_ocr(img)
        try:
            # Run OCR
            result = self.ocr.ocr(processed, cls=True)
            detected = []
            if result and result[0]:
                for line in result[0]:
                    if line is None:
                        continue
                    # Classic PaddleOCR line format: [quad points, (text, confidence)]
                    bbox, (text, confidence) = line
                    # Convert the 4-point quad to an axis-aligned (x, y, w, h) box.
                    x_coords = [p[0] for p in bbox]
                    y_coords = [p[1] for p in bbox]
                    x, y = int(min(x_coords)), int(min(y_coords))
                    w = int(max(x_coords) - x)
                    h = int(max(y_coords) - y)
                    detected.append(TextRegion(
                        text=text.strip(),
                        confidence=float(confidence),
                        bbox=(x, y, w, h),
                        language=self.lang
                    ))
            return detected
        except Exception as e:
            logger.error(f"OCR processing failed: {e}")
            return []

    def extract_text_from_region(self, image: np.ndarray,
                                 region: Tuple[int, int, int, int]) -> List[TextRegion]:
        """Extract text from specific region of image.

        Args:
            image: Full image as a numpy array.
            region: (x, y, w, h) crop to run OCR over.
        Returns:
            TextRegions with bboxes translated back into full-image coordinates.
        """
        x, y, w, h = region
        roi = image[y:y+h, x:x+w]
        if roi.size == 0:
            return []
        regions = self.extract_text(roi)
        # Adjust coordinates back to original image
        for r in regions:
            rx, ry, rw, rh = r.bbox
            r.bbox = (x + rx, y + ry, rw, rh)
        return regions
class IconDetector:
    """Detect and extract item icons from game UI.

    Pure OpenCV heuristics: template matching when a reference template is
    available, otherwise contour analysis looking for icon-sized squares.
    """

    # Typical Entropia Universe loot window icon sizes
    ICON_SIZES = {
        'small': (32, 32),
        'medium': (48, 48),
        'large': (64, 64),
        'hud': (40, 40)
    }

    def __init__(self, template_dir: Optional[Path] = None):
        """Load icon templates from template_dir (defaults to modules/templates/icons)."""
        self.template_dir = template_dir or Path(__file__).parent / "templates" / "icons"
        self.templates: Dict[str, np.ndarray] = {}
        self._load_templates()

    def _load_templates(self):
        """Load icon templates for matching.

        A missing directory is tolerated (template matching is then skipped);
        individual unreadable files are logged and skipped.
        """
        if not self.template_dir.exists():
            logger.warning(f"Template directory not found: {self.template_dir}")
            return
        for template_file in self.template_dir.glob("*.png"):
            try:
                name = template_file.stem
                template = cv2.imread(str(template_file), cv2.IMREAD_COLOR)
                if template is not None:
                    self.templates[name] = template
                    logger.debug(f"Loaded icon template: {name}")
            except Exception as e:
                logger.error(f"Failed to load template {template_file}: {e}")

    def detect_loot_window(self, image: np.ndarray) -> Optional[Tuple[int, int, int, int]]:
        """
        Detect loot window in screenshot.
        Returns bounding box of loot window or None if not found.
        """
        # Look for common loot window indicators
        # Method 1: Template matching for "Loot" text or window frame
        if 'loot_window' in self.templates:
            result = cv2.matchTemplate(
                image, self.templates['loot_window'], cv2.TM_CCOEFF_NORMED
            )
            _, max_val, _, max_loc = cv2.minMaxLoc(result)
            if max_val > 0.7:  # empirical match-confidence cutoff
                h, w = self.templates['loot_window'].shape[:2]
                return (*max_loc, w, h)
        # Method 2: Detect based on typical loot window characteristics
        # Loot windows usually have a grid of items with consistent spacing
        gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
        # Look for high-contrast regions that could be icons
        _, thresh = cv2.threshold(gray, 200, 255, cv2.THRESH_BINARY)
        # Find contours
        contours, _ = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
        # Filter for icon-sized squares (±5 px of a known size, near-square aspect)
        potential_icons = []
        for cnt in contours:
            x, y, w, h = cv2.boundingRect(cnt)
            aspect = w / h if h > 0 else 0
            # Check if dimensions match typical icon sizes
            for size_name, (sw, sh) in self.ICON_SIZES.items():
                if abs(w - sw) < 5 and abs(h - sh) < 5 and 0.8 < aspect < 1.2:
                    potential_icons.append((x, y, w, h))
                    break
        # If we found multiple icons in a grid pattern, assume loot window
        if len(potential_icons) >= 2:
            # Calculate bounding box of all icons
            xs = [p[0] for p in potential_icons]
            ys = [p[1] for p in potential_icons]
            ws = [p[2] for p in potential_icons]
            hs = [p[3] for p in potential_icons]
            # NOTE(review): max(ws)/max(hs) pair the widest/tallest icon with
            # the right/bottom-most position, so the box can overshoot a little
            # — harmless here since padding is added anyway.
            min_x, max_x = min(xs), max(xs) + max(ws)
            min_y, max_y = min(ys), max(ys) + max(hs)
            # Add padding
            padding = 20
            return (
                max(0, min_x - padding),
                max(0, min_y - padding),
                max_x - min_x + padding * 2,
                max_y - min_y + padding * 2
            )
        return None

    def extract_icons_from_region(self, image: np.ndarray,
                                  region: Tuple[int, int, int, int],
                                  icon_size: str = 'medium') -> List[IconRegion]:
        """
        Extract icons from a specific region (e.g., loot window).

        Args:
            image: Full screenshot
            region: Bounding box (x, y, w, h)
            icon_size: Size preset ('small', 'medium', 'large')
        Returns:
            List of detected icon regions, deduplicated by IoU
        """
        x, y, w, h = region
        roi = image[y:y+h, x:x+w]
        if roi.size == 0:
            return []
        target_size = self.ICON_SIZES.get(icon_size, (48, 48))
        gray = cv2.cvtColor(roi, cv2.COLOR_BGR2GRAY)
        # Multiple threshold attempts for different icon styles; duplicates
        # produced across passes are removed below.
        icons = []
        thresholds = [(200, 255), (180, 255), (150, 255)]
        for thresh_low, thresh_high in thresholds:
            _, thresh = cv2.threshold(gray, thresh_low, thresh_high, cv2.THRESH_BINARY)
            contours, _ = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
            for cnt in contours:
                cx, cy, cw, ch = cv2.boundingRect(cnt)
                aspect = cw / ch if ch > 0 else 0
                # Match icon size with tolerance
                if (abs(cw - target_size[0]) < 8 and
                        abs(ch - target_size[1]) < 8 and
                        0.7 < aspect < 1.3):
                    # Extract icon image
                    icon_img = roi[cy:cy+ch, cx:cx+cw]
                    # Resize to standard size
                    icon_img = cv2.resize(icon_img, target_size, interpolation=cv2.INTER_AREA)
                    icons.append(IconRegion(
                        image=icon_img,
                        bbox=(x + cx, y + cy, cw, ch),  # full-image coordinates
                        confidence=0.8  # Placeholder confidence
                    ))
        # Remove duplicates (icons that overlap significantly)
        unique_icons = self._remove_duplicate_icons(icons)
        return unique_icons

    def _remove_duplicate_icons(self, icons: List[IconRegion],
                                iou_threshold: float = 0.5) -> List[IconRegion]:
        """Remove duplicate icons based on IoU (greedy non-maximum suppression)."""
        if not icons:
            return []
        # Sort by confidence so the highest-confidence duplicate is kept
        sorted_icons = sorted(icons, key=lambda x: x.confidence, reverse=True)
        kept = []
        for icon in sorted_icons:
            is_duplicate = False
            for kept_icon in kept:
                if self._calculate_iou(icon.bbox, kept_icon.bbox) > iou_threshold:
                    is_duplicate = True
                    break
            if not is_duplicate:
                kept.append(icon)
        return kept

    def _calculate_iou(self, box1: Tuple[int, int, int, int],
                       box2: Tuple[int, int, int, int]) -> float:
        """Calculate Intersection over Union of two bounding boxes.

        Boxes are (x, y, w, h); returns 0 for disjoint or degenerate boxes.
        """
        x1, y1, w1, h1 = box1
        x2, y2, w2, h2 = box2
        xi1 = max(x1, x2)
        yi1 = max(y1, y2)
        xi2 = min(x1 + w1, x2 + w2)
        yi2 = min(y1 + h1, y2 + h2)
        inter_area = max(0, xi2 - xi1) * max(0, yi2 - yi1)
        box1_area = w1 * h1
        box2_area = w2 * h2
        union_area = box1_area + box2_area - inter_area
        return inter_area / union_area if union_area > 0 else 0

    def detect_icons_yolo(self, image: np.ndarray,
                          model_path: Optional[str] = None) -> List[IconRegion]:
        """
        Detect icons using YOLO model (if available).
        This is a placeholder for future YOLO integration.
        """
        # TODO: Implement YOLO detection when model is trained
        logger.debug("YOLO detection not yet implemented")
        return []
class GameVisionAI:
    """
    Main AI vision interface for game screenshot analysis.
    Combines OCR and icon detection with GPU acceleration.
    """

    def __init__(self, use_gpu: bool = True, ocr_lang: str = 'en',
                 data_dir: Optional[Path] = None):
        """
        Initialize Game Vision AI.

        Args:
            use_gpu: Enable GPU acceleration if available
            ocr_lang: Language for OCR ('en', 'sv', 'latin')
            data_dir: Directory for storing extracted data
        """
        self.use_gpu = use_gpu
        self.data_dir = data_dir or Path.home() / ".lemontropia"
        self.extracted_icons_dir = self.data_dir / "extracted_icons"
        self.extracted_icons_dir.mkdir(parents=True, exist_ok=True)
        # Detect GPU
        self.backend = GPUDetector.detect_backend() if use_gpu else GPUBackend.CPU
        # Initialize processors
        self.ocr = OCRProcessor(use_gpu=use_gpu, lang=ocr_lang)
        self.icon_detector = IconDetector()
        # Icon matching cache
        self.icon_cache: Dict[str, ItemMatch] = {}
        logger.info(f"GameVisionAI initialized (GPU: {self.backend.value})")

    def extract_text_from_image(self, image_path: Union[str, Path]) -> List[TextRegion]:
        """
        Extract all text from an image.

        Args:
            image_path: Path to screenshot image
        Returns:
            List of detected text regions
        """
        return self.ocr.extract_text(image_path)

    def _extract_icons(self, image: np.ndarray,
                       auto_detect_window: bool = True) -> List[IconRegion]:
        """Icon extraction over an already-loaded image array (shared helper).

        Tries the loot-window detector first (when enabled) and falls back to
        scanning the whole frame. Having this helper removes the duplicated
        full-image branch and lets process_screenshot() reuse its loaded
        image instead of re-reading the file from disk.
        """
        if auto_detect_window:
            window_region = self.icon_detector.detect_loot_window(image)
            if window_region:
                logger.debug(f"Detected loot window: {window_region}")
                return self.icon_detector.extract_icons_from_region(
                    image, window_region
                )
            logger.debug("No loot window detected, scanning full image")
        # Scan full image
        h, w = image.shape[:2]
        return self.icon_detector.extract_icons_from_region(image, (0, 0, w, h))

    def extract_icons_from_image(self, image_path: Union[str, Path],
                                 auto_detect_window: bool = True) -> List[IconRegion]:
        """
        Extract item icons from image.

        Args:
            image_path: Path to screenshot image
            auto_detect_window: Automatically detect loot window
        Returns:
            List of detected icon regions (empty if the image cannot be read)
        """
        image = cv2.imread(str(image_path))
        if image is None:
            logger.error(f"Failed to load image: {image_path}")
            return []
        return self._extract_icons(image, auto_detect_window)

    def match_icon_to_database(self, icon_image: np.ndarray,
                               database_path: Optional[Path] = None) -> Optional[ItemMatch]:
        """
        Match extracted icon to item database.

        Args:
            icon_image: Icon image (numpy array)
            database_path: Path to icon database directory
        Returns:
            ItemMatch if found, None otherwise
        """
        from .icon_matcher import IconMatcher
        # Lazy load matcher so the import cost is only paid when matching is used.
        if not hasattr(self, '_icon_matcher'):
            self._icon_matcher = IconMatcher(database_path)
        return self._icon_matcher.match_icon(icon_image)

    def process_screenshot(self, image_path: Union[str, Path],
                           extract_text: bool = True,
                           extract_icons: bool = True) -> VisionResult:
        """
        Process screenshot with all vision capabilities.

        Args:
            image_path: Path to screenshot
            extract_text: Enable text extraction
            extract_icons: Enable icon extraction
        Returns:
            VisionResult with all detections
        """
        start_time = time.time()
        result = VisionResult(gpu_backend=self.backend.value)
        # Load image once and reuse the array for both OCR and icon extraction
        # (previously icon extraction re-read the file from disk).
        image = cv2.imread(str(image_path))
        if image is None:
            logger.error(f"Failed to load image: {image_path}")
            return result
        # Extract text
        if extract_text:
            result.text_regions = self.ocr.extract_text(image)
            logger.debug(f"Extracted {len(result.text_regions)} text regions")
        # Extract icons
        if extract_icons:
            result.icon_regions = self._extract_icons(image)
            logger.debug(f"Extracted {len(result.icon_regions)} icons")
            # Save extracted icons
            self._save_extracted_icons(result.icon_regions)
        result.processing_time_ms = (time.time() - start_time) * 1000
        return result

    def _save_extracted_icons(self, icons: List[IconRegion]):
        """Persist extracted icons under extracted_icons_dir as hash-named PNGs."""
        for i, icon in enumerate(icons):
            filename = f"icon_{icon.icon_hash[:16]}_{int(time.time())}_{i}.png"
            filepath = self.extracted_icons_dir / filename
            cv2.imwrite(str(filepath), icon.image)
            logger.debug(f"Saved icon: {filepath}")

    def get_gpu_info(self) -> Dict[str, Any]:
        """Get GPU information."""
        return GPUDetector.get_gpu_info()

    def is_gpu_available(self) -> bool:
        """Check if GPU acceleration is available."""
        return self.backend != GPUBackend.CPU

    def calibrate_for_game(self, sample_screenshots: List[Path]) -> Dict[str, Any]:
        """
        Calibrate vision system using sample screenshots.

        Args:
            sample_screenshots: List of sample game screenshots
        Returns:
            Calibration results (counts and average per-screenshot time)
        """
        calibration = {
            'screenshots_processed': 0,
            'text_regions_detected': 0,
            'icons_detected': 0,
            'average_processing_time_ms': 0,
            'detected_regions': {}
        }
        total_time = 0
        for screenshot_path in sample_screenshots:
            try:
                start = time.time()
                result = self.process_screenshot(screenshot_path)
                elapsed = (time.time() - start) * 1000
                calibration['screenshots_processed'] += 1
                calibration['text_regions_detected'] += len(result.text_regions)
                calibration['icons_detected'] += len(result.icon_regions)
                total_time += elapsed
            except Exception as e:
                # A single bad screenshot should not abort calibration.
                logger.error(f"Failed to process {screenshot_path}: {e}")
        if calibration['screenshots_processed'] > 0:
            calibration['average_processing_time_ms'] = (
                total_time / calibration['screenshots_processed']
            )
        return calibration
# Export main classes
# Public API of this module: star-imports pick up exactly these names.
__all__ = [
    'GameVisionAI',
    'TextRegion',
    'IconRegion',
    'ItemMatch',
    'VisionResult',
    'GPUBackend',
    'GPUDetector',
    'OCRProcessor',
    'IconDetector'
]

614
modules/icon_matcher.py Normal file
View File

@ -0,0 +1,614 @@
"""
Lemontropia Suite - Icon Matcher Module
Icon similarity matching using multiple algorithms.
Supports perceptual hashing, template matching, and feature-based matching.
"""
import cv2
import numpy as np
import logging
import json
from pathlib import Path
from dataclasses import dataclass, asdict
from typing import Optional, List, Dict, Tuple, Any
import sqlite3
import pickle
logger = logging.getLogger(__name__)
@dataclass
class MatchResult:
    """Icon match result.

    Produced by IconMatcher; `match_method` records which algorithm
    ('hash', 'feature' or 'template') produced the confidence score.
    """
    item_name: str
    confidence: float
    match_method: str
    item_id: Optional[str] = None
    category: Optional[str] = None
    # Declared Optional so the dataclass default can be None (avoids the
    # shared-mutable-default pitfall); normalized to a dict in __post_init__.
    metadata: Optional[Dict[str, Any]] = None

    def __post_init__(self):
        # Guarantee metadata is always a dict for downstream .get() calls.
        if self.metadata is None:
            self.metadata = {}
class PerceptualHash:
    """Perceptual hash implementation for icon matching.

    All hashes are strings of '0'/'1' characters so they can be stored
    directly in SQLite and compared via Hamming distance.
    """

    @staticmethod
    def _to_gray(image: np.ndarray) -> np.ndarray:
        """Collapse a BGR image to grayscale; pass grayscale through unchanged."""
        return cv2.cvtColor(image, cv2.COLOR_BGR2GRAY) if len(image.shape) == 3 else image

    @staticmethod
    def _bits_to_string(bits: np.ndarray) -> str:
        """Render a boolean array as a flat '0'/'1' hash string."""
        return ''.join('1' if bit else '0' for bit in bits.flatten())

    @staticmethod
    def average_hash(image: np.ndarray, hash_size: int = 16) -> str:
        """Compute average hash (aHash): each pixel vs. the mean intensity."""
        gray = PerceptualHash._to_gray(image)
        resized = cv2.resize(gray, (hash_size, hash_size), interpolation=cv2.INTER_AREA)
        return PerceptualHash._bits_to_string(resized > resized.mean())

    @staticmethod
    def difference_hash(image: np.ndarray, hash_size: int = 16) -> str:
        """Compute difference hash (dHash): horizontal neighbour comparisons."""
        gray = PerceptualHash._to_gray(image)
        # One extra column so each row yields hash_size differences.
        resized = cv2.resize(gray, (hash_size + 1, hash_size), interpolation=cv2.INTER_AREA)
        return PerceptualHash._bits_to_string(resized[:, 1:] > resized[:, :-1])

    @staticmethod
    def wavelet_hash(image: np.ndarray, hash_size: int = 16) -> str:
        """Compute wavelet hash (wHash) using a Haar wavelet transform.

        Falls back to average_hash when PyWavelets is not installed.
        """
        try:
            import pywt
        except ImportError:
            logger.debug("PyWavelets not available, falling back to average hash")
            return PerceptualHash.average_hash(image, hash_size)
        gray = PerceptualHash._to_gray(image)
        # Round up to the next power of two so the Haar transform is clean.
        size = 2 ** (hash_size - 1).bit_length()
        resized = cv2.resize(gray, (size, size), interpolation=cv2.INTER_AREA)
        cA, (cH, cV, cD) = pywt.dwt2(resized, 'haar')
        # Hash only the approximation coefficients (low-frequency content).
        return PerceptualHash._bits_to_string(cA > cA.mean())

    @staticmethod
    def hamming_distance(hash1: str, hash2: str) -> int:
        """Count differing positions between two equal-length hash strings.

        Raises:
            ValueError: if the hashes differ in length.
        """
        if len(hash1) != len(hash2):
            raise ValueError("Hashes must be same length")
        return sum(a != b for a, b in zip(hash1, hash2))

    @staticmethod
    def similarity(hash1: str, hash2: str) -> float:
        """Similarity in [0, 1]: 1.0 for identical hashes, 0.0 for opposite."""
        mismatches = PerceptualHash.hamming_distance(hash1, hash2)
        return 1.0 - (mismatches / len(hash1))
class FeatureMatcher:
    """Feature-based icon matching using ORB/SIFT."""

    def __init__(self):
        """Create the ORB extractor and a Hamming brute-force matcher."""
        self.orb = cv2.ORB_create(nfeatures=500)
        self.matcher = cv2.BFMatcher(cv2.NORM_HAMMING, crossCheck=True)

    def extract_features(self, image: np.ndarray) -> Tuple[List, np.ndarray]:
        """Extract ORB features from image.

        Returns:
            (keypoints, descriptors); descriptors is None when ORB finds
            no features in the image.
        """
        if len(image.shape) == 3:
            gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
        else:
            gray = image
        keypoints, descriptors = self.orb.detectAndCompute(gray, None)
        return keypoints, descriptors

    def match_features(self, desc1: np.ndarray, desc2: np.ndarray,
                       threshold: float = 0.7) -> float:
        """
        Match features between two descriptor sets.

        Args:
            desc1: Query ORB descriptors (or None).
            desc2: Candidate ORB descriptors (or None).
            threshold: Unused; kept for backward compatibility. Matching
                actually uses a fixed Hamming-distance cutoff of 50.
        Returns:
            Confidence score (0-1); 0.0 when matching is impossible or fails.
        """
        if desc1 is None or desc2 is None:
            return 0.0
        try:
            matches = self.matcher.match(desc1, desc2)
            # Too few correspondences to be meaningful.
            if len(matches) < 4:
                return 0.0
            # Good matches have distance below the fixed cutoff. (The previous
            # sort of all matches was dead work: only the count is used.)
            good_matches = [m for m in matches if m.distance < 50]
            if not good_matches:
                return 0.0
            # Score based on number of good matches vs minimum needed
            score = min(len(good_matches) / 20, 1.0)  # Normalize to 20 matches
            return score
        except Exception as e:
            logger.debug(f"Feature matching failed: {e}")
            return 0.0
class TemplateMatcher:
    """Template matching for icons."""

    @staticmethod
    def match(template: np.ndarray, image: np.ndarray,
              methods: List[int] = None) -> float:
        """
        Match template to image using multiple correlation methods.

        The image is resized to the template's dimensions first; the best
        (highest) normalized score across all methods is returned.
        """
        if methods is None:
            methods = [
                cv2.TM_CCOEFF_NORMED,
                cv2.TM_CCORR_NORMED,
                cv2.TM_SQDIFF_NORMED
            ]
        # Ensure both images have the same size before correlating.
        h, w = template.shape[:2]
        image = cv2.resize(image, (w, h), interpolation=cv2.INTER_AREA)
        best_score = 0.0
        for method in methods:
            try:
                result = cv2.matchTemplate(image, template, method)
                _, max_val, _, _ = cv2.minMaxLoc(result)
                if method == cv2.TM_SQDIFF_NORMED:
                    # SQDIFF is a distance (lower is better); invert to a score.
                    max_val = 1.0 - max_val
                if max_val > best_score:
                    best_score = max_val
            except Exception as e:
                logger.debug(f"Template matching failed: {e}")
                continue
        return best_score
class IconDatabase:
    """SQLite-backed store for icon hashes, ORB features and metadata.

    Every method opens a short-lived connection and closes it in a
    ``finally`` block, so a query failure no longer leaks the handle.
    """

    def __init__(self, db_path: Optional[Path] = None):
        """Open (and create if needed) the database at db_path.

        Args:
            db_path: Database file; defaults to ~/.lemontropia/icon_database.db.
        """
        self.db_path = db_path or Path.home() / ".lemontropia" / "icon_database.db"
        self.db_path.parent.mkdir(parents=True, exist_ok=True)
        self._init_database()

    def _connect(self) -> sqlite3.Connection:
        # Single place that knows how to open the DB file.
        return sqlite3.connect(str(self.db_path))

    def _init_database(self):
        """Create the icons table and its indexes if they do not exist."""
        conn = self._connect()
        try:
            cursor = conn.cursor()
            cursor.execute('''
                CREATE TABLE IF NOT EXISTS icons (
                    id INTEGER PRIMARY KEY AUTOINCREMENT,
                    item_name TEXT NOT NULL,
                    item_id TEXT,
                    category TEXT,
                    avg_hash TEXT,
                    diff_hash TEXT,
                    wavelet_hash TEXT,
                    features BLOB,
                    metadata TEXT,
                    created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
                )
            ''')
            cursor.execute('''
                CREATE INDEX IF NOT EXISTS idx_avg_hash ON icons(avg_hash)
            ''')
            cursor.execute('''
                CREATE INDEX IF NOT EXISTS idx_item_name ON icons(item_name)
            ''')
            conn.commit()
        finally:
            # Previously the connection leaked if any statement raised.
            conn.close()

    def add_icon(self, item_name: str, image: np.ndarray,
                 item_id: Optional[str] = None,
                 category: Optional[str] = None,
                 metadata: Optional[Dict] = None) -> bool:
        """Add icon to database.

        Computes all three perceptual hashes plus ORB features for the image.
        Returns True on success, False (with a logged error) on any failure.
        """
        try:
            # Compute hashes
            avg_hash = PerceptualHash.average_hash(image)
            diff_hash = PerceptualHash.difference_hash(image)
            wavelet_hash = PerceptualHash.wavelet_hash(image)
            # Extract features
            feature_matcher = FeatureMatcher()
            _, features = feature_matcher.extract_features(image)
            # NOTE: pickled blobs are only ever loaded back from this local DB;
            # never point db_path at an untrusted file (pickle is not safe).
            features_blob = pickle.dumps(features) if features is not None else None
            conn = self._connect()
            try:
                conn.execute('''
                    INSERT INTO icons
                    (item_name, item_id, category, avg_hash, diff_hash, wavelet_hash, features, metadata)
                    VALUES (?, ?, ?, ?, ?, ?, ?, ?)
                ''', (
                    item_name, item_id, category,
                    avg_hash, diff_hash, wavelet_hash,
                    features_blob,
                    json.dumps(metadata) if metadata else None
                ))
                conn.commit()
            finally:
                conn.close()
            logger.debug(f"Added icon to database: {item_name}")
            return True
        except Exception as e:
            logger.error(f"Failed to add icon: {e}")
            return False

    def find_by_hash(self, avg_hash: str, max_distance: int = 10) -> List[Tuple[str, float, Dict]]:
        """Find icons whose stored average hash is within max_distance bits.

        Returns:
            (item_name, similarity, meta) tuples, best first. Entries whose
            stored hash has a different length than avg_hash are skipped —
            previously they made hamming_distance raise and aborted the
            whole query.
        """
        conn = self._connect()
        try:
            cursor = conn.cursor()
            cursor.execute('SELECT item_name, avg_hash, diff_hash, item_id, category, metadata FROM icons')
            rows = cursor.fetchall()
        finally:
            conn.close()
        results = []
        for item_name, db_avg_hash, db_diff_hash, item_id, category, metadata_json in rows:
            # Skip entries recorded with a different hash size.
            if not db_avg_hash or len(db_avg_hash) != len(avg_hash):
                continue
            distance = PerceptualHash.hamming_distance(avg_hash, db_avg_hash)
            if distance <= max_distance:
                similarity = 1.0 - (distance / len(avg_hash))
                metadata = json.loads(metadata_json) if metadata_json else {}
                results.append((item_name, similarity, {
                    'item_id': item_id,
                    'category': category,
                    'metadata': metadata
                }))
        # Sort by similarity, best first
        results.sort(key=lambda x: x[1], reverse=True)
        return results

    def get_all_icons(self) -> List[Dict]:
        """Get all icons from database (without the feature blobs)."""
        conn = self._connect()
        try:
            cursor = conn.cursor()
            cursor.execute('''
                SELECT item_name, item_id, category, avg_hash, metadata
                FROM icons
            ''')
            rows = cursor.fetchall()
        finally:
            conn.close()
        return [{
            'item_name': row[0],
            'item_id': row[1],
            'category': row[2],
            'avg_hash': row[3],
            'metadata': json.loads(row[4]) if row[4] else {}
        } for row in rows]

    def get_icon_count(self) -> int:
        """Get total number of icons in database."""
        conn = self._connect()
        try:
            return conn.execute('SELECT COUNT(*) FROM icons').fetchone()[0]
        finally:
            conn.close()

    def delete_icon(self, item_name: str) -> bool:
        """Delete icon(s) with the given name; True if anything was removed."""
        conn = self._connect()
        try:
            cursor = conn.execute('DELETE FROM icons WHERE item_name = ?', (item_name,))
            conn.commit()
            return cursor.rowcount > 0
        finally:
            conn.close()
class IconMatcher:
"""
Main icon matching interface.
Combines multiple matching algorithms for best results.
"""
# Confidence thresholds
CONFIDENCE_HIGH = 0.85
CONFIDENCE_MEDIUM = 0.70
CONFIDENCE_LOW = 0.50
def __init__(self, database_path: Optional[Path] = None,
icons_dir: Optional[Path] = None):
"""
Initialize icon matcher.
Args:
database_path: Path to icon database
icons_dir: Directory containing icon images for matching
"""
self.database = IconDatabase(database_path)
self.icons_dir = icons_dir or Path.home() / ".lemontropia" / "icons"
self.feature_matcher = FeatureMatcher()
# Cache for loaded icons
self._icon_cache: Dict[str, np.ndarray] = {}
def match_icon(self, image: np.ndarray,
match_methods: List[str] = None) -> Optional[MatchResult]:
"""
Match an icon image against the database.
Args:
image: Icon image (numpy array)
match_methods: List of methods to use ('hash', 'feature', 'template')
Returns:
MatchResult if match found, None otherwise
"""
if match_methods is None:
match_methods = ['hash', 'feature', 'template']
results = []
# Method 1: Perceptual Hash Matching
if 'hash' in match_methods:
hash_result = self._match_by_hash(image)
if hash_result:
results.append(hash_result)
# Method 2: Feature Matching
if 'feature' in match_methods:
feature_result = self._match_by_features(image)
if feature_result:
results.append(feature_result)
# Method 3: Template Matching
if 'template' in match_methods:
template_result = self._match_by_template(image)
if template_result:
results.append(template_result)
if not results:
return None
# Return best match
best = max(results, key=lambda x: x.confidence)
return best
def _match_by_hash(self, image: np.ndarray) -> Optional[MatchResult]:
"""Match using perceptual hashing."""
avg_hash = PerceptualHash.average_hash(image)
# Query database
matches = self.database.find_by_hash(avg_hash, max_distance=15)
if not matches:
return None
best_match = matches[0]
item_name, similarity, meta = best_match
if similarity >= self.CONFIDENCE_LOW:
return MatchResult(
item_name=item_name,
confidence=similarity,
match_method='hash',
item_id=meta.get('item_id'),
category=meta.get('category'),
metadata=meta.get('metadata', {})
)
return None
def _match_by_features(self, image: np.ndarray) -> Optional[MatchResult]:
"""Match using ORB features."""
_, query_desc = self.feature_matcher.extract_features(image)
if query_desc is None:
return None
# Get all icons with features from database
conn = sqlite3.connect(str(self.database.db_path))
cursor = conn.cursor()
cursor.execute('''
SELECT item_name, features, item_id, category, metadata
FROM icons WHERE features IS NOT NULL
''')
best_match = None
best_score = 0.0
best_meta = {}
for row in cursor.fetchall():
item_name, features_blob, item_id, category, metadata_json = row
db_desc = pickle.loads(features_blob)
score = self.feature_matcher.match_features(query_desc, db_desc)
if score > best_score:
best_score = score
best_match = item_name
best_meta = {
'item_id': item_id,
'category': category,
'metadata': json.loads(metadata_json) if metadata_json else {}
}
conn.close()
if best_match and best_score >= self.CONFIDENCE_LOW:
return MatchResult(
item_name=best_match,
confidence=best_score,
match_method='feature',
item_id=best_meta.get('item_id'),
category=best_meta.get('category'),
metadata=best_meta.get('metadata', {})
)
return None
def _match_by_template(self, image: np.ndarray) -> Optional[MatchResult]:
"""Match using template matching against icon files."""
if not self.icons_dir.exists():
return None
# Resize query to standard size
standard_size = (64, 64)
query_resized = cv2.resize(image, standard_size, interpolation=cv2.INTER_AREA)
best_match = None
best_score = 0.0
for icon_file in self.icons_dir.glob("**/*.png"):
try:
template = cv2.imread(str(icon_file), cv2.IMREAD_COLOR)
if template is None:
continue
template_resized = cv2.resize(template, standard_size, interpolation=cv2.INTER_AREA)
score = TemplateMatcher.match(query_resized, template_resized)
if score > best_score:
best_score = score
best_match = icon_file.stem
except Exception as e:
logger.debug(f"Template matching failed for {icon_file}: {e}")
continue
if best_match and best_score >= self.CONFIDENCE_MEDIUM:
return MatchResult(
item_name=best_match,
confidence=best_score,
match_method='template'
)
return None
def add_icon_to_database(self, item_name: str, image: np.ndarray,
                         item_id: Optional[str] = None,
                         category: Optional[str] = None,
                         metadata: Optional[Dict] = None) -> bool:
    """Register one icon with the backing store.

    Thin delegate to IconDatabase.add_icon; returns its success flag.
    """
    return self.database.add_icon(item_name, image, item_id, category, metadata)
def batch_add_icons(self, icons_dir: Path,
                    category: Optional[str] = None) -> Tuple[int, int]:
    """
    Import every PNG found (recursively) under *icons_dir*.

    The item name is derived from the file name: underscores become
    spaces and the result is title-cased ("long_blade" -> "Long Blade").

    Returns:
        Tuple of (success_count, fail_count)
    """
    added = 0
    rejected = 0
    for png in icons_dir.glob("**/*.png"):
        try:
            img = cv2.imread(str(png), cv2.IMREAD_COLOR)
            if img is None:
                # Unreadable file: count it and move on.
                rejected += 1
                continue
            name = png.stem.replace('_', ' ').title()
            if self.add_icon_to_database(name, img, category=category):
                added += 1
            else:
                rejected += 1
        except Exception as e:
            logger.error(f"Failed to add icon {png}: {e}")
            rejected += 1
    logger.info(f"Batch add complete: {added} success, {rejected} failed")
    return added, rejected
def get_database_stats(self) -> Dict[str, Any]:
    """Summarize the backing store: icon count plus storage locations."""
    stats = {
        'total_icons': self.database.get_icon_count(),
        'database_path': str(self.database.db_path),
        'icons_directory': str(self.icons_dir),
    }
    return stats
def find_similar_icons(self, image: np.ndarray,
                       top_k: int = 5) -> List[MatchResult]:
    """Return up to *top_k* database icons ranked by perceptual-hash similarity."""
    query_hash = PerceptualHash.average_hash(image)
    # Database returns candidates ordered by similarity; keep the best k.
    candidates = self.database.find_by_hash(query_hash, max_distance=20)
    return [
        MatchResult(
            item_name=name,
            confidence=similarity,
            match_method='hash',
            item_id=meta.get('item_id'),
            category=meta.get('category'),
            metadata=meta.get('metadata', {}),
        )
        for name, similarity, meta in candidates[:top_k]
    ]
# Public API: the matcher facade plus the individual strategy helpers.
__all__ = [
    'IconMatcher',
    'MatchResult',
    'PerceptualHash',
    'FeatureMatcher',
    'TemplateMatcher',
    'IconDatabase'
]

View File

@ -11,13 +11,33 @@
pytest>=7.0.0
pytest-asyncio>=0.21.0

# OCR Engines - PaddleOCR for GPU-accelerated text recognition
paddlepaddle-gpu>=2.5.0; sys_platform != 'darwin'  # CUDA version for Linux/Windows
paddlepaddle>=2.5.0; sys_platform == 'darwin'  # CPU version for macOS
paddleocr>=2.7.0
pytesseract>=0.3.10

# Computer Vision
opencv-python>=4.8.0
numpy>=1.24.0
Pillow>=10.0.0

# Deep Learning Framework (for GPU detection and YOLO support)
torch>=2.0.0
torchvision>=0.15.0

# Windows DirectML support (optional)
# torch-directml>=0.3.0; sys_platform == 'win32'

# Screen capture
mss>=9.0.0

# Image hashing and processing
imagehash>=4.3.1

# Wavelet transforms (for wHash)
PyWavelets>=1.4.0

# Async support
aiofiles>=23.0.0

View File

@ -0,0 +1,628 @@
"""
Lemontropia Suite - Vision Calibration Dialog
Wizard for calibrating Game Vision AI to user's game setup.
"""
import sys
import time
from pathlib import Path
from typing import Optional, List, Dict, Any
from PyQt6.QtWidgets import (
QWizard, QWizardPage, QVBoxLayout, QHBoxLayout, QLabel, QLineEdit,
QPushButton, QComboBox, QCheckBox, QProgressBar, QGroupBox,
QFormLayout, QTextEdit, QMessageBox, QFileDialog, QListWidget,
QListWidgetItem, QSpinBox, QDoubleSpinBox, QWidget
)
from PyQt6.QtCore import Qt, QSettings, QThread, pyqtSignal
from PyQt6.QtGui import QFont, QPixmap, QImage
import numpy as np
import logging
logger = logging.getLogger(__name__)
class CalibrationWorker(QThread):
    """Runs the calibration pass over sample screenshots off the GUI thread.

    Signals:
        progress(int, str): percentage complete plus a status message.
        calibration_complete(dict): aggregated statistics on success.
        error_occurred(str): failure or cancellation notice.
    """
    progress = pyqtSignal(int, str)  # percentage, message
    calibration_complete = pyqtSignal(dict)
    error_occurred = pyqtSignal(str)

    def __init__(self, screenshot_paths: List[Path], settings: Dict[str, Any]):
        super().__init__()
        self.screenshot_paths = screenshot_paths
        self.settings = settings
        self._cancelled = False

    def run(self):
        try:
            # Imported lazily so the heavy vision stack loads in this thread,
            # not when the dialog module is imported.
            from modules.game_vision_ai import GameVisionAI

            self.progress.emit(0, "Initializing Game Vision AI...")
            vision = GameVisionAI(
                use_gpu=self.settings.get('use_gpu', True),
                ocr_lang=self.settings.get('ocr_lang', 'en')
            )

            stats = {
                'screenshots_processed': 0,
                'text_regions_detected': 0,
                'icons_detected': 0,
                'processing_times': [],
                'errors': [],
                'detected_regions': {},
                'sample_extractions': []
            }
            count = len(self.screenshot_paths)
            for idx, shot in enumerate(self.screenshot_paths):
                # Cooperative cancellation: checked once per screenshot.
                if self._cancelled:
                    self.error_occurred.emit("Calibration cancelled")
                    return
                self.progress.emit(int((idx / count) * 100),
                                   f"Processing {shot.name}...")
                try:
                    started = time.time()
                    outcome = vision.process_screenshot(
                        shot,
                        extract_text=self.settings.get('extract_text', True),
                        extract_icons=self.settings.get('extract_icons', True)
                    )
                    elapsed_ms = (time.time() - started) * 1000
                    stats['screenshots_processed'] += 1
                    stats['text_regions_detected'] += len(outcome.text_regions)
                    stats['icons_detected'] += len(outcome.icon_regions)
                    stats['processing_times'].append(elapsed_ms)
                    if idx < 3:  # keep the first three runs as samples
                        stats['sample_extractions'].append({
                            'screenshot': str(shot),
                            'text_count': len(outcome.text_regions),
                            'icon_count': len(outcome.icon_regions),
                            'processing_time_ms': outcome.processing_time_ms,
                            'text_samples': [
                                {'text': t.text, 'confidence': t.confidence}
                                for t in outcome.text_regions[:5]
                            ]
                        })
                except Exception as e:
                    # One bad screenshot must not abort the whole run.
                    stats['errors'].append(f"{shot.name}: {str(e)}")
                    logger.error(f"Failed to process {shot}: {e}")

            if stats['processing_times']:
                stats['avg_processing_time'] = np.mean(stats['processing_times'])
                stats['min_processing_time'] = np.min(stats['processing_times'])
                stats['max_processing_time'] = np.max(stats['processing_times'])

            self.progress.emit(100, "Calibration complete!")
            self.calibration_complete.emit(stats)
        except Exception as e:
            # Thread boundary: surface anything unexpected to the GUI.
            self.error_occurred.emit(str(e))

    def cancel(self):
        """Request a cooperative stop; honored between screenshots."""
        self._cancelled = True
class WelcomePage(QWizardPage):
    """Intro page: explains what the wizard does and what the user needs."""

    def __init__(self, parent=None):
        super().__init__(parent)
        self.setTitle("Vision Calibration Wizard")
        self.setSubTitle("Calibrate Game Vision AI for your game setup")
        self.setup_ui()

    def setup_ui(self):
        """Lay out the static welcome text and the calibration overview box."""
        layout = QVBoxLayout(self)

        intro = QLabel(
            "<h2>Welcome to Vision Calibration</h2>"
            "<p>This wizard will help you calibrate the Game Vision AI system "
            "for optimal performance with your Entropia Universe setup.</p>"
            "<p>You will need:</p>"
            "<ul>"
            "<li>A few sample screenshots from the game</li>"
            "<li>Screenshots should include: loot windows, inventory, chat</li>"
            "<li>About 2-5 minutes to complete</li>"
            "</ul>"
        )
        intro.setWordWrap(True)
        layout.addWidget(intro)

        # Overview of what the calibration actually tunes
        overview_box = QGroupBox("What will be calibrated?")
        overview_layout = QVBoxLayout(overview_box)
        overview = QLabel(
            "<ul>"
            "<li><b>OCR Accuracy:</b> Text detection confidence and parameters</li>"
            "<li><b>Icon Detection:</b> Loot window and item icon recognition</li>"
            "<li><b>Performance:</b> Processing time optimization</li>"
            "<li><b>GPU Setup:</b> Verify GPU acceleration is working</li>"
            "</ul>"
        )
        overview.setWordWrap(True)
        overview_layout.addWidget(overview)
        layout.addWidget(overview_box)
        layout.addStretch()
class ScreenshotSelectionPage(QWizardPage):
    """Lets the user assemble the list of sample screenshots to calibrate on.

    Keeps the QListWidget rows and self.screenshot_paths in lockstep:
    the same index refers to the same file in both.
    """

    def __init__(self, parent=None):
        super().__init__(parent)
        self.setTitle("Select Sample Screenshots")
        self.setSubTitle("Choose screenshots from your game for calibration")
        self.screenshot_paths: List[Path] = []
        self.setup_ui()

    def setup_ui(self):
        """Build the instructions, the file list with its buttons, and status."""
        layout = QVBoxLayout(self)

        hint = QLabel(
            "Select 3-10 screenshots that represent typical game situations:\n"
            "• Loot windows with items\n"
            "• Inventory screens\n"
            "• Chat windows with text\n"
            "• HUD with gear equipped"
        )
        hint.setWordWrap(True)
        layout.addWidget(hint)

        group = QGroupBox("Selected Screenshots")
        group_layout = QVBoxLayout(group)
        self.file_list = QListWidget()
        group_layout.addWidget(self.file_list)

        buttons = QHBoxLayout()
        self.add_btn = QPushButton("Add Screenshots...")
        self.add_btn.clicked.connect(self.add_screenshots)
        self.add_dir_btn = QPushButton("Add Directory...")
        self.add_dir_btn.clicked.connect(self.add_directory)
        self.remove_btn = QPushButton("Remove Selected")
        self.remove_btn.clicked.connect(self.remove_selected)
        self.clear_btn = QPushButton("Clear All")
        self.clear_btn.clicked.connect(self.clear_all)
        for btn in (self.add_btn, self.add_dir_btn,
                    self.remove_btn, self.clear_btn):
            buttons.addWidget(btn)
        buttons.addStretch()
        group_layout.addLayout(buttons)
        layout.addWidget(group)

        self.status_label = QLabel("No screenshots selected")
        layout.addWidget(self.status_label)

    def add_screenshots(self):
        """Pick individual image files; duplicates are silently skipped."""
        files, _ = QFileDialog.getOpenFileNames(
            self, "Select Screenshots",
            str(Path.home()),
            "Images (*.png *.jpg *.jpeg *.bmp)"
        )
        for chosen in files:
            candidate = Path(chosen)
            if candidate not in self.screenshot_paths:
                self.screenshot_paths.append(candidate)
                self.file_list.addItem(candidate.name)
        self.update_status()

    def add_directory(self):
        """Pull in every supported image found directly under a chosen directory."""
        dir_path = QFileDialog.getExistingDirectory(
            self, "Select Screenshot Directory",
            str(Path.home())
        )
        if not dir_path:
            return
        root = Path(dir_path)
        for pattern in ('*.png', '*.jpg', '*.jpeg', '*.bmp'):
            for candidate in root.glob(pattern):
                if candidate not in self.screenshot_paths:
                    self.screenshot_paths.append(candidate)
                    self.file_list.addItem(candidate.name)
        self.update_status()

    def remove_selected(self):
        """Drop the currently highlighted row, if any, from both stores."""
        row = self.file_list.currentRow()
        if row >= 0:
            self.file_list.takeItem(row)
            del self.screenshot_paths[row]
            self.update_status()

    def clear_all(self):
        """Empty both the list widget and the backing path list."""
        self.file_list.clear()
        self.screenshot_paths.clear()
        self.update_status()

    def update_status(self):
        """Reflect the current selection count in the status label."""
        count = len(self.screenshot_paths)
        if count == 0:
            self.status_label.setText("No screenshots selected")
        elif count < 3:
            self.status_label.setText(f"⚠️ {count} screenshot(s) selected (recommend at least 3)")
        else:
            self.status_label.setText(f"{count} screenshot(s) selected")

    def validatePage(self) -> bool:
        """Block Next until at least one screenshot is chosen."""
        if not self.screenshot_paths:
            QMessageBox.warning(self, "No Screenshots",
                                "Please select at least one screenshot.")
            return False
        return True

    def get_screenshot_paths(self) -> List[Path]:
        """Paths currently selected, in insertion order."""
        return self.screenshot_paths
class SettingsPage(QWizardPage):
    """Collects the processing options used for the calibration run."""

    def __init__(self, parent=None):
        super().__init__(parent)
        self.setTitle("Calibration Settings")
        self.setSubTitle("Configure vision processing options")
        self.setup_ui()

    def setup_ui(self):
        """Build the GPU, OCR and icon-detection option groups."""
        layout = QVBoxLayout(self)

        # GPU options
        gpu_box = QGroupBox("GPU Acceleration")
        gpu_form = QFormLayout(gpu_box)
        self.use_gpu_cb = QCheckBox("Use GPU for processing")
        self.use_gpu_cb.setChecked(True)
        self.use_gpu_cb.setToolTip(
            "Enable GPU acceleration for faster processing"
        )
        gpu_form.addRow(self.use_gpu_cb)
        self.gpu_info_label = QLabel("GPU info will be detected during calibration")
        gpu_form.addRow("GPU:", self.gpu_info_label)
        layout.addWidget(gpu_box)

        # OCR options
        ocr_box = QGroupBox("OCR (Text Recognition)")
        ocr_form = QFormLayout(ocr_box)
        self.extract_text_cb = QCheckBox("Enable text extraction")
        self.extract_text_cb.setChecked(True)
        ocr_form.addRow(self.extract_text_cb)
        self.ocr_lang_combo = QComboBox()
        self.ocr_lang_combo.addItem("English", "en")
        self.ocr_lang_combo.addItem("Swedish", "sv")
        ocr_form.addRow("Language:", self.ocr_lang_combo)
        layout.addWidget(ocr_box)

        # Icon detection options
        icon_box = QGroupBox("Icon Detection")
        icon_form = QFormLayout(icon_box)
        self.extract_icons_cb = QCheckBox("Enable icon extraction")
        self.extract_icons_cb.setChecked(True)
        icon_form.addRow(self.extract_icons_cb)
        self.icon_size_combo = QComboBox()
        for label, key in (("Small (32x32)", "small"),
                           ("Medium (48x48)", "medium"),
                           ("Large (64x64)", "large")):
            self.icon_size_combo.addItem(label, key)
        icon_form.addRow("Icon Size:", self.icon_size_combo)
        self.auto_detect_window_cb = QCheckBox("Auto-detect loot windows")
        self.auto_detect_window_cb.setChecked(True)
        icon_form.addRow(self.auto_detect_window_cb)
        layout.addWidget(icon_box)

        layout.addStretch()

    def get_settings(self) -> Dict[str, Any]:
        """Snapshot the current widget state as a plain settings dict."""
        return {
            'use_gpu': self.use_gpu_cb.isChecked(),
            'extract_text': self.extract_text_cb.isChecked(),
            'extract_icons': self.extract_icons_cb.isChecked(),
            'ocr_lang': self.ocr_lang_combo.currentData(),
            'icon_size': self.icon_size_combo.currentData(),
            'auto_detect_window': self.auto_detect_window_cb.isChecked()
        }
class ProcessingPage(QWizardPage):
    """Drives the background CalibrationWorker and shows live progress.

    The wizard's Next button is gated by isComplete(), which only returns
    True once a calibration run has finished successfully.
    """

    def __init__(self, parent=None):
        super().__init__(parent)
        self.setTitle("Processing")
        self.setSubTitle("Running calibration...")
        self.is_complete = False
        self.calibration_results: Optional[Dict] = None
        self.setup_ui()

    def setup_ui(self):
        """Build progress indicators, the result log, and start/cancel buttons."""
        layout = QVBoxLayout(self)
        # Progress
        self.status_label = QLabel("Ready to start calibration")
        layout.addWidget(self.status_label)
        self.progress_bar = QProgressBar()
        self.progress_bar.setRange(0, 100)
        self.progress_bar.setValue(0)
        layout.addWidget(self.progress_bar)
        # Scrolling log of progress messages and the final summary
        self.results_text = QTextEdit()
        self.results_text.setReadOnly(True)
        self.results_text.setPlaceholderText("Calibration results will appear here...")
        layout.addWidget(self.results_text)
        # Buttons
        btn_layout = QHBoxLayout()
        self.start_btn = QPushButton("Start Calibration")
        self.start_btn.clicked.connect(self.start_calibration)
        btn_layout.addWidget(self.start_btn)
        self.cancel_btn = QPushButton("Cancel")
        self.cancel_btn.clicked.connect(self.cancel_calibration)
        self.cancel_btn.setEnabled(False)
        btn_layout.addWidget(self.cancel_btn)
        btn_layout.addStretch()
        layout.addLayout(btn_layout)

    def initializePage(self):
        """Reset the page to a clean state each time it is shown."""
        self.results_text.clear()
        self.progress_bar.setValue(0)
        self.status_label.setText("Ready to start calibration")
        self.is_complete = False
        self.start_btn.setEnabled(True)
        # Bug fix: also reset the Cancel button and notify the wizard that
        # isComplete() changed — without the emit, a Next button enabled by
        # a previous run stays enabled after Back/forward navigation.
        self.cancel_btn.setEnabled(False)
        self.completeChanged.emit()

    def start_calibration(self):
        """Collect inputs from earlier pages and launch the worker thread."""
        wizard = self.wizard()
        screenshot_page = wizard.page(1)  # ScreenshotSelectionPage
        settings_page = wizard.page(2)  # SettingsPage
        screenshot_paths = screenshot_page.get_screenshot_paths()
        settings = settings_page.get_settings()
        if not screenshot_paths:
            QMessageBox.warning(self, "No Screenshots",
                                "No screenshots selected!")
            return
        self.start_btn.setEnabled(False)
        self.cancel_btn.setEnabled(True)
        self.status_label.setText("Starting calibration...")
        # Run in a worker thread so the GUI stays responsive.
        self.worker = CalibrationWorker(screenshot_paths, settings)
        self.worker.progress.connect(self.on_progress)
        self.worker.calibration_complete.connect(self.on_complete)
        self.worker.error_occurred.connect(self.on_error)
        self.worker.start()

    def on_progress(self, percentage: int, message: str):
        """Mirror a worker progress update into the bar, label and log."""
        self.progress_bar.setValue(percentage)
        self.status_label.setText(message)
        self.results_text.append(message)

    def on_complete(self, results: Dict):
        """Store results, show the summary, and unlock the Next button."""
        self.calibration_results = results
        self.is_complete = True
        self.cancel_btn.setEnabled(False)
        # Display results
        summary = f"""
<b>Calibration Complete!</b>
Screenshots processed: {results['screenshots_processed']}
Text regions detected: {results['text_regions_detected']}
Icons detected: {results['icons_detected']}
"""
        if 'avg_processing_time' in results:
            summary += f"Average processing time: {results['avg_processing_time']:.1f}ms\n"
        if results.get('errors'):
            summary += f"\nErrors: {len(results['errors'])}"
        self.results_text.append(summary)
        # Enable next button
        self.completeChanged.emit()

    def on_error(self, error: str):
        """Surface a worker failure and let the user retry."""
        self.status_label.setText(f"Error: {error}")
        self.results_text.append(f"❌ Error: {error}")
        self.start_btn.setEnabled(True)
        self.cancel_btn.setEnabled(False)

    def cancel_calibration(self):
        """Ask the worker to stop; it checks the flag between screenshots."""
        if hasattr(self, 'worker'):
            self.worker.cancel()
        self.status_label.setText("Cancelling...")

    def isComplete(self) -> bool:
        """Gate the wizard's Next button on a finished run."""
        return self.is_complete

    def get_results(self) -> Optional[Dict]:
        """Results dict from the last successful run, or None."""
        return self.calibration_results
class ResultsPage(QWizardPage):
    """Shows the calibration outcome and advice, and persists the numbers."""

    def __init__(self, parent=None):
        super().__init__(parent)
        self.setTitle("Calibration Results")
        self.setSubTitle("Review and save calibration results")
        self.setup_ui()

    def setup_ui(self):
        """Two word-wrapped labels: the summary and the recommendations."""
        layout = QVBoxLayout(self)
        self.results_label = QLabel("Processing results will appear here...")
        self.results_label.setWordWrap(True)
        layout.addWidget(self.results_label)
        self.recommendations_label = QLabel("")
        self.recommendations_label.setWordWrap(True)
        layout.addWidget(self.recommendations_label)
        layout.addStretch()

    def initializePage(self):
        """Render results pulled from the processing page, if a run finished."""
        results = self.wizard().page(3).get_results()  # ProcessingPage
        if not results:
            return
        text = f"""
<h3>Calibration Results</h3>
<p><b>Processing Summary:</b></p>
<ul>
<li>Screenshots processed: {results['screenshots_processed']}</li>
<li>Text regions detected: {results['text_regions_detected']}</li>
<li>Icons detected: {results['icons_detected']}</li>
</ul>
"""
        if 'avg_processing_time' in results:
            text += f"""
<p><b>Performance:</b></p>
<ul>
<li>Average processing time: {results['avg_processing_time']:.1f}ms</li>
<li>Min processing time: {results['min_processing_time']:.1f}ms</li>
<li>Max processing time: {results['max_processing_time']:.1f}ms</li>
</ul>
"""
        self.results_label.setText(text)
        self.recommendations_label.setText(self._generate_recommendations(results))
        self._save_calibration_results(results)

    def _generate_recommendations(self, results: Dict) -> str:
        """Build HTML advice bullets from the run's aggregate numbers."""
        bullets = ["<h3>Recommendations</h3><ul>"]
        # Performance advice (only when timing data exists)
        if 'avg_processing_time' in results:
            avg_time = results['avg_processing_time']
            if avg_time < 100:
                bullets.append("<li>✅ Excellent performance! GPU acceleration is working well.</li>")
            elif avg_time < 500:
                bullets.append("<li>✅ Good performance. Processing is reasonably fast.</li>")
            else:
                bullets.append("<li>⚠️ Processing is slow. Consider enabling GPU or reducing screenshot resolution.</li>")
        # Detection advice
        texts = results['text_regions_detected']
        icons = results['icons_detected']
        if texts + icons == 0:
            bullets.append("<li>⚠️ No text or icons detected. Check screenshot quality and game UI visibility.</li>")
        elif texts == 0:
            bullets.append("<li>⚠️ No text detected. Try adjusting OCR thresholds or check image clarity.</li>")
        elif icons == 0:
            bullets.append("<li>⚠️ No icons detected. Ensure screenshots include loot windows.</li>")
        else:
            bullets.append("<li>✅ Detection is working. Text and icons are being recognized.</li>")
        bullets.append("</ul>")
        return "".join(bullets)

    def _save_calibration_results(self, results: Dict):
        """Persist the run's headline numbers via QSettings."""
        store = QSettings("Lemontropia", "GameVision")
        store.setValue("calibration/last_run", time.time())
        store.setValue("calibration/screenshots_processed", results['screenshots_processed'])
        store.setValue("calibration/avg_processing_time", results.get('avg_processing_time', 0))
        store.setValue("calibration/text_detection_rate", results['text_regions_detected'])
        store.setValue("calibration/icon_detection_rate", results['icons_detected'])
        store.sync()
class VisionCalibrationWizard(QWizard):
    """Five-page wizard that walks the user through vision calibration.

    Emits calibration_complete(dict) with the run's statistics when the
    user finishes after a successful calibration.
    """
    calibration_complete = pyqtSignal(dict)

    def __init__(self, parent=None):
        super().__init__(parent)
        self.setWindowTitle("Vision Calibration Wizard")
        self.setMinimumSize(700, 550)
        # Page order matters: ProcessingPage looks up pages 1-2 by index
        # and ResultsPage reads page 3.
        for page in (WelcomePage(), ScreenshotSelectionPage(), SettingsPage(),
                     ProcessingPage(), ResultsPage()):
            self.addPage(page)
        self.setWizardStyle(QWizard.WizardStyle.ModernStyle)

    def accept(self):
        """Forward results (if any) to listeners, then close the wizard."""
        results = self.page(3).get_results()  # ProcessingPage
        if results:
            self.calibration_complete.emit(results)
        super().accept()
# Public entry points for this module.
__all__ = ['VisionCalibrationWizard', 'CalibrationWorker']

View File

@ -0,0 +1,645 @@
"""
Lemontropia Suite - Vision Settings Dialog
Settings panel for configuring Game Vision AI.
"""
import sys
from pathlib import Path
from typing import Optional
from PyQt6.QtWidgets import (
QDialog, QVBoxLayout, QHBoxLayout, QLabel, QLineEdit,
QPushButton, QComboBox, QCheckBox, QGroupBox, QFormLayout,
QMessageBox, QSpinBox, QDoubleSpinBox, QTabWidget,
QFileDialog, QTextEdit, QProgressBar, QWidget, QSlider
)
from PyQt6.QtCore import Qt, QSettings, QThread, pyqtSignal
from PyQt6.QtGui import QFont, QPixmap
import logging
logger = logging.getLogger(__name__)
class GPUInfoThread(QThread):
    """Probes GPU capabilities off the GUI thread.

    Signals:
        info_ready(dict): the detector's report.
        error_occurred(str): if the vision module cannot be queried.
    """
    info_ready = pyqtSignal(dict)
    error_occurred = pyqtSignal(str)

    def run(self):
        try:
            # Imported here so the heavy torch/vision stack never blocks the UI.
            from modules.game_vision_ai import GPUDetector
            self.info_ready.emit(GPUDetector.get_gpu_info())
        except Exception as e:
            self.error_occurred.emit(str(e))
class VisionSettingsDialog(QDialog):
    """
    Settings dialog for Game Vision AI configuration.

    State is persisted via QSettings("Lemontropia", "GameVision").
    """
    # NOTE(review): presumably emitted by save_settings() after a successful
    # save — that method is outside this view, confirm the emit site there.
    settings_saved = pyqtSignal()
def __init__(self, parent=None):
    super().__init__(parent)
    self.setWindowTitle("Game Vision Settings")
    self.setMinimumSize(600, 500)
    self.settings = QSettings("Lemontropia", "GameVision")
    self.gpu_info = {}  # last report received from GPUInfoThread
    self.setup_ui()
    self.load_settings()
    self.refresh_gpu_info()  # kick off async GPU detection immediately
def setup_ui(self):
    """Build the dialog: header, settings tabs, and the button row."""
    root = QVBoxLayout(self)
    root.setSpacing(15)

    # Header
    title_label = QLabel("🎮 Game Vision AI Settings")
    title_font = QFont()
    title_font.setPointSize(14)
    title_font.setBold(True)
    title_label.setFont(title_font)
    root.addWidget(title_label)

    desc_label = QLabel(
        "Configure AI-powered computer vision for automatic game UI analysis."
    )
    desc_label.setWordWrap(True)
    root.addWidget(desc_label)

    # One tab per settings area
    self.tabs = QTabWidget()
    root.addWidget(self.tabs)
    self.tabs.addTab(self._create_general_tab(), "General")
    self.tabs.addTab(self._create_gpu_tab(), "GPU & Performance")
    self.tabs.addTab(self._create_ocr_tab(), "OCR Settings")
    self.tabs.addTab(self._create_icon_tab(), "Icon Detection")

    # Bottom button row
    button_row = QHBoxLayout()
    button_row.addStretch()
    self.reset_btn = QPushButton("Reset to Defaults")
    self.reset_btn.clicked.connect(self.reset_settings)
    button_row.addWidget(self.reset_btn)
    self.test_btn = QPushButton("Test Vision...")
    self.test_btn.clicked.connect(self.open_test_dialog)
    button_row.addWidget(self.test_btn)
    self.save_btn = QPushButton("Save")
    self.save_btn.clicked.connect(self.save_settings)
    self.save_btn.setDefault(True)
    button_row.addWidget(self.save_btn)
    self.cancel_btn = QPushButton("Cancel")
    self.cancel_btn.clicked.connect(self.reject)
    button_row.addWidget(self.cancel_btn)
    root.addLayout(button_row)
def _create_general_tab(self) -> QWidget:
    """General tab: master toggles, data directories, processing stages."""
    tab = QWidget()
    tab_layout = QVBoxLayout(tab)
    tab_layout.setSpacing(15)

    # Master on/off switches
    self.enable_vision_cb = QCheckBox("Enable Game Vision AI")
    self.enable_vision_cb.setToolTip(
        "Enable automatic screenshot analysis using AI"
    )
    tab_layout.addWidget(self.enable_vision_cb)

    self.auto_process_cb = QCheckBox("Auto-process screenshots")
    self.auto_process_cb.setToolTip(
        "Automatically analyze screenshots when captured"
    )
    tab_layout.addWidget(self.auto_process_cb)

    # Where extracted icons and the icon database live on disk
    dir_box = QGroupBox("Data Directories")
    dir_form = QFormLayout(dir_box)

    icons_row = QHBoxLayout()
    self.icons_dir_input = QLineEdit()
    self.icons_dir_input.setReadOnly(True)
    icons_row.addWidget(self.icons_dir_input)
    self.icons_dir_btn = QPushButton("Browse...")
    self.icons_dir_btn.clicked.connect(self.browse_icons_dir)
    icons_row.addWidget(self.icons_dir_btn)
    dir_form.addRow("Extracted Icons:", icons_row)

    db_row = QHBoxLayout()
    self.db_dir_input = QLineEdit()
    self.db_dir_input.setReadOnly(True)
    db_row.addWidget(self.db_dir_input)
    self.db_dir_btn = QPushButton("Browse...")
    self.db_dir_btn.clicked.connect(self.browse_db_dir)
    db_row.addWidget(self.db_dir_btn)
    dir_form.addRow("Icon Database:", db_row)
    tab_layout.addWidget(dir_box)

    # Which processing stages run on each screenshot
    options_box = QGroupBox("Processing Options")
    options_form = QFormLayout(options_box)
    self.extract_text_cb = QCheckBox("Extract text (OCR)")
    self.extract_text_cb.setChecked(True)
    options_form.addRow(self.extract_text_cb)
    self.extract_icons_cb = QCheckBox("Extract icons")
    self.extract_icons_cb.setChecked(True)
    options_form.addRow(self.extract_icons_cb)
    self.save_icons_cb = QCheckBox("Save extracted icons to disk")
    self.save_icons_cb.setChecked(True)
    options_form.addRow(self.save_icons_cb)
    self.match_icons_cb = QCheckBox("Match icons to database")
    self.match_icons_cb.setChecked(True)
    options_form.addRow(self.match_icons_cb)
    tab_layout.addWidget(options_box)

    tab_layout.addStretch()
    return tab
def _create_gpu_tab(self) -> QWidget:
    """GPU tab: detected hardware, backend choice, performance knobs."""
    tab = QWidget()
    tab_layout = QVBoxLayout(tab)
    tab_layout.setSpacing(15)

    # Detected hardware summary (filled in asynchronously by GPUInfoThread)
    info_box = QGroupBox("GPU Information")
    info_layout = QVBoxLayout(info_box)
    self.gpu_info_label = QLabel("Detecting GPU...")
    self.gpu_info_label.setWordWrap(True)
    info_layout.addWidget(self.gpu_info_label)
    self.gpu_details = QTextEdit()
    self.gpu_details.setReadOnly(True)
    self.gpu_details.setMaximumHeight(100)
    info_layout.addWidget(self.gpu_details)
    self.refresh_gpu_btn = QPushButton("Refresh GPU Info")
    self.refresh_gpu_btn.clicked.connect(self.refresh_gpu_info)
    info_layout.addWidget(self.refresh_gpu_btn)
    tab_layout.addWidget(info_box)

    # Acceleration toggles
    accel_box = QGroupBox("GPU Acceleration")
    accel_form = QFormLayout(accel_box)
    self.use_gpu_cb = QCheckBox("Use GPU acceleration")
    self.use_gpu_cb.setToolTip(
        "Enable GPU acceleration for OCR and vision processing"
    )
    accel_form.addRow(self.use_gpu_cb)
    self.backend_combo = QComboBox()
    self.backend_combo.addItem("Auto-detect", "auto")
    self.backend_combo.addItem("CUDA (NVIDIA)", "cuda")
    self.backend_combo.addItem("MPS (Apple Silicon)", "mps")
    self.backend_combo.addItem("DirectML (Windows)", "directml")
    self.backend_combo.addItem("CPU only", "cpu")
    accel_form.addRow("Preferred Backend:", self.backend_combo)
    tab_layout.addWidget(accel_box)

    # Throughput tuning
    perf_box = QGroupBox("Performance")
    perf_form = QFormLayout(perf_box)
    self.batch_size_spin = QSpinBox()
    self.batch_size_spin.setRange(1, 16)
    self.batch_size_spin.setValue(1)
    self.batch_size_spin.setToolTip(
        "Number of images to process in parallel (higher = faster but more VRAM)"
    )
    perf_form.addRow("Batch Size:", self.batch_size_spin)
    self.threads_spin = QSpinBox()
    self.threads_spin.setRange(1, 8)
    self.threads_spin.setValue(2)
    perf_form.addRow("Processing Threads:", self.threads_spin)
    tab_layout.addWidget(perf_box)

    tab_layout.addStretch()
    return tab
def _create_ocr_tab(self) -> QWidget:
    """OCR tab: language choice, detection/recognition thresholds, preprocessing."""
    tab = QWidget()
    tab_layout = QVBoxLayout(tab)
    tab_layout.setSpacing(15)

    # Language
    lang_box = QGroupBox("Language Settings")
    lang_form = QFormLayout(lang_box)
    self.ocr_lang_combo = QComboBox()
    self.ocr_lang_combo.addItem("English", "en")
    self.ocr_lang_combo.addItem("Swedish", "sv")
    self.ocr_lang_combo.addItem("Latin Script (Generic)", "latin")
    lang_form.addRow("OCR Language:", self.ocr_lang_combo)
    self.multi_lang_cb = QCheckBox("Enable multi-language detection")
    lang_form.addRow(self.multi_lang_cb)
    tab_layout.addWidget(lang_box)

    # Detection / recognition thresholds
    params_box = QGroupBox("OCR Parameters")
    params_form = QFormLayout(params_box)
    self.det_thresh_spin = QDoubleSpinBox()
    self.det_thresh_spin.setRange(0.1, 0.9)
    self.det_thresh_spin.setValue(0.3)
    self.det_thresh_spin.setSingleStep(0.05)
    self.det_thresh_spin.setToolTip(
        "Text detection threshold (lower = more sensitive)"
    )
    params_form.addRow("Detection Threshold:", self.det_thresh_spin)
    self.rec_thresh_spin = QDoubleSpinBox()
    self.rec_thresh_spin.setRange(0.1, 0.9)
    self.rec_thresh_spin.setValue(0.5)
    self.rec_thresh_spin.setSingleStep(0.05)
    self.rec_thresh_spin.setToolTip(
        "Text recognition confidence threshold"
    )
    params_form.addRow("Recognition Threshold:", self.rec_thresh_spin)
    self.use_angle_cls_cb = QCheckBox("Use angle classifier")
    self.use_angle_cls_cb.setChecked(True)
    self.use_angle_cls_cb.setToolTip(
        "Detect and correct rotated text (slower but more accurate)"
    )
    params_form.addRow(self.use_angle_cls_cb)
    tab_layout.addWidget(params_box)

    # Image preprocessing applied before OCR
    pre_box = QGroupBox("Preprocessing")
    pre_form = QFormLayout(pre_box)
    self.denoise_cb = QCheckBox("Apply denoising")
    self.denoise_cb.setChecked(True)
    pre_form.addRow(self.denoise_cb)
    self.contrast_enhance_cb = QCheckBox("Enhance contrast")
    self.contrast_enhance_cb.setChecked(True)
    pre_form.addRow(self.contrast_enhance_cb)
    tab_layout.addWidget(pre_box)

    tab_layout.addStretch()
    return tab
def _create_icon_tab(self) -> QWidget:
    """Icon tab: detection size/confidence, matching strategies, templates."""
    tab = QWidget()
    tab_layout = QVBoxLayout(tab)
    tab_layout.setSpacing(15)

    # How icons are located in a screenshot
    detect_box = QGroupBox("Detection Settings")
    detect_form = QFormLayout(detect_box)
    self.auto_detect_window_cb = QCheckBox("Auto-detect loot windows")
    self.auto_detect_window_cb.setChecked(True)
    self.auto_detect_window_cb.setToolTip(
        "Automatically detect loot windows in screenshots"
    )
    detect_form.addRow(self.auto_detect_window_cb)
    self.icon_size_combo = QComboBox()
    self.icon_size_combo.addItem("Small (32x32)", "small")
    self.icon_size_combo.addItem("Medium (48x48)", "medium")
    self.icon_size_combo.addItem("Large (64x64)", "large")
    self.icon_size_combo.addItem("HUD (40x40)", "hud")
    detect_form.addRow("Icon Size:", self.icon_size_combo)
    self.confidence_thresh_spin = QDoubleSpinBox()
    self.confidence_thresh_spin.setRange(0.1, 1.0)
    self.confidence_thresh_spin.setValue(0.7)
    self.confidence_thresh_spin.setSingleStep(0.05)
    detect_form.addRow("Detection Confidence:", self.confidence_thresh_spin)
    tab_layout.addWidget(detect_box)

    # Which matching strategies are tried against the icon database
    match_box = QGroupBox("Icon Matching")
    match_form = QFormLayout(match_box)
    self.hash_match_cb = QCheckBox("Use perceptual hashing")
    self.hash_match_cb.setChecked(True)
    match_form.addRow(self.hash_match_cb)
    self.feature_match_cb = QCheckBox("Use feature matching (ORB)")
    self.feature_match_cb.setChecked(True)
    match_form.addRow(self.feature_match_cb)
    self.template_match_cb = QCheckBox("Use template matching")
    self.template_match_cb.setChecked(True)
    match_form.addRow(self.template_match_cb)
    self.match_thresh_spin = QDoubleSpinBox()
    self.match_thresh_spin.setRange(0.1, 1.0)
    self.match_thresh_spin.setValue(0.70)
    self.match_thresh_spin.setSingleStep(0.05)
    self.match_thresh_spin.setToolTip(
        "Minimum confidence for icon match"
    )
    match_form.addRow("Match Threshold:", self.match_thresh_spin)
    tab_layout.addWidget(match_box)

    # Where template images are read from
    template_box = QGroupBox("Template Directory")
    template_row = QHBoxLayout(template_box)
    self.template_dir_input = QLineEdit()
    self.template_dir_input.setReadOnly(True)
    template_row.addWidget(self.template_dir_input)
    self.template_dir_btn = QPushButton("Browse...")
    self.template_dir_btn.clicked.connect(self.browse_template_dir)
    template_row.addWidget(self.template_dir_btn)
    tab_layout.addWidget(template_box)

    tab_layout.addStretch()
    return tab
def refresh_gpu_info(self):
    """Start asynchronous GPU detection and update the display when it finishes."""
    self.gpu_info_label.setText("Detecting GPU...")
    self.refresh_gpu_btn.setEnabled(False)
    # Keep the thread as an attribute so it is not garbage-collected mid-run.
    worker = GPUInfoThread()
    worker.info_ready.connect(self.on_gpu_info_ready)
    worker.error_occurred.connect(self.on_gpu_error)
    self.gpu_thread = worker
    worker.start()
def on_gpu_info_ready(self, info: dict):
    """Render the detected GPU backend and device details in the panel.

    Args:
        info: Dict from GPUInfoThread with keys 'backend', 'cuda_available',
            'mps_available' and optionally 'devices' (list of device dicts).
    """
    self.gpu_info = info
    backend = info.get('backend', 'cpu')
    cuda_available = info.get('cuda_available', False)
    mps_available = info.get('mps_available', False)
    devices = info.get('devices', [])
    # Compose the headline label for the detected backend.
    if backend == 'cuda':
        if devices:
            primary = devices[0]
            memory_gb = primary.get('memory_total', 0) / (1024**3)
            headline = f"✅ GPU Detected: {primary.get('name', 'Unknown')} ({memory_gb:.1f} GB)"
        else:
            headline = "✅ CUDA Available"
    elif backend == 'mps':
        headline = "✅ Apple MPS (Metal) Available"
    elif backend == 'directml':
        headline = "✅ DirectML Available"
    else:
        headline = "⚠️ No GPU detected - CPU only"
    self.gpu_info_label.setText(headline)
    # Build the multi-line details text.
    pieces = [
        f"Backend: {backend}\n",
        f"CUDA Available: {cuda_available}\n",
        f"MPS Available: {mps_available}\n",
    ]
    for dev in devices:
        pieces.append(f"\nDevice {dev['id']}: {dev['name']}")
    self.gpu_details.setText("".join(pieces))
    self.refresh_gpu_btn.setEnabled(True)
def on_gpu_error(self, error: str):
    """Handle GPU detection error."""
    # Show the failure inline and re-enable the button so the user can retry.
    self.gpu_info_label.setText(f"❌ Error detecting GPU: {error}")
    self.refresh_gpu_btn.setEnabled(True)
def browse_icons_dir(self):
    """Let the user pick the extracted-icons directory via a folder dialog."""
    # Start browsing from the current value, falling back to the home dir.
    start_dir = self.icons_dir_input.text() or str(Path.home())
    chosen = QFileDialog.getExistingDirectory(
        self, "Select Extracted Icons Directory", start_dir
    )
    if chosen:
        self.icons_dir_input.setText(chosen)
def browse_db_dir(self):
    """Let the user pick the database directory via a folder dialog."""
    # Start browsing from the current value, falling back to the home dir.
    start_dir = self.db_dir_input.text() or str(Path.home())
    chosen = QFileDialog.getExistingDirectory(
        self, "Select Database Directory", start_dir
    )
    if chosen:
        self.db_dir_input.setText(chosen)
def browse_template_dir(self):
    """Let the user pick the template directory via a folder dialog."""
    # Start browsing from the current value, falling back to the home dir.
    start_dir = self.template_dir_input.text() or str(Path.home())
    chosen = QFileDialog.getExistingDirectory(
        self, "Select Template Directory", start_dir
    )
    if chosen:
        self.template_dir_input.setText(chosen)
def load_settings(self):
    """Populate every widget from persisted QSettings values (with defaults)."""
    read = self.settings.value

    def pick(combo, data):
        # Select the combo entry whose userData matches, if one exists.
        idx = combo.findData(data)
        if idx >= 0:
            combo.setCurrentIndex(idx)

    # General
    self.enable_vision_cb.setChecked(read("vision/enabled", True, bool))
    self.auto_process_cb.setChecked(read("vision/auto_process", False, bool))
    self.icons_dir_input.setText(read("vision/icons_dir", "", str))
    self.db_dir_input.setText(read("vision/db_dir", "", str))
    self.extract_text_cb.setChecked(read("vision/extract_text", True, bool))
    self.extract_icons_cb.setChecked(read("vision/extract_icons", True, bool))
    self.save_icons_cb.setChecked(read("vision/save_icons", True, bool))
    self.match_icons_cb.setChecked(read("vision/match_icons", True, bool))
    # GPU
    self.use_gpu_cb.setChecked(read("vision/use_gpu", True, bool))
    pick(self.backend_combo, read("vision/gpu_backend", "auto", str))
    self.batch_size_spin.setValue(read("vision/batch_size", 1, int))
    self.threads_spin.setValue(read("vision/threads", 2, int))
    # OCR
    pick(self.ocr_lang_combo, read("vision/ocr_lang", "en", str))
    self.multi_lang_cb.setChecked(read("vision/multi_lang", False, bool))
    self.det_thresh_spin.setValue(read("vision/det_thresh", 0.3, float))
    self.rec_thresh_spin.setValue(read("vision/rec_thresh", 0.5, float))
    self.use_angle_cls_cb.setChecked(read("vision/use_angle_cls", True, bool))
    self.denoise_cb.setChecked(read("vision/denoise", True, bool))
    self.contrast_enhance_cb.setChecked(read("vision/contrast_enhance", True, bool))
    # Icon detection / matching
    self.auto_detect_window_cb.setChecked(read("vision/auto_detect_window", True, bool))
    pick(self.icon_size_combo, read("vision/icon_size", "medium", str))
    self.confidence_thresh_spin.setValue(read("vision/confidence_thresh", 0.7, float))
    self.hash_match_cb.setChecked(read("vision/hash_match", True, bool))
    self.feature_match_cb.setChecked(read("vision/feature_match", True, bool))
    self.template_match_cb.setChecked(read("vision/template_match", True, bool))
    self.match_thresh_spin.setValue(read("vision/match_thresh", 0.70, float))
    self.template_dir_input.setText(read("vision/template_dir", "", str))
def save_settings(self):
    """Persist all widget state to QSettings, notify listeners, and close."""
    store = self.settings.setValue
    # General
    store("vision/enabled", self.enable_vision_cb.isChecked())
    store("vision/auto_process", self.auto_process_cb.isChecked())
    store("vision/icons_dir", self.icons_dir_input.text())
    store("vision/db_dir", self.db_dir_input.text())
    store("vision/extract_text", self.extract_text_cb.isChecked())
    store("vision/extract_icons", self.extract_icons_cb.isChecked())
    store("vision/save_icons", self.save_icons_cb.isChecked())
    store("vision/match_icons", self.match_icons_cb.isChecked())
    # GPU
    store("vision/use_gpu", self.use_gpu_cb.isChecked())
    store("vision/gpu_backend", self.backend_combo.currentData())
    store("vision/batch_size", self.batch_size_spin.value())
    store("vision/threads", self.threads_spin.value())
    # OCR
    store("vision/ocr_lang", self.ocr_lang_combo.currentData())
    store("vision/multi_lang", self.multi_lang_cb.isChecked())
    store("vision/det_thresh", self.det_thresh_spin.value())
    store("vision/rec_thresh", self.rec_thresh_spin.value())
    store("vision/use_angle_cls", self.use_angle_cls_cb.isChecked())
    store("vision/denoise", self.denoise_cb.isChecked())
    store("vision/contrast_enhance", self.contrast_enhance_cb.isChecked())
    # Icon detection / matching
    store("vision/auto_detect_window", self.auto_detect_window_cb.isChecked())
    store("vision/icon_size", self.icon_size_combo.currentData())
    store("vision/confidence_thresh", self.confidence_thresh_spin.value())
    store("vision/hash_match", self.hash_match_cb.isChecked())
    store("vision/feature_match", self.feature_match_cb.isChecked())
    store("vision/template_match", self.template_match_cb.isChecked())
    store("vision/match_thresh", self.match_thresh_spin.value())
    store("vision/template_dir", self.template_dir_input.text())
    # Flush to disk, signal listeners, then close the dialog.
    self.settings.sync()
    self.settings_saved.emit()
    self.accept()
    logger.info("Vision settings saved")
def reset_settings(self):
    """Reset all vision settings to their defaults after user confirmation.

    Only keys under the "vision/" prefix are removed; the previous
    implementation called ``self.settings.clear()``, which wipes every
    setting the whole application has stored, not just this dialog's.
    """
    reply = QMessageBox.question(
        self, "Reset Settings",
        "Are you sure you want to reset all vision settings to defaults?",
        QMessageBox.StandardButton.Yes | QMessageBox.StandardButton.No
    )
    if reply == QMessageBox.StandardButton.Yes:
        # Remove only this dialog's keys so unrelated app settings survive.
        for key in self.settings.allKeys():
            if key.startswith("vision/"):
                self.settings.remove(key)
        # Reload widgets; missing keys fall back to the coded defaults.
        self.load_settings()
        QMessageBox.information(self, "Reset Complete",
                                "Settings have been reset to defaults.")
def open_test_dialog(self):
    """Open vision test dialog."""
    # Imported here so the test dialog module only loads when needed.
    from .vision_test_dialog import VisionTestDialog
    dialog = VisionTestDialog(self)
    dialog.exec()
# Export
__all__ = ['VisionSettingsDialog']

# ============================================================
# New file in this commit: ui/vision_test_dialog.py (+470 lines)
# ============================================================
"""
Lemontropia Suite - Vision Test Dialog
Test and debug Game Vision AI functionality.
"""
import time
from pathlib import Path
from typing import Optional
from PyQt6.QtWidgets import (
QDialog, QVBoxLayout, QHBoxLayout, QLabel, QLineEdit,
QPushButton, QComboBox, QCheckBox, QGroupBox, QFormLayout,
QMessageBox, QFileDialog, QTextEdit, QProgressBar,
QListWidget, QListWidgetItem, QSplitter, QWidget,
QTableWidget, QTableWidgetItem, QHeaderView
)
from PyQt6.QtCore import Qt, QThread, pyqtSignal
from PyQt6.QtGui import QPixmap, QImage, QFont
import numpy as np
import logging
logger = logging.getLogger(__name__)
class VisionTestWorker(QThread):
    """Worker thread for vision testing.

    Runs GameVisionAI on a single screenshot off the UI thread and reports
    status and results back to the dialog via Qt signals.
    """
    # Emitted with a plain-dict summary of the run (built in run()).
    test_complete = pyqtSignal(dict)
    # Emitted with human-readable status messages during processing.
    progress = pyqtSignal(str)
    # Emitted with the stringified exception if processing fails.
    error_occurred = pyqtSignal(str)

    def __init__(self, image_path: Path, settings: dict):
        """Store the image path and the test settings dict for run()."""
        super().__init__()
        self.image_path = image_path
        self.settings = settings

    def run(self):
        """Thread entry point: process the image and emit the results dict."""
        try:
            # Imported here so the heavy vision stack only loads in the worker.
            from modules.game_vision_ai import GameVisionAI
            self.progress.emit("Initializing Game Vision AI...")
            vision = GameVisionAI(
                use_gpu=self.settings.get('use_gpu', True),
                ocr_lang=self.settings.get('ocr_lang', 'en')
            )
            self.progress.emit("Processing image...")
            start_time = time.time()
            result = vision.process_screenshot(
                self.image_path,
                extract_text=self.settings.get('extract_text', True),
                extract_icons=self.settings.get('extract_icons', True)
            )
            # Wall-clock processing time in milliseconds.
            processing_time = (time.time() - start_time) * 1000
            # Flatten result objects into JSON-friendly dicts for the UI tables.
            # NOTE(review): assumes icon regions expose .confidence and
            # .icon_hash attributes — confirm against IconRegion's definition.
            test_results = {
                'success': True,
                'processing_time_ms': processing_time,
                'gpu_backend': result.gpu_backend,
                'text_regions': [
                    {
                        'text': t.text,
                        'confidence': t.confidence,
                        'bbox': t.bbox,
                        'language': t.language
                    }
                    for t in result.text_regions
                ],
                'icon_regions': [
                    {
                        'bbox': i.bbox,
                        'confidence': i.confidence,
                        'hash': i.icon_hash[:16]  # Truncated hash
                    }
                    for i in result.icon_regions
                ],
                'text_count': len(result.text_regions),
                'icon_count': len(result.icon_regions)
            }
            self.test_complete.emit(test_results)
        except Exception as e:
            # Surface any failure to the dialog instead of crashing the thread.
            self.error_occurred.emit(str(e))
class VisionTestDialog(QDialog):
    """
    Dialog for testing and debugging Game Vision AI.

    Lets the user pick or capture a screenshot, runs OCR/icon detection on a
    VisionTestWorker thread, and presents results in tabbed tables plus a log.
    """
    def __init__(self, parent=None):
        """Build the dialog; no processing happens until a test is started."""
        super().__init__(parent)
        self.setWindowTitle("Test Game Vision")
        self.setMinimumSize(900, 700)
        # Path of the image currently loaded for testing (None until chosen).
        self.current_image_path: Optional[Path] = None
        # Result dict from the last completed test run (None before any run).
        self.current_results: Optional[dict] = None
        self.setup_ui()

    def setup_ui(self):
        """Setup dialog UI: left control panel, right preview/results panel."""
        layout = QVBoxLayout(self)
        layout.setSpacing(10)
        # Title
        title_label = QLabel("🧪 Game Vision Test & Debug")
        title_font = QFont()
        title_font.setPointSize(14)
        title_font.setBold(True)
        title_label.setFont(title_font)
        layout.addWidget(title_label)
        # Main splitter
        splitter = QSplitter(Qt.Orientation.Horizontal)
        layout.addWidget(splitter)
        # Left panel - Controls
        left_panel = QWidget()
        left_layout = QVBoxLayout(left_panel)
        left_layout.setContentsMargins(5, 5, 5, 5)
        # Image selection
        image_group = QGroupBox("Test Image")
        image_layout = QVBoxLayout(image_group)
        self.image_path_label = QLabel("No image selected")
        self.image_path_label.setWordWrap(True)
        image_layout.addWidget(self.image_path_label)
        image_btn_layout = QHBoxLayout()
        self.browse_btn = QPushButton("Browse...")
        self.browse_btn.clicked.connect(self.browse_image)
        image_btn_layout.addWidget(self.browse_btn)
        self.capture_btn = QPushButton("Capture Screen")
        self.capture_btn.clicked.connect(self.capture_screen)
        image_btn_layout.addWidget(self.capture_btn)
        image_btn_layout.addStretch()
        image_layout.addLayout(image_btn_layout)
        left_layout.addWidget(image_group)
        # Test settings
        settings_group = QGroupBox("Test Settings")
        settings_layout = QFormLayout(settings_group)
        self.use_gpu_cb = QCheckBox("Use GPU acceleration")
        self.use_gpu_cb.setChecked(True)
        settings_layout.addRow(self.use_gpu_cb)
        self.extract_text_cb = QCheckBox("Extract text (OCR)")
        self.extract_text_cb.setChecked(True)
        settings_layout.addRow(self.extract_text_cb)
        self.extract_icons_cb = QCheckBox("Extract icons")
        self.extract_icons_cb.setChecked(True)
        settings_layout.addRow(self.extract_icons_cb)
        self.ocr_lang_combo = QComboBox()
        self.ocr_lang_combo.addItem("English", "en")
        self.ocr_lang_combo.addItem("Swedish", "sv")
        settings_layout.addRow("OCR Language:", self.ocr_lang_combo)
        left_layout.addWidget(settings_group)
        # Run test button (disabled until an image is loaded successfully)
        self.test_btn = QPushButton("▶ Run Vision Test")
        self.test_btn.setStyleSheet("""
            QPushButton {
                background-color: #4CAF50;
                color: white;
                font-weight: bold;
                padding: 10px;
            }
            QPushButton:hover {
                background-color: #45a049;
            }
            QPushButton:disabled {
                background-color: #cccccc;
            }
        """)
        self.test_btn.clicked.connect(self.run_test)
        self.test_btn.setEnabled(False)
        left_layout.addWidget(self.test_btn)
        # Progress
        self.progress_label = QLabel("")
        left_layout.addWidget(self.progress_label)
        self.progress_bar = QProgressBar()
        self.progress_bar.setRange(0, 0)  # Indeterminate
        self.progress_bar.setVisible(False)
        left_layout.addWidget(self.progress_bar)
        # GPU Info
        gpu_group = QGroupBox("GPU Information")
        gpu_layout = QVBoxLayout(gpu_group)
        self.gpu_info_label = QLabel("Click 'Check GPU' to detect")
        self.gpu_info_label.setWordWrap(True)
        gpu_layout.addWidget(self.gpu_info_label)
        self.check_gpu_btn = QPushButton("Check GPU")
        self.check_gpu_btn.clicked.connect(self.check_gpu)
        gpu_layout.addWidget(self.check_gpu_btn)
        left_layout.addWidget(gpu_group)
        left_layout.addStretch()
        splitter.addWidget(left_panel)
        # Right panel - Results
        right_panel = QWidget()
        right_layout = QVBoxLayout(right_panel)
        right_layout.setContentsMargins(5, 5, 5, 5)
        # Image preview
        preview_group = QGroupBox("Image Preview")
        preview_layout = QVBoxLayout(preview_group)
        self.preview_label = QLabel("No image loaded")
        self.preview_label.setAlignment(Qt.AlignmentFlag.AlignCenter)
        self.preview_label.setMinimumHeight(200)
        self.preview_label.setStyleSheet("background-color: #f0f0f0; border: 1px solid #ccc;")
        preview_layout.addWidget(self.preview_label)
        right_layout.addWidget(preview_group)
        # Results tabs
        from PyQt6.QtWidgets import QTabWidget
        self.results_tabs = QTabWidget()
        right_layout.addWidget(self.results_tabs)
        # Summary tab
        self.summary_tab = QTextEdit()
        self.summary_tab.setReadOnly(True)
        self.results_tabs.addTab(self.summary_tab, "Summary")
        # Text regions tab
        self.text_table = QTableWidget()
        self.text_table.setColumnCount(4)
        self.text_table.setHorizontalHeaderLabels(["Text", "Confidence", "Position", "Language"])
        self.text_table.horizontalHeader().setSectionResizeMode(0, QHeaderView.ResizeMode.Stretch)
        self.results_tabs.addTab(self.text_table, "Text Regions")
        # Icon regions tab
        self.icon_table = QTableWidget()
        self.icon_table.setColumnCount(3)
        self.icon_table.setHorizontalHeaderLabels(["Position", "Confidence", "Hash"])
        self.icon_table.horizontalHeader().setSectionResizeMode(0, QHeaderView.ResizeMode.Stretch)
        self.results_tabs.addTab(self.icon_table, "Icon Regions")
        # Log tab
        self.log_text = QTextEdit()
        self.log_text.setReadOnly(True)
        self.results_tabs.addTab(self.log_text, "Log")
        splitter.addWidget(right_panel)
        splitter.setSizes([300, 600])
        # Close button
        btn_layout = QHBoxLayout()
        btn_layout.addStretch()
        self.close_btn = QPushButton("Close")
        self.close_btn.clicked.connect(self.accept)
        btn_layout.addWidget(self.close_btn)
        layout.addLayout(btn_layout)

    def browse_image(self):
        """Browse for test image."""
        file_path, _ = QFileDialog.getOpenFileName(
            self, "Select Test Image",
            str(Path.home()),
            "Images (*.png *.jpg *.jpeg *.bmp)"
        )
        if file_path:
            self.load_image(Path(file_path))

    def capture_screen(self):
        """Capture screen for testing."""
        try:
            # Imported lazily; mss/cv2 are only needed when capturing.
            import mss
            import numpy as np
            import cv2
            from PyQt6.QtGui import QImage, QPixmap
            self.progress_label.setText("Capturing screen...")
            with mss.mss() as sct:
                monitor = sct.monitors[1]  # Primary monitor
                screenshot = sct.grab(monitor)
            # Convert to numpy array
            img = np.array(screenshot)
            img = cv2.cvtColor(img, cv2.COLOR_BGRA2BGR)
            # Save temporarily
            temp_path = Path.home() / ".lemontropia" / "temp_capture.png"
            temp_path.parent.mkdir(parents=True, exist_ok=True)
            cv2.imwrite(str(temp_path), img)
            self.load_image(temp_path)
            self.progress_label.setText("Screen captured")
        except Exception as e:
            QMessageBox.critical(self, "Capture Failed", f"Failed to capture screen: {e}")
            self.progress_label.setText("")

    def load_image(self, image_path: Path):
        """Load and display image."""
        self.current_image_path = image_path
        self.image_path_label.setText(str(image_path))
        # Load and display preview
        pixmap = QPixmap(str(image_path))
        if not pixmap.isNull():
            # Scale to fit
            scaled = pixmap.scaled(
                self.preview_label.size(),
                Qt.AspectRatioMode.KeepAspectRatio,
                Qt.TransformationMode.SmoothTransformation
            )
            self.preview_label.setPixmap(scaled)
            self.test_btn.setEnabled(True)
        else:
            self.preview_label.setText("Failed to load image")
            self.test_btn.setEnabled(False)

    def run_test(self):
        """Run vision test."""
        if not self.current_image_path:
            QMessageBox.warning(self, "No Image", "Please select an image first.")
            return
        # Collect settings
        settings = {
            'use_gpu': self.use_gpu_cb.isChecked(),
            'extract_text': self.extract_text_cb.isChecked(),
            'extract_icons': self.extract_icons_cb.isChecked(),
            'ocr_lang': self.ocr_lang_combo.currentData()
        }
        # Disable controls
        self.test_btn.setEnabled(False)
        self.browse_btn.setEnabled(False)
        self.capture_btn.setEnabled(False)
        self.progress_bar.setVisible(True)
        self.progress_label.setText("Running vision test...")
        # Clear previous results
        self.summary_tab.clear()
        self.text_table.setRowCount(0)
        self.icon_table.setRowCount(0)
        # Start worker (kept as an attribute so it isn't garbage-collected)
        self.worker = VisionTestWorker(self.current_image_path, settings)
        self.worker.test_complete.connect(self.on_test_complete)
        self.worker.progress.connect(self.on_test_progress)
        self.worker.error_occurred.connect(self.on_test_error)
        self.worker.start()

    def on_test_progress(self, message: str):
        """Handle test progress."""
        self.progress_label.setText(message)
        self.log_text.append(f"[{time.strftime('%H:%M:%S')}] {message}")

    def on_test_complete(self, results: dict):
        """Handle test completion."""
        self.current_results = results
        # Re-enable controls
        self.test_btn.setEnabled(True)
        self.browse_btn.setEnabled(True)
        self.capture_btn.setEnabled(True)
        self.progress_bar.setVisible(False)
        self.progress_label.setText("Test complete!")
        # Update summary
        summary = f"""
        <h2>Vision Test Results</h2>
        <p><b>Processing Time:</b> {results['processing_time_ms']:.1f}ms</p>
        <p><b>GPU Backend:</b> {results['gpu_backend']}</p>
        <p><b>Text Regions Detected:</b> {results['text_count']}</p>
        <p><b>Icon Regions Detected:</b> {results['icon_count']}</p>
        """
        self.summary_tab.setHtml(summary)
        # Update text table
        self.text_table.setRowCount(len(results['text_regions']))
        for i, text in enumerate(results['text_regions']):
            self.text_table.setItem(i, 0, QTableWidgetItem(text['text']))
            self.text_table.setItem(i, 1, QTableWidgetItem(f"{text['confidence']:.2%}"))
            bbox_str = f"({text['bbox'][0]}, {text['bbox'][1]})"
            self.text_table.setItem(i, 2, QTableWidgetItem(bbox_str))
            self.text_table.setItem(i, 3, QTableWidgetItem(text['language']))
        # Update icon table
        self.icon_table.setRowCount(len(results['icon_regions']))
        for i, icon in enumerate(results['icon_regions']):
            bbox_str = f"({icon['bbox'][0]}, {icon['bbox'][1]}, {icon['bbox'][2]}x{icon['bbox'][3]})"
            self.icon_table.setItem(i, 0, QTableWidgetItem(bbox_str))
            self.icon_table.setItem(i, 1, QTableWidgetItem(f"{icon['confidence']:.2%}"))
            self.icon_table.setItem(i, 2, QTableWidgetItem(icon['hash']))
        logger.info(f"Vision test complete: {results['text_count']} texts, {results['icon_count']} icons")

    def on_test_error(self, error: str):
        """Handle test error."""
        self.test_btn.setEnabled(True)
        self.browse_btn.setEnabled(True)
        self.capture_btn.setEnabled(True)
        self.progress_bar.setVisible(False)
        self.progress_label.setText(f"Error: {error}")
        QMessageBox.critical(self, "Test Failed", f"Vision test failed:\n{error}")
        self.log_text.append(f"[ERROR] {error}")
        logger.error(f"Vision test failed: {error}")

    def check_gpu(self):
        """Check GPU availability."""
        try:
            from modules.game_vision_ai import GPUDetector
            info = GPUDetector.get_gpu_info()
            text = f"""
<b>GPU Information</b><br>
Backend: {info['backend']}<br>
CUDA Available: {info['cuda_available']}<br>
MPS Available: {info['mps_available']}<br>
"""
            if info.get('devices'):
                for dev in info['devices']:
                    mem_gb = dev.get('memory_total', 0) / (1024**3)
                    text += f"Device {dev['id']}: {dev['name']} ({mem_gb:.1f} GB)<br>"
            self.gpu_info_label.setText(text)
        except Exception as e:
            self.gpu_info_label.setText(f"Error detecting GPU: {e}")
            logger.error(f"GPU detection failed: {e}")

    def resizeEvent(self, event):
        """Handle resize to update preview."""
        super().resizeEvent(event)
        # NOTE(review): this reloads the pixmap from disk on every resize;
        # consider caching the original QPixmap if resizes feel sluggish.
        if self.current_image_path and self.preview_label.pixmap():
            pixmap = QPixmap(str(self.current_image_path))
            scaled = pixmap.scaled(
                self.preview_label.size(),
                Qt.AspectRatioMode.KeepAspectRatio,
                Qt.TransformationMode.SmoothTransformation
            )
            self.preview_label.setPixmap(scaled)
# Export
__all__ = ['VisionTestDialog']

# ============================================================
# New file in this commit: vision_example.py (+265 lines)
# ============================================================
"""
Lemontropia Suite - Game Vision AI Example
Demonstrates usage of the Game Vision AI module.
"""
import logging
import sys
from pathlib import Path
from typing import Optional
# Setup logging
logging.basicConfig(level=logging.INFO, format='%(levelname)s: %(message)s')
logger = logging.getLogger(__name__)
def demo_gpu_detection():
    """Print the detected GPU backend and per-device details."""
    print("\n" + "="*60)
    print("GPU DETECTION DEMO")
    print("="*60)
    from modules.game_vision_ai import GPUDetector, GPUBackend
    detected = GPUDetector.detect_backend()
    print(f"\nDetected GPU Backend: {detected.value}")
    # Dump the full capability report.
    details = GPUDetector.get_gpu_info()
    print(f"\nGPU Details:")
    print(f" Backend: {details['backend']}")
    print(f" CUDA Available: {details['cuda_available']}")
    print(f" MPS Available: {details['mps_available']}")
    if details.get('devices'):
        print(f"\n Devices:")
        for dev in details['devices']:
            mem_gb = dev.get('memory_total', 0) / (1024**3)
            print(f" [{dev['id']}] {dev['name']} ({mem_gb:.1f} GB)")
    print(f"\n PyTorch Device String: {GPUDetector.get_device_string(detected)}")
def demo_ocr(image_path: Optional[str] = None):
    """Demonstrate OCR functionality.

    Args:
        image_path: Path to a screenshot to run OCR on. When omitted or the
            file does not exist, usage instructions are printed instead.
            (Annotation fixed: default is None, so the type is Optional[str].)
    """
    print("\n" + "="*60)
    print("OCR TEXT EXTRACTION DEMO")
    print("="*60)
    from modules.game_vision_ai import OCRProcessor
    # First run downloads/initializes models, hence the notice.
    print("\nInitializing OCR (this may take a moment on first run)...")
    ocr = OCRProcessor(use_gpu=True, lang='en')
    if image_path and Path(image_path).exists():
        print(f"\nProcessing: {image_path}")
        regions = ocr.extract_text(image_path)
        print(f"\nDetected {len(regions)} text regions:")
        for i, region in enumerate(regions, 1):
            print(f" {i}. '{region.text}' (confidence: {region.confidence:.2%})")
            print(f" Position: ({region.bbox[0]}, {region.bbox[1]}) {region.bbox[2]}x{region.bbox[3]}")
    else:
        print(f"\nNo image provided or file not found: {image_path}")
        print("Usage: python vision_example.py --ocr path/to/screenshot.png")
def demo_icon_detection(image_path: Optional[str] = None):
    """Demonstrate icon detection.

    Args:
        image_path: Path to a screenshot to scan for icons. When omitted or
            missing on disk, a notice is printed instead.
            (Annotation fixed: default is None, so the type is Optional[str].)
    """
    print("\n" + "="*60)
    print("ICON DETECTION DEMO")
    print("="*60)
    from modules.game_vision_ai import IconDetector
    import cv2
    detector = IconDetector()
    if image_path and Path(image_path).exists():
        print(f"\nProcessing: {image_path}")
        image = cv2.imread(image_path)
        if image is None:
            # cv2.imread returns None (no exception) for unreadable/corrupt
            # files; without this guard the calls below would crash.
            print(f"\nFailed to read image: {image_path}")
            return
        # Detect loot window
        window = detector.detect_loot_window(image)
        if window:
            print(f"\nDetected loot window at: {window}")
            # Extract icons from the detected window region only.
            icons = detector.extract_icons_from_region(image, window)
            print(f"\nExtracted {len(icons)} icons:")
            for i, icon in enumerate(icons, 1):
                print(f" {i}. Position: {icon.bbox}")
                print(f" Hash: {icon.icon_hash[:32]}...")
        else:
            # Fall back to scanning the whole frame.
            print("\nNo loot window detected. Trying full image...")
            h, w = image.shape[:2]
            icons = detector.extract_icons_from_region(image, (0, 0, w, h))
            print(f"Found {len(icons)} potential icons in full image")
    else:
        print(f"\nNo image provided or file not found: {image_path}")
def demo_full_vision(image_path: Optional[str] = None):
    """Demonstrate full vision processing (OCR + icon extraction).

    Args:
        image_path: Path to a screenshot to process end-to-end. When omitted
            or missing on disk, usage instructions are printed instead.
            (Annotation fixed: default is None, so the type is Optional[str].)
    """
    print("\n" + "="*60)
    print("FULL VISION PROCESSING DEMO")
    print("="*60)
    from modules.game_vision_ai import GameVisionAI
    # Initialize the full pipeline (OCR + icon detection).
    print("\nInitializing Game Vision AI...")
    vision = GameVisionAI(use_gpu=True, ocr_lang='en')
    print(f"GPU Available: {vision.is_gpu_available()}")
    print(f"Backend: {vision.backend.value}")
    if image_path and Path(image_path).exists():
        print(f"\nProcessing: {image_path}")
        # Process screenshot
        result = vision.process_screenshot(image_path)
        print(f"\n--- Results ---")
        print(f"Processing Time: {result.processing_time_ms:.1f}ms")
        print(f"GPU Backend: {result.gpu_backend}")
        print(f"\nText Regions ({len(result.text_regions)}):")
        for region in result.text_regions:
            print(f"'{region.text}' ({region.confidence:.2%})")
        print(f"\nIcon Regions ({len(result.icon_regions)}):")
        for region in result.icon_regions:
            print(f" • Position: {region.bbox}")
        print(f"\nExtracted icons saved to: {vision.extracted_icons_dir}")
    else:
        print(f"\nNo image provided or file not found: {image_path}")
        print("Usage: python vision_example.py --full path/to/screenshot.png")
def demo_icon_matching():
    """Show database stats and perceptual-hash similarity on random images."""
    print("\n" + "="*60)
    print("ICON MATCHING DEMO")
    print("="*60)
    from modules.icon_matcher import IconMatcher, PerceptualHash
    import cv2
    import numpy as np
    matcher = IconMatcher()
    print(f"\nIcon Database Stats:")
    db_stats = matcher.get_database_stats()
    print(f" Total Icons: {db_stats['total_icons']}")
    print(f" Database Path: {db_stats['database_path']}")
    print(f"\nPerceptual Hashing:")
    # Random 64x64 RGB image to hash.
    sample = np.random.randint(0, 255, (64, 64, 3), dtype=np.uint8)
    avg_hash = PerceptualHash.average_hash(sample)
    diff_hash = PerceptualHash.difference_hash(sample)
    print(f" Average Hash: {avg_hash[:32]}...")
    print(f" Difference Hash: {diff_hash[:32]}...")
    # Second random image sharing a central patch with the first, then compare.
    similar = np.random.randint(0, 255, (64, 64, 3), dtype=np.uint8)
    similar[20:40, 20:40] = sample[20:40, 20:40]  # Make it somewhat similar
    score = PerceptualHash.similarity(
        PerceptualHash.average_hash(sample),
        PerceptualHash.average_hash(similar),
    )
    print(f" Similarity between two images: {score:.2%}")
def demo_calibration():
    """Explain how to run calibration; prints instructions only.

    The previous version constructed a GameVisionAI instance here and never
    used it — that triggers heavy model/GPU initialization for a demo that
    only prints static help text, so the unused instantiation was removed.
    """
    print("\n" + "="*60)
    print("CALIBRATION DEMO")
    print("="*60)
    print("\nTo calibrate, provide sample screenshots:")
    print(" vision.calibrate_for_game([path1, path2, ...])")
    print("\nThis will:")
    print(" 1. Process each screenshot")
    print(" 2. Measure detection accuracy")
    print(" 3. Calculate average processing time")
    print(" 4. Provide recommendations")
def main():
    """CLI entry point: parse flags and run the selected demos in order."""
    import argparse
    parser = argparse.ArgumentParser(
        description="Game Vision AI Examples",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
  python vision_example.py --gpu              # GPU detection demo
  python vision_example.py --ocr image.png    # OCR demo
  python vision_example.py --icons image.png  # Icon detection demo
  python vision_example.py --full image.png   # Full vision demo
  python vision_example.py --matching         # Icon matching demo
  python vision_example.py --all              # Run all demos
"""
    )
    parser.add_argument('--gpu', action='store_true', help='GPU detection demo')
    parser.add_argument('--ocr', type=str, metavar='IMAGE', help='OCR demo with image')
    parser.add_argument('--icons', type=str, metavar='IMAGE', help='Icon detection demo')
    parser.add_argument('--full', type=str, metavar='IMAGE', help='Full vision demo')
    parser.add_argument('--matching', action='store_true', help='Icon matching demo')
    parser.add_argument('--calibration', action='store_true', help='Calibration demo')
    parser.add_argument('--all', action='store_true', help='Run all demos')
    args = parser.parse_args()
    # Table of (flag value, demo callable, positional args) in display order.
    demos = [
        (args.gpu, demo_gpu_detection, ()),
        (args.ocr, demo_ocr, (args.ocr,)),
        (args.icons, demo_icon_detection, (args.icons,)),
        (args.full, demo_full_vision, (args.full,)),
        (args.matching, demo_icon_matching, ()),
        (args.calibration, demo_calibration, ()),
    ]
    # No flags at all -> show help and exit.
    if not args.all and not any(flag for flag, _, _ in demos):
        parser.print_help()
        return
    try:
        for flag, run_demo, demo_args in demos:
            if args.all or flag:
                run_demo(*demo_args)
    except ImportError as e:
        print(f"\n❌ Import Error: {e}")
        print("\nMake sure all dependencies are installed:")
        print(" pip install -r requirements.txt")
    except Exception as e:
        print(f"\n❌ Error: {e}")
        import traceback
        traceback.print_exc()
# Run the example CLI only when executed as a script, not on import.
if __name__ == "__main__":
    main()