"""
Lemontropia Suite - Game Vision AI Module

Advanced computer vision with local GPU-accelerated AI models.
Supports OCR (PaddleOCR) and icon detection for game UI analysis.
"""
import cv2
import numpy as np
import logging
import time
from pathlib import Path
from dataclasses import dataclass, field
from typing import Optional, Tuple, List, Dict, Any, Union
from enum import Enum
import json
import hashlib

# Module-level logger; handlers/levels are configured by the host application.
logger = logging.getLogger(__name__)

# Optional PyTorch import with fallback: GPU detection degrades gracefully
# when torch is missing or fails to load (e.g. broken CUDA installation).
try:
    import torch
    TORCH_AVAILABLE = True
except Exception as e:
    logger.warning(f"PyTorch not available: {e}")
    TORCH_AVAILABLE = False
    torch = None  # sentinel: callers must check TORCH_AVAILABLE before use

# OpenCV-based text detector, used as the fallback OCR backend.
from .opencv_text_detector import OpenCVTextDetector, TextDetection as OpenCVTextDetection

# Optional PaddleOCR import with fallback (preferred OCR backend when present).
try:
    from paddleocr import PaddleOCR
    PADDLE_AVAILABLE = True
except Exception as e:
    logger.warning(f"PaddleOCR not available: {e}")
    PADDLE_AVAILABLE = False
    PaddleOCR = None
class GPUBackend(Enum):
    """Enumeration of the compute backends this module can target."""

    CUDA = "cuda"          # NVIDIA CUDA
    MPS = "mps"            # Apple Metal Performance Shaders
    DIRECTML = "directml"  # Windows DirectML
    CPU = "cpu"            # CPU-only fallback
@dataclass
class TextRegion:
    """A piece of text located in an image, with OCR metadata."""

    text: str                        # recognized string (empty for detect-only backends)
    confidence: float                # recognition confidence score
    bbox: Tuple[int, int, int, int]  # (x, y, w, h) in image pixels
    language: str = "en"             # language code used for recognition

    def to_dict(self) -> Dict[str, Any]:
        """Serialize this region to a plain dictionary."""
        return dict(
            text=self.text,
            confidence=self.confidence,
            bbox=self.bbox,
            language=self.language,
        )
@dataclass
class IconRegion:
    """An icon cropped from a screenshot, identified by a perceptual hash."""

    image: np.ndarray                # icon crop (BGR or grayscale)
    bbox: Tuple[int, int, int, int]  # (x, y, w, h) in original-image pixels
    confidence: float                # detection confidence
    icon_hash: str = ""              # 256-bit average-hash bit string, computed lazily

    def __post_init__(self):
        # Only compute the hash when the caller did not supply one.
        if not self.icon_hash:
            self.icon_hash = self._compute_hash()

    def _compute_hash(self) -> str:
        """Return a 16x16 average perceptual hash of the icon as a bit string."""
        if self.image is None or self.image.size == 0:
            return ""
        # Normalize to a 16x16 grayscale patch, then threshold each pixel
        # against the patch mean (classic average-hash).
        patch = cv2.resize(self.image, (16, 16), interpolation=cv2.INTER_AREA)
        if len(patch.shape) == 3:
            patch = cv2.cvtColor(patch, cv2.COLOR_BGR2GRAY)
        mean_val = patch.mean()
        bits = (patch > mean_val).flatten()
        return ''.join('1' if bit else '0' for bit in bits)
@dataclass
class ItemMatch:
    """Outcome of looking an extracted icon up in the item database."""

    name: str                       # matched item display name
    confidence: float               # match confidence
    item_id: Optional[str] = None   # database identifier, when known
    category: Optional[str] = None  # item category, when known
    matched_hash: str = ""          # hash of the database icon that matched
@dataclass
class VisionResult:
    """Aggregate result of a full screenshot-analysis pass."""

    text_regions: List[TextRegion] = field(default_factory=list)
    icon_regions: List[IconRegion] = field(default_factory=list)
    processing_time_ms: float = 0.0   # wall-clock processing time
    gpu_backend: str = "cpu"          # backend value used for this pass
    timestamp: float = field(default_factory=time.time)

    def to_dict(self) -> Dict[str, Any]:
        """Serialize to a dictionary (icons are summarized by count only)."""
        return {
            'text_regions': [region.to_dict() for region in self.text_regions],
            'icon_count': len(self.icon_regions),
            'processing_time_ms': self.processing_time_ms,
            'gpu_backend': self.gpu_backend,
            'timestamp': self.timestamp,
        }
class GPUDetector:
    """Detect and manage GPU availability.

    All methods are static. They consult the optional ``torch`` import and
    degrade gracefully to CPU when PyTorch is not installed (the module sets
    ``torch = None`` in that case, so every torch access must be guarded).
    """

    @staticmethod
    def detect_backend() -> GPUBackend:
        """Detect the best available GPU backend.

        Returns:
            The preferred backend in priority order CUDA > MPS > DirectML,
            or ``GPUBackend.CPU`` when no GPU (or no PyTorch) is available.
        """
        # Bug fix: torch may be None when the optional import failed; the
        # original code would raise AttributeError on torch.cuda below.
        if not TORCH_AVAILABLE or torch is None:
            logger.info("PyTorch unavailable, using CPU")
            return GPUBackend.CPU

        # Check CUDA first (most common)
        if torch.cuda.is_available():
            logger.info(f"CUDA available: {torch.cuda.get_device_name(0)}")
            return GPUBackend.CUDA

        # Check Apple MPS (attribute may be absent on older torch builds)
        if hasattr(torch.backends, 'mps') and torch.backends.mps.is_available():
            logger.info("Apple MPS (Metal) available")
            return GPUBackend.MPS

        # Check DirectML on Windows (separate optional package)
        try:
            import torch_directml
            if torch_directml.is_available():
                logger.info("DirectML available")
                return GPUBackend.DIRECTML
        except ImportError:
            pass

        logger.info("No GPU backend available, using CPU")
        return GPUBackend.CPU

    @staticmethod
    def get_device_string(backend: GPUBackend) -> str:
        """Return the PyTorch device string for *backend* (e.g. ``"cuda:0"``)."""
        if backend == GPUBackend.CUDA:
            return "cuda:0"
        elif backend == GPUBackend.MPS:
            return "mps"
        elif backend == GPUBackend.DIRECTML:
            return "privateuseone:0"  # DirectML device
        return "cpu"

    @staticmethod
    def get_gpu_info() -> Dict[str, Any]:
        """Get detailed GPU information.

        Returns:
            Dict with keys ``backend`` (selected backend value),
            ``cuda_available`` / ``mps_available`` flags, and ``devices`` —
            a list of CUDA device descriptors (empty without CUDA/PyTorch).
        """
        # Guard all torch accesses: torch is None when the import failed.
        torch_ok = TORCH_AVAILABLE and torch is not None
        info = {
            'backend': GPUDetector.detect_backend().value,
            'cuda_available': torch_ok and torch.cuda.is_available(),
            'mps_available': (torch_ok and hasattr(torch.backends, 'mps')
                              and torch.backends.mps.is_available()),
            'devices': []
        }

        if torch_ok and torch.cuda.is_available():
            for i in range(torch.cuda.device_count()):
                info['devices'].append({
                    'id': i,
                    'name': torch.cuda.get_device_name(i),
                    'memory_total': torch.cuda.get_device_properties(i).total_memory
                })

        return info
class OCRProcessor:
    """OCR text extraction using PaddleOCR or OpenCV fallback with GPU support.

    Backend selection happens once in ``__init__``: PaddleOCR is preferred
    (it both detects and recognizes text); the OpenCV EAST detector is the
    fallback and only *locates* text regions without recognizing them.
    """

    # Language codes accepted by this processor; anything else falls back to 'en'.
    SUPPORTED_LANGUAGES = ['en', 'sv', 'latin']  # English, Swedish, Latin script

    def __init__(self, use_gpu: bool = True, lang: str = 'en'):
        # Whether to *attempt* GPU use; actual backend is resolved in _init_ocr.
        self.use_gpu = use_gpu
        self.lang = lang if lang in self.SUPPORTED_LANGUAGES else 'en'
        self.ocr = None                  # PaddleOCR instance, when available
        self.backend = GPUBackend.CPU    # resolved compute backend
        self.opencv_detector = None      # OpenCVTextDetector fallback, when used
        self._primary_backend = None     # 'paddle' or 'opencv'
        self._init_ocr()

    def _init_ocr(self):
        """Initialize OCR with PaddleOCR or OpenCV fallback."""
        # Try PaddleOCR first (better accuracy); any failure here falls
        # through to the OpenCV path rather than raising.
        if PADDLE_AVAILABLE:
            try:
                self._init_paddle()
                if self.ocr is not None:
                    self._primary_backend = 'paddle'
                    return
            except Exception as e:
                logger.warning(f"PaddleOCR init failed: {e}")

        # Fallback to OpenCV text detection (detection only, no recognition).
        logger.info("Using OpenCV text detection as fallback")
        self.opencv_detector = OpenCVTextDetector(use_gpu=self.use_gpu)
        if self.opencv_detector.is_available():
            self._primary_backend = 'opencv'
            self.backend = GPUBackend.CUDA if self.opencv_detector.check_gpu_available() else GPUBackend.CPU
            logger.info(f"OpenCV text detector ready (GPU: {self.backend == GPUBackend.CUDA})")
        else:
            # Neither backend works; extract_text() will return empty lists.
            logger.error("No OCR backend available")

    def _init_paddle(self):
        """Initialize PaddleOCR with appropriate backend."""
        # Detect GPU only when the caller asked for it.
        if self.use_gpu:
            self.backend = GPUDetector.detect_backend()
            use_gpu_flag = self.backend != GPUBackend.CPU
        else:
            use_gpu_flag = False

        # Map our language codes to PaddleOCR model names.
        lang_map = {
            'en': 'en',
            'sv': 'latin',  # Swedish uses latin script model
            'latin': 'latin'
        }
        paddle_lang = lang_map.get(self.lang, 'en')

        logger.info(f"Initializing PaddleOCR (lang={paddle_lang}, gpu={use_gpu_flag})")

        # NOTE(review): `rec_thresh` is not a documented PaddleOCR kwarg in
        # all versions (cf. `drop_score`) — confirm against the installed
        # paddleocr release; unknown kwargs may raise here and trigger the
        # OpenCV fallback in _init_ocr.
        self.ocr = PaddleOCR(
            lang=paddle_lang,
            use_gpu=use_gpu_flag,
            show_log=False,
            use_angle_cls=True,
            det_db_thresh=0.3,
            det_db_box_thresh=0.5,
            rec_thresh=0.5,
        )

        logger.info(f"PaddleOCR initialized successfully (backend: {self.backend.value})")

    def preprocess_for_ocr(self, image: np.ndarray) -> np.ndarray:
        """Preprocess image for better OCR results.

        Returns a binarized single-channel image: grayscale -> denoise ->
        adaptive threshold (handles uneven game-UI backgrounds).
        """
        # Convert to grayscale if needed
        if len(image.shape) == 3:
            gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
        else:
            gray = image

        # Denoise (h=10, templateWindowSize=7, searchWindowSize=21)
        denoised = cv2.fastNlMeansDenoising(gray, None, 10, 7, 21)

        # Adaptive threshold for better text contrast
        binary = cv2.adaptiveThreshold(
            denoised, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
            cv2.THRESH_BINARY, 11, 2
        )

        return binary

    def extract_text(self, image: Union[str, np.ndarray, Path]) -> List[TextRegion]:
        """
        Extract text from image using PaddleOCR or OpenCV fallback.

        Args:
            image: Image path or numpy array (BGR)

        Returns:
            List of detected text regions (empty on load failure or when
            no OCR backend is available)
        """
        # Load image if path provided
        if isinstance(image, (str, Path)):
            img = cv2.imread(str(image))
            if img is None:
                logger.error(f"Failed to load image: {image}")
                return []
        else:
            # Copy so preprocessing never mutates the caller's array.
            img = image.copy()

        # Dispatch to whichever backend _init_ocr selected.
        if self._primary_backend == 'paddle' and self.ocr is not None:
            return self._extract_text_paddle(img)
        elif self._primary_backend == 'opencv' and self.opencv_detector is not None:
            return self._extract_text_opencv(img)
        else:
            logger.warning("No OCR backend available")
            return []

    def _extract_text_opencv(self, img: np.ndarray) -> List[TextRegion]:
        """Extract text using OpenCV EAST detector (detection only)."""
        detections = self.opencv_detector.detect_text(img)

        # Convert to TextRegion format (no text recognition, just detection)
        regions = []
        for det in detections:
            regions.append(TextRegion(
                text="",  # OpenCV detector doesn't recognize text, just finds regions
                confidence=det.confidence,
                bbox=det.bbox,
                language=self.lang
            ))

        return regions

    def _extract_text_paddle(self, img: np.ndarray) -> List[TextRegion]:
        """Extract text using PaddleOCR (detection + recognition)."""
        # Preprocess
        processed = self.preprocess_for_ocr(img)

        try:
            # Run OCR; cls=True enables the angle classifier for rotated text.
            result = self.ocr.ocr(processed, cls=True)

            detected = []
            # PaddleOCR returns a list per image; each line is
            # (quad_points, (text, confidence)).
            if result and result[0]:
                for line in result[0]:
                    if line is None:
                        continue
                    bbox, (text, confidence) = line

                    # Convert the 4-point quad to an axis-aligned (x, y, w, h) box.
                    x_coords = [p[0] for p in bbox]
                    y_coords = [p[1] for p in bbox]
                    x, y = int(min(x_coords)), int(min(y_coords))
                    w = int(max(x_coords) - x)
                    h = int(max(y_coords) - y)

                    detected.append(TextRegion(
                        text=text.strip(),
                        confidence=float(confidence),
                        bbox=(x, y, w, h),
                        language=self.lang
                    ))

            return detected

        except Exception as e:
            # Best-effort: OCR failures degrade to "nothing found".
            logger.error(f"OCR processing failed: {e}")
            return []

    def extract_text_from_region(self, image: np.ndarray,
                                 region: Tuple[int, int, int, int]) -> List[TextRegion]:
        """Extract text from specific region of image.

        The returned bounding boxes are translated back into full-image
        coordinates.
        """
        x, y, w, h = region
        roi = image[y:y+h, x:x+w]

        if roi.size == 0:
            return []

        regions = self.extract_text(roi)

        # Adjust coordinates back to original image
        for r in regions:
            rx, ry, rw, rh = r.bbox
            r.bbox = (x + rx, y + ry, rw, rh)

        return regions
class IconDetector:
    """Detect and extract item icons from game UI.

    Works from simple image-processing heuristics (template matching,
    thresholding, contour analysis) — no learned model is required.
    """

    # Typical Entropia Universe loot window icon sizes, width x height in px.
    ICON_SIZES = {
        'small': (32, 32),
        'medium': (48, 48),
        'large': (64, 64),
        'hud': (40, 40)
    }

    def __init__(self, template_dir: Optional[Path] = None):
        # Default template location is bundled next to this module.
        self.template_dir = template_dir or Path(__file__).parent / "templates" / "icons"
        self.templates: Dict[str, np.ndarray] = {}
        self._load_templates()

    def _load_templates(self):
        """Load icon templates for matching.

        Missing directory is tolerated (template matching is then skipped
        in detect_loot_window).
        """
        if not self.template_dir.exists():
            logger.warning(f"Template directory not found: {self.template_dir}")
            return

        for template_file in self.template_dir.glob("*.png"):
            try:
                # Template name is the file stem (e.g. "loot_window.png").
                name = template_file.stem
                template = cv2.imread(str(template_file), cv2.IMREAD_COLOR)
                if template is not None:
                    self.templates[name] = template
                    logger.debug(f"Loaded icon template: {name}")
            except Exception as e:
                logger.error(f"Failed to load template {template_file}: {e}")

    def detect_loot_window(self, image: np.ndarray) -> Optional[Tuple[int, int, int, int]]:
        """
        Detect loot window in screenshot.

        Returns bounding box of loot window (x, y, w, h) or None if not found.
        """
        # Look for common loot window indicators
        # Method 1: Template matching for "Loot" text or window frame
        if 'loot_window' in self.templates:
            result = cv2.matchTemplate(
                image, self.templates['loot_window'], cv2.TM_CCOEFF_NORMED
            )
            _, max_val, _, max_loc = cv2.minMaxLoc(result)
            # 0.7 correlation threshold: accept only confident template hits.
            if max_val > 0.7:
                h, w = self.templates['loot_window'].shape[:2]
                return (*max_loc, w, h)

        # Method 2: Detect based on typical loot window characteristics
        # Loot windows usually have a grid of items with consistent spacing
        gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

        # Look for high-contrast regions that could be icons
        _, thresh = cv2.threshold(gray, 200, 255, cv2.THRESH_BINARY)

        # Find contours
        contours, _ = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

        # Filter for icon-sized squares
        potential_icons = []
        for cnt in contours:
            x, y, w, h = cv2.boundingRect(cnt)
            aspect = w / h if h > 0 else 0

            # Check if dimensions match typical icon sizes (±5 px, roughly square)
            for size_name, (sw, sh) in self.ICON_SIZES.items():
                if abs(w - sw) < 5 and abs(h - sh) < 5 and 0.8 < aspect < 1.2:
                    potential_icons.append((x, y, w, h))
                    break

        # If we found multiple icons in a grid pattern, assume loot window
        if len(potential_icons) >= 2:
            # Calculate bounding box of all icons
            xs = [p[0] for p in potential_icons]
            ys = [p[1] for p in potential_icons]
            ws = [p[2] for p in potential_icons]
            hs = [p[3] for p in potential_icons]

            # NOTE(review): max(ws)/max(hs) are not necessarily from the
            # rightmost/bottom icon, so the box may be slightly oversized —
            # the padding below absorbs this in practice.
            min_x, max_x = min(xs), max(xs) + max(ws)
            min_y, max_y = min(ys), max(ys) + max(hs)

            # Add padding around the icon grid, clamped to the image origin.
            padding = 20
            return (
                max(0, min_x - padding),
                max(0, min_y - padding),
                max_x - min_x + padding * 2,
                max_y - min_y + padding * 2
            )

        return None

    def extract_icons_from_region(self, image: np.ndarray,
                                  region: Tuple[int, int, int, int],
                                  icon_size: str = 'medium') -> List[IconRegion]:
        """
        Extract icons from a specific region (e.g., loot window).

        Args:
            image: Full screenshot
            region: Bounding box (x, y, w, h)
            icon_size: Size preset ('small', 'medium', 'large')

        Returns:
            List of detected icon regions (deduplicated by IoU); bounding
            boxes are in full-image coordinates
        """
        x, y, w, h = region
        roi = image[y:y+h, x:x+w]

        if roi.size == 0:
            return []

        target_size = self.ICON_SIZES.get(icon_size, (48, 48))
        gray = cv2.cvtColor(roi, cv2.COLOR_BGR2GRAY)

        # Multiple threshold attempts for different icon styles; duplicates
        # across thresholds are removed at the end.
        icons = []
        thresholds = [(200, 255), (180, 255), (150, 255)]

        for thresh_low, thresh_high in thresholds:
            _, thresh = cv2.threshold(gray, thresh_low, thresh_high, cv2.THRESH_BINARY)
            contours, _ = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

            for cnt in contours:
                cx, cy, cw, ch = cv2.boundingRect(cnt)
                aspect = cw / ch if ch > 0 else 0

                # Match icon size with tolerance (±8 px, roughly square)
                if (abs(cw - target_size[0]) < 8 and
                    abs(ch - target_size[1]) < 8 and
                    0.7 < aspect < 1.3):

                    # Extract icon image
                    icon_img = roi[cy:cy+ch, cx:cx+cw]

                    # Resize to standard size so downstream hashing is stable.
                    icon_img = cv2.resize(icon_img, target_size, interpolation=cv2.INTER_AREA)

                    icons.append(IconRegion(
                        image=icon_img,
                        bbox=(x + cx, y + cy, cw, ch),  # full-image coordinates
                        confidence=0.8  # Placeholder confidence
                    ))

        # Remove duplicates (icons that overlap significantly)
        unique_icons = self._remove_duplicate_icons(icons)

        return unique_icons

    def _remove_duplicate_icons(self, icons: List[IconRegion],
                                iou_threshold: float = 0.5) -> List[IconRegion]:
        """Remove duplicate icons based on IoU (greedy non-max suppression)."""
        if not icons:
            return []

        # Sort by confidence so the highest-confidence duplicate survives.
        sorted_icons = sorted(icons, key=lambda x: x.confidence, reverse=True)

        kept = []
        for icon in sorted_icons:
            is_duplicate = False
            for kept_icon in kept:
                if self._calculate_iou(icon.bbox, kept_icon.bbox) > iou_threshold:
                    is_duplicate = True
                    break
            if not is_duplicate:
                kept.append(icon)

        return kept

    def _calculate_iou(self, box1: Tuple[int, int, int, int],
                       box2: Tuple[int, int, int, int]) -> float:
        """Calculate Intersection over Union of two (x, y, w, h) boxes."""
        x1, y1, w1, h1 = box1
        x2, y2, w2, h2 = box2

        # Intersection rectangle corners (may be empty).
        xi1 = max(x1, x2)
        yi1 = max(y1, y2)
        xi2 = min(x1 + w1, x2 + w2)
        yi2 = min(y1 + h1, y2 + h2)

        inter_area = max(0, xi2 - xi1) * max(0, yi2 - yi1)
        box1_area = w1 * h1
        box2_area = w2 * h2

        union_area = box1_area + box2_area - inter_area

        return inter_area / union_area if union_area > 0 else 0

    def detect_icons_yolo(self, image: np.ndarray,
                          model_path: Optional[str] = None) -> List[IconRegion]:
        """
        Detect icons using YOLO model (if available).

        This is a placeholder for future YOLO integration; it currently
        always returns an empty list.
        """
        # TODO: Implement YOLO detection when model is trained
        logger.debug("YOLO detection not yet implemented")
        return []
class GameVisionAI:
    """
    Main AI vision interface for game screenshot analysis.
    Combines OCR and icon detection with GPU acceleration.
    """

    def __init__(self, use_gpu: bool = True, ocr_lang: str = 'en',
                 data_dir: Optional[Path] = None):
        """
        Initialize Game Vision AI.

        Args:
            use_gpu: Enable GPU acceleration if available
            ocr_lang: Language for OCR ('en', 'sv', 'latin')
            data_dir: Directory for storing extracted data
                (defaults to ~/.lemontropia)
        """
        self.use_gpu = use_gpu
        self.data_dir = data_dir or Path.home() / ".lemontropia"
        self.extracted_icons_dir = self.data_dir / "extracted_icons"
        self.extracted_icons_dir.mkdir(parents=True, exist_ok=True)

        # Detect GPU once; OCRProcessor may still fall back further on its own.
        self.backend = GPUDetector.detect_backend() if use_gpu else GPUBackend.CPU

        # Initialize processors
        self.ocr = OCRProcessor(use_gpu=use_gpu, lang=ocr_lang)
        self.icon_detector = IconDetector()

        # Icon matching cache (icon hash -> match), reserved for repeat lookups.
        self.icon_cache: Dict[str, ItemMatch] = {}

        logger.info(f"GameVisionAI initialized (GPU: {self.backend.value})")

    def extract_text_from_image(self, image_path: Union[str, Path]) -> List[TextRegion]:
        """
        Extract all text from an image.

        Args:
            image_path: Path to screenshot image

        Returns:
            List of detected text regions
        """
        return self.ocr.extract_text(image_path)

    def extract_icons_from_image(self, image_path: Union[str, Path, np.ndarray],
                                 auto_detect_window: bool = True) -> List[IconRegion]:
        """
        Extract item icons from image.

        Args:
            image_path: Path to screenshot image, or an already-loaded
                BGR numpy array (generalized so callers that hold the image
                in memory avoid a redundant disk read)
            auto_detect_window: Automatically detect loot window

        Returns:
            List of detected icon regions (empty if the image cannot be loaded)
        """
        if isinstance(image_path, np.ndarray):
            image = image_path
        else:
            image = cv2.imread(str(image_path))
            if image is None:
                logger.error(f"Failed to load image: {image_path}")
                return []

        if auto_detect_window:
            window_region = self.icon_detector.detect_loot_window(image)
            if window_region:
                logger.debug(f"Detected loot window: {window_region}")
                return self.icon_detector.extract_icons_from_region(
                    image, window_region
                )
            logger.debug("No loot window detected, scanning full image")

        # No window detection requested (or none found): scan the full frame.
        h, w = image.shape[:2]
        return self.icon_detector.extract_icons_from_region(
            image, (0, 0, w, h)
        )

    def match_icon_to_database(self, icon_image: np.ndarray,
                               database_path: Optional[Path] = None) -> Optional[ItemMatch]:
        """
        Match extracted icon to item database.

        Args:
            icon_image: Icon image (numpy array)
            database_path: Path to icon database directory
                (only honored on the first call — the matcher is cached)

        Returns:
            ItemMatch if found, None otherwise
        """
        from .icon_matcher import IconMatcher

        # Lazy load matcher: construction may scan the database directory.
        if not hasattr(self, '_icon_matcher'):
            self._icon_matcher = IconMatcher(database_path)

        return self._icon_matcher.match_icon(icon_image)

    def process_screenshot(self, image_path: Union[str, Path],
                           extract_text: bool = True,
                           extract_icons: bool = True) -> VisionResult:
        """
        Process screenshot with all vision capabilities.

        Args:
            image_path: Path to screenshot
            extract_text: Enable text extraction
            extract_icons: Enable icon extraction

        Returns:
            VisionResult with all detections (empty result if the image
            cannot be loaded)
        """
        start_time = time.time()

        result = VisionResult(gpu_backend=self.backend.value)

        # Load image once and reuse it for both passes.
        # Bug fix: previously the icon pass re-read the file from disk.
        image = cv2.imread(str(image_path))
        if image is None:
            logger.error(f"Failed to load image: {image_path}")
            return result

        # Extract text
        if extract_text:
            result.text_regions = self.ocr.extract_text(image)
            logger.debug(f"Extracted {len(result.text_regions)} text regions")

        # Extract icons (reusing the already-loaded image)
        if extract_icons:
            result.icon_regions = self.extract_icons_from_image(image)
            logger.debug(f"Extracted {len(result.icon_regions)} icons")

            # Save extracted icons
            self._save_extracted_icons(result.icon_regions)

        result.processing_time_ms = (time.time() - start_time) * 1000

        return result

    def _save_extracted_icons(self, icons: List[IconRegion]):
        """Save extracted icons to disk under extracted_icons_dir."""
        for i, icon in enumerate(icons):
            # Filename combines hash prefix, timestamp and index for uniqueness.
            filename = f"icon_{icon.icon_hash[:16]}_{int(time.time())}_{i}.png"
            filepath = self.extracted_icons_dir / filename
            cv2.imwrite(str(filepath), icon.image)
            logger.debug(f"Saved icon: {filepath}")

    def get_gpu_info(self) -> Dict[str, Any]:
        """Get GPU information."""
        return GPUDetector.get_gpu_info()

    def is_gpu_available(self) -> bool:
        """Check if GPU acceleration is available."""
        return self.backend != GPUBackend.CPU

    def calibrate_for_game(self, sample_screenshots: List[Path]) -> Dict[str, Any]:
        """
        Calibrate vision system using sample screenshots.

        Args:
            sample_screenshots: List of sample game screenshots

        Returns:
            Calibration results with detection counts and average timing;
            screenshots that fail to process are logged and skipped
        """
        calibration = {
            'screenshots_processed': 0,
            'text_regions_detected': 0,
            'icons_detected': 0,
            'average_processing_time_ms': 0,
            'detected_regions': {}
        }

        total_time = 0

        for screenshot_path in sample_screenshots:
            try:
                start = time.time()
                result = self.process_screenshot(screenshot_path)
                elapsed = (time.time() - start) * 1000

                calibration['screenshots_processed'] += 1
                calibration['text_regions_detected'] += len(result.text_regions)
                calibration['icons_detected'] += len(result.icon_regions)
                total_time += elapsed

            except Exception as e:
                # Best-effort calibration: one bad screenshot must not abort the run.
                logger.error(f"Failed to process {screenshot_path}: {e}")

        if calibration['screenshots_processed'] > 0:
            calibration['average_processing_time_ms'] = (
                total_time / calibration['screenshots_processed']
            )

        return calibration
# Export main classes -- the module's public API surface for `import *`.
__all__ = [
    'GameVisionAI',
    'TextRegion',
    'IconRegion',
    'ItemMatch',
    'VisionResult',
    'GPUBackend',
    'GPUDetector',
    'OCRProcessor',
    'IconDetector'
]