Lemontropia-Suite/modules/game_vision.py

366 lines
12 KiB
Python

"""
Lemontropia Suite - Game Vision System
Computer vision module for reading UI elements from Entropia Universe.
"""
import cv2
import numpy as np
import logging
from pathlib import Path
from decimal import Decimal
from dataclasses import dataclass
from typing import Optional, Tuple, List, Dict, Any
import mss
import time
# Try to import PaddleOCR; fall back to None if it is not available.
try:
    from paddleocr import PaddleOCR
    PADDLE_AVAILABLE = True
except ImportError:
    # Keep the name bound so that any reference to PaddleOCR yields None
    # instead of raising NameError. Callers should still gate on
    # PADDLE_AVAILABLE before trying to construct an OCR engine.
    PaddleOCR = None
    PADDLE_AVAILABLE = False

logger = logging.getLogger(__name__)
@dataclass
class DetectedText:
    """One piece of recognized text together with its metadata."""

    # The recognized string.
    text: str

    # Recognition confidence score for ``text``.
    confidence: float

    # Rectangle associated with the text, as (x, y, w, h).
    region: Tuple[int, int, int, int]
@dataclass
class EquippedGear:
    """Snapshot of the gear that was detected as equipped in the game.

    Every field is optional: a name stays ``None`` (and its confidence 0.0)
    when the corresponding item was not detected.
    """

    # Weapon slot.
    weapon_name: Optional[str] = None
    weapon_confidence: float = 0.0

    # Armor slot.
    armor_name: Optional[str] = None
    armor_confidence: float = 0.0

    # Tool slot.
    tool_name: Optional[str] = None
    tool_confidence: float = 0.0

    # When the snapshot was taken; None if no timestamp was recorded.
    detected_at: Optional[float] = None
@dataclass
class TargetInfo:
    """Information about the mob that is currently targeted."""

    # Name of the targeted mob, or None when no name was read.
    mob_name: Optional[str] = None

    # Confidence score attached to ``mob_name``.
    confidence: float = 0.0

    # Target health in percent, or None when it is not known.
    health_percent: Optional[int] = None

    # When the target was detected; None if no timestamp was recorded.
    detected_at: Optional[float] = None
class ScreenCapture:
    """Cross-platform screen capture built on ``mss``.

    The ``mss`` instance created in ``__init__`` holds operating-system
    resources. Release them with :meth:`close`, or use the object as a
    context manager::

        with ScreenCapture() as capture:
            frame = capture.capture_full_screen()

    After :meth:`close` the instance can no longer capture.
    """

    def __init__(self):
        self.sct = mss.mss()

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self.close()

    def close(self) -> None:
        """Release the underlying ``mss`` handle. Safe to call repeatedly."""
        sct, self.sct = self.sct, None
        if sct is not None:
            sct.close()

    def _grab(self, monitor) -> np.ndarray:
        """Grab ``monitor`` and return the pixels as a BGR image for OpenCV."""
        screenshot = self.sct.grab(monitor)
        # The grabbed pixels are BGRA; drop the alpha channel.
        return cv2.cvtColor(np.array(screenshot), cv2.COLOR_BGRA2BGR)

    def capture_full_screen(self) -> np.ndarray:
        """Capture the primary monitor and return it as a BGR image."""
        # In mss, index 0 is the combined virtual screen; index 1 is the
        # first individual monitor.
        return self._grab(self.sct.monitors[1])

    def capture_region(self, x: int, y: int, w: int, h: int) -> np.ndarray:
        """Capture the screen rectangle at (x, y) with size (w, h) as BGR."""
        return self._grab({"left": x, "top": y, "width": w, "height": h})

    def find_window(self, window_title: str) -> Optional[Tuple[int, int, int, int]]:
        """Find a visible window whose title contains ``window_title``.

        Windows only (requires ``win32gui``). The match is a case-insensitive
        substring test; the first matching window wins.

        Returns:
            ``(x, y, w, h)`` of the window, or ``None`` when no window
            matches or the lookup fails (the failure is logged).
        """
        try:
            # Imported lazily so the module still loads where pywin32 is
            # not installed; the resulting ImportError is handled below.
            import win32gui

            needle = window_title.lower()
            matches = []

            def callback(hwnd, extra):
                if not win32gui.IsWindowVisible(hwnd):
                    return
                if needle in win32gui.GetWindowText(hwnd).lower():
                    left, top, right, bottom = win32gui.GetWindowRect(hwnd)
                    extra.append((left, top, right - left, bottom - top))

            win32gui.EnumWindows(callback, matches)
            return matches[0] if matches else None
        except Exception as e:
            logger.error(f"Failed to find window: {e}")
            return None
class TemplateMatcher:
    """Template matching for finding UI elements.

    Templates are the ``*.png`` files in ``templates_dir``, loaded once as
    grayscale images and keyed by file stem.
    """

    def __init__(self, templates_dir: Optional[Path] = None):
        """Load every template found in ``templates_dir``.

        Args:
            templates_dir: Directory holding the template PNGs. Defaults to
                the ``templates`` directory next to this module.
        """
        self.templates_dir = templates_dir or Path(__file__).parent / "templates"
        self.templates: Dict[str, np.ndarray] = {}
        self._load_templates()

    def _load_templates(self):
        """Load template images, skipping files that cannot be decoded."""
        if not self.templates_dir.exists():
            logger.warning(f"Templates directory not found: {self.templates_dir}")
            return
        for template_file in self.templates_dir.glob("*.png"):
            try:
                image = cv2.imread(str(template_file), cv2.IMREAD_GRAYSCALE)
            except Exception as e:
                logger.error(f"Failed to load template {template_file}: {e}")
                continue
            # cv2.imread reports an unreadable file by returning None rather
            # than raising. Storing that would break every later match.
            if image is None:
                logger.error(
                    f"Failed to load template {template_file}: unreadable image"
                )
                continue
            name = template_file.stem
            self.templates[name] = image
            logger.info(f"Loaded template: {name}")

    @staticmethod
    def _to_gray(image: np.ndarray) -> np.ndarray:
        """Return ``image`` as grayscale; 2-D input is passed through."""
        if image.ndim == 2:
            return image
        return cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

    @staticmethod
    def _match(gray: np.ndarray, template: np.ndarray,
               threshold: float) -> Optional[Tuple[int, int, int, int]]:
        """Return the best match of ``template`` in ``gray`` as (x, y, w, h)."""
        tpl_h, tpl_w = template.shape[:2]
        img_h, img_w = gray.shape[:2]
        # matchTemplate requires the image to be at least as large as the
        # template; a larger template simply cannot match.
        if tpl_h > img_h or tpl_w > img_w:
            return None
        result = cv2.matchTemplate(gray, template, cv2.TM_CCOEFF_NORMED)
        _, max_val, _, max_loc = cv2.minMaxLoc(result)
        if max_val >= threshold:
            x, y = max_loc
            return (x, y, tpl_w, tpl_h)
        return None

    def find_template(self, screenshot: np.ndarray, template_name: str,
                      threshold: float = 0.8) -> Optional[Tuple[int, int, int, int]]:
        """Find a named template in a screenshot.

        Args:
            screenshot: BGR image (a grayscale image is accepted as well).
            template_name: Key of a loaded template.
            threshold: Minimum normalized correlation score for a match.

        Returns:
            ``(x, y, w, h)`` of the best match, or ``None`` when the template
            is unknown, larger than the screenshot, or scores below
            ``threshold``.
        """
        if template_name not in self.templates:
            logger.warning(f"Template not found: {template_name}")
            return None
        return self._match(
            self._to_gray(screenshot), self.templates[template_name], threshold
        )

    def find_all_templates(self, screenshot: np.ndarray, threshold: float = 0.7) -> Dict[str, Tuple[int, int, int, int]]:
        """Find all known templates in a screenshot.

        Returns:
            Mapping of template name to ``(x, y, w, h)`` for every template
            that matched with a score of at least ``threshold``.
        """
        if not self.templates:
            return {}
        # Convert once instead of once per template.
        gray = self._to_gray(screenshot)
        found = {}
        for name, template in self.templates.items():
            match = self._match(gray, template, threshold)
            if match:
                found[name] = match
        return found
class GameVision:
    """
    Main computer vision interface for reading game UI.

    Combines screen capture, template matching (to locate UI elements) and
    OCR (to read their text). OCR is optional: when PaddleOCR is missing or
    fails to initialize, ``self.ocr`` stays ``None`` and text reads return
    no results.
    """

    # Height in pixels of the strip below the weapon icon that is OCR'd.
    _WEAPON_LABEL_HEIGHT = 20

    def __init__(self, use_ocr: bool = True):
        """Create the capture and matching helpers and, optionally, the OCR engine.

        Args:
            use_ocr: Try to initialize PaddleOCR when it is importable.
        """
        self.capture = ScreenCapture()
        self.template_matcher = TemplateMatcher()
        # Initialize OCR if available
        self.ocr = None
        if use_ocr and PADDLE_AVAILABLE:
            try:
                self.ocr = PaddleOCR(
                    lang='en',
                    use_gpu=False,
                    show_log=False,
                    det_model_dir=None,
                    rec_model_dir=None,
                )
                logger.info("PaddleOCR initialized")
            except Exception as e:
                logger.error(f"Failed to initialize PaddleOCR: {e}")
        # Region definitions (relative to game window)
        # These would be calibrated based on actual UI
        self.regions = {
            'weapon_slot': None,  # To be defined
            'armor_slot': None,
            'target_window': None,
            'health_bar': None,
        }
        self.last_equipped: Optional[EquippedGear] = None
        self.last_target: Optional[TargetInfo] = None

    def read_text_region(self, screenshot: np.ndarray, region: Tuple[int, int, int, int]) -> List[DetectedText]:
        """Read text from a specific region using OCR.

        Args:
            screenshot: BGR image to read from.
            region: ``(x, y, w, h)`` rectangle inside ``screenshot``. Parts
                outside the image are ignored.

        Returns:
            One ``DetectedText`` per recognized line. Empty when OCR is
            unavailable, the region does not overlap the image, or OCR fails.
        """
        if self.ocr is None:
            logger.warning("OCR not available")
            return []
        x, y, w, h = region
        # Clamp to the image bounds. Without this, a negative coordinate
        # wraps around in numpy slicing and a region past an edge yields an
        # empty crop, which makes cv2.cvtColor raise.
        img_h, img_w = screenshot.shape[:2]
        x0, y0 = max(x, 0), max(y, 0)
        x1, y1 = min(x + w, img_w), min(y + h, img_h)
        if x1 <= x0 or y1 <= y0:
            logger.debug(f"Text region {region} lies outside the screenshot")
            return []
        crop = screenshot[y0:y1, x0:x1]
        # Preprocess for better OCR
        gray = cv2.cvtColor(crop, cv2.COLOR_BGR2GRAY)
        _, thresh = cv2.threshold(gray, 150, 255, cv2.THRESH_BINARY)
        try:
            result = self.ocr.ocr(thresh, cls=False)
            detected = []
            if result and result[0]:
                for line in result[0]:
                    _bbox, (text, confidence) = line
                    detected.append(DetectedText(
                        text=text.strip(),
                        confidence=confidence,
                        region=(x, y, w, h),  # Simplified: the requested region
                    ))
            return detected
        except Exception as e:
            logger.error(f"OCR failed: {e}")
            return []

    def _best_text(self, screenshot: np.ndarray, region: Tuple[int, int, int, int],
                   min_confidence: float) -> Optional[DetectedText]:
        """Return the most confident text in ``region`` if it beats ``min_confidence``."""
        texts = self.read_text_region(screenshot, region)
        if not texts:
            return None
        best = max(texts, key=lambda item: item.confidence)
        return best if best.confidence > min_confidence else None

    def _detect_weapon(self, screenshot: np.ndarray) -> Optional[DetectedText]:
        """Locate the weapon slot and read the label directly below it."""
        # Find weapon slot region using template matching
        region = self.template_matcher.find_template(screenshot, 'weapon_slot')
        if not region:
            logger.debug("Weapon slot not found")
            return None
        # Adjust region to focus on the text area below the icon
        x, y, w, h = region
        text_region = (x, y + h, w, self._WEAPON_LABEL_HEIGHT)
        return self._best_text(screenshot, text_region, 0.7)

    def _detect_armor(self, screenshot: np.ndarray) -> Optional[DetectedText]:
        """Locate the armor slot and read the text inside it."""
        region = self.template_matcher.find_template(screenshot, 'armor_slot')
        if not region:
            return None
        return self._best_text(screenshot, region, 0.7)

    def detect_equipped_weapon(self, screenshot: Optional[np.ndarray] = None) -> Optional[str]:
        """Detect currently equipped weapon name.

        Args:
            screenshot: BGR image to analyze; a fresh full-screen capture is
                taken when omitted.

        Returns:
            The weapon name, or ``None`` when the slot is not found or no
            text is read with confidence above 0.7.
        """
        if screenshot is None:
            screenshot = self.capture.capture_full_screen()
        hit = self._detect_weapon(screenshot)
        return hit.text if hit is not None else None

    def detect_equipped_armor(self, screenshot: Optional[np.ndarray] = None) -> Optional[str]:
        """Detect currently equipped armor name.

        Args:
            screenshot: BGR image to analyze; a fresh full-screen capture is
                taken when omitted.

        Returns:
            The armor name, or ``None`` when the slot is not found or no
            text is read with confidence above 0.7.
        """
        if screenshot is None:
            screenshot = self.capture.capture_full_screen()
        hit = self._detect_armor(screenshot)
        return hit.text if hit is not None else None

    def detect_target_mob(self, screenshot: Optional[np.ndarray] = None) -> Optional[TargetInfo]:
        """Detect current target mob name.

        Args:
            screenshot: BGR image to analyze; a fresh full-screen capture is
                taken when omitted.

        Returns:
            A ``TargetInfo`` with name, confidence and timestamp, or ``None``
            when the target window is not found or the first text line has
            confidence of 0.6 or less.
        """
        if screenshot is None:
            screenshot = self.capture.capture_full_screen()
        region = self.template_matcher.find_template(screenshot, 'target_window')
        if not region:
            return None
        texts = self.read_text_region(screenshot, region)
        if not texts:
            return None
        # First text is usually the mob name
        first = texts[0]
        if first.confidence > 0.6:
            return TargetInfo(
                mob_name=first.text,
                confidence=first.confidence,
                detected_at=time.time(),
            )
        return None

    def scan_equipped_gear(self) -> EquippedGear:
        """Full scan of all equipped gear.

        Takes one full-screen capture, detects weapon and armor on it, and
        stores the result in ``self.last_equipped``. Confidences are the OCR
        confidences of the accepted text.
        """
        screenshot = self.capture.capture_full_screen()
        gear = EquippedGear(detected_at=time.time())
        weapon = self._detect_weapon(screenshot)
        # An empty OCR string counts as "nothing detected".
        if weapon is not None and weapon.text:
            gear.weapon_name = weapon.text
            gear.weapon_confidence = weapon.confidence
        armor = self._detect_armor(screenshot)
        if armor is not None and armor.text:
            gear.armor_name = armor.text
            gear.armor_confidence = armor.confidence
        self.last_equipped = gear
        return gear

    def poll_target(self, interval: float = 2.0) -> Optional[TargetInfo]:
        """Poll for target changes.

        Args:
            interval: Seconds for which the last detected target is reused
                before a new detection is run.

        Returns:
            The cached target while it is younger than ``interval``;
            otherwise the result of a fresh detection, which may be ``None``.
            A failed detection does not clear ``self.last_target``.
        """
        current_time = time.time()
        if (self.last_target and
                self.last_target.detected_at and
                current_time - self.last_target.detected_at < interval):
            return self.last_target
        target = self.detect_target_mob()
        if target:
            self.last_target = target
        return target
class TemplateCaptureTool:
    """
    Interactive tool for capturing UI templates.
    Usage: Run this to create template images for UI elements.
    """

    def __init__(self, templates_dir: Optional[Path] = None):
        """Prepare screen capture and make sure the output directory exists.

        Args:
            templates_dir: Where templates are written. Defaults to
                ``~/.lemontropia/templates``. Note that ``TemplateMatcher``
                loads from a ``templates`` directory next to this module by
                default, so pass the same directory to both if captured
                templates should be picked up.
        """
        self.capture = ScreenCapture()
        self.templates_dir = (
            templates_dir or Path.home() / ".lemontropia" / "templates"
        )
        self.templates_dir.mkdir(parents=True, exist_ok=True)

    def capture_template(self, name: str, region: Tuple[int, int, int, int]) -> Path:
        """Capture and save a template.

        Args:
            name: Template name; the file is written as ``<name>.png``.
            region: Screen rectangle ``(x, y, w, h)`` to capture.

        Returns:
            Path of the written PNG file.

        Raises:
            OSError: If the image could not be written.
        """
        x, y, w, h = region
        img = self.capture.capture_region(x, y, w, h)
        filepath = self.templates_dir / f"{name}.png"
        # cv2.imwrite reports failure through its return value instead of
        # raising, so it must be checked before claiming success.
        if not cv2.imwrite(str(filepath), img):
            raise OSError(f"Failed to write template image: {filepath}")
        logger.info(f"Template saved: {filepath}")
        return filepath

    def interactive_capture(self):
        """Interactive template capture.

        Walks through a fixed list of template names, waiting for Enter
        before each one. Region selection is not implemented yet, so no
        template file is written; only a placeholder message is printed.
        """
        print("Template Capture Tool")
        print("=" * 50)
        print("1. Position your mouse at top-left of UI element")
        print("2. Press SPACE to capture")
        print("3. Enter template name")
        print("4. Repeat for all templates")
        print("=" * 50)
        templates_to_capture = [
            'weapon_slot',
            'armor_slot',
            'target_window',
            'health_bar',
            'inventory_icon',
        ]
        for template_name in templates_to_capture:
            # NOTE(review): the banner says SPACE, but input() waits for Enter.
            input(f"\nReady to capture: {template_name}")
            print("Taking screenshot in 2 seconds...")
            time.sleep(2)
            # Full screenshot; unused until region selection exists.
            full = self.capture.capture_full_screen()
            # TODO: Allow user to draw region
            # For now, use hardcoded regions based on typical EU layout
            print(f"Template {template_name} would be captured here")
# Export main classes. DetectedText is included because it is the element
# type returned by GameVision.read_text_region.
__all__ = [
    'GameVision',
    'ScreenCapture',
    'TemplateMatcher',
    'DetectedText',
    'EquippedGear',
    'TargetInfo',
    'TemplateCaptureTool',
]