"""
Lemontropia Suite - Game Vision AI Module

Advanced computer vision with multiple OCR backends and GPU acceleration.

OCR Backends (tried in priority order):
1. PaddleOCR - Best accuracy (requires working PyTorch)
2. EasyOCR - Good accuracy, lighter than PaddleOCR
3. Tesseract OCR - Traditional, stable
4. OpenCV EAST - Fastest, no dependencies (ultimate fallback)

Handles PyTorch DLL errors on Windows Store Python gracefully.
"""

import hashlib
import json
import logging
import time
from dataclasses import dataclass, field
from enum import Enum
from pathlib import Path
from typing import Any, Dict, List, Optional, Tuple, Union

import cv2
import numpy as np

# Hardware detection
from .hardware_detection import (
    HardwareDetector, HardwareInfo, GPUBackend,
    recommend_ocr_backend, get_hardware_info
)

# OCR backends
from .ocr_backends import (
    BaseOCRBackend, OCRTextRegion, OCRBackendInfo,
    OCRBackendFactory
)

logger = logging.getLogger(__name__)

@dataclass
class TextRegion:
    """A single piece of text found in a screenshot, with location metadata."""
    text: str
    confidence: float
    bbox: Tuple[int, int, int, int]  # x, y, w, h
    language: str = "en"
    backend: str = "unknown"  # Which OCR backend detected this

    def to_dict(self) -> Dict[str, Any]:
        """Serialize this region to a plain dictionary."""
        return {
            key: getattr(self, key)
            for key in ('text', 'confidence', 'bbox', 'language', 'backend')
        }

    @classmethod
    def from_ocr_region(cls, region: OCRTextRegion, backend: str = "unknown"):
        """Build a TextRegion from a raw OCR backend region, tagging its origin."""
        return cls(
            region.text,
            region.confidence,
            region.bbox,
            region.language,
            backend,
        )
@dataclass
class IconRegion:
    """An icon cropped out of the game UI, with location and a perceptual hash."""
    image: np.ndarray
    bbox: Tuple[int, int, int, int]  # x, y, w, h
    confidence: float
    icon_hash: str = ""

    def __post_init__(self):
        # Fill in the perceptual hash only when the caller did not supply one.
        if not self.icon_hash:
            self.icon_hash = self._compute_hash()

    def _compute_hash(self) -> str:
        """Compute a 256-bit average hash of the icon (empty string if no image)."""
        if self.image is None or self.image.size == 0:
            return ""
        # Normalize to 16x16 grayscale, then threshold each pixel at the mean.
        resized = cv2.resize(self.image, (16, 16), interpolation=cv2.INTER_AREA)
        if len(resized.shape) == 3:
            resized = cv2.cvtColor(resized, cv2.COLOR_BGR2GRAY)
        bits = (resized > resized.mean()).flatten()
        return ''.join('1' if bit else '0' for bit in bits)
@dataclass
class ItemMatch:
    """Result of matching a detected icon against the item database.

    Produced by icon-to-item lookup; ``item_id``/``category`` stay ``None``
    when the match came from hash similarity alone without database metadata.
    """
    name: str                       # Human-readable item name
    confidence: float               # Match confidence score
    item_id: Optional[str] = None   # Database identifier, if known
    category: Optional[str] = None  # Item category, if known
    matched_hash: str = ""          # Perceptual hash that produced the match
@dataclass
class VisionResult:
    """Aggregate output of one screenshot-processing pass."""
    text_regions: List[TextRegion] = field(default_factory=list)
    icon_regions: List[IconRegion] = field(default_factory=list)
    processing_time_ms: float = 0.0
    gpu_backend: str = "cpu"
    ocr_backend: str = "unknown"
    timestamp: float = field(default_factory=time.time)

    def to_dict(self) -> Dict[str, Any]:
        """Serialize the result; icons are summarized by count only."""
        summary: Dict[str, Any] = {
            'text_regions': [region.to_dict() for region in self.text_regions]
        }
        summary['icon_count'] = len(self.icon_regions)
        summary['processing_time_ms'] = self.processing_time_ms
        summary['gpu_backend'] = self.gpu_backend
        summary['ocr_backend'] = self.ocr_backend
        summary['timestamp'] = self.timestamp
        return summary
class GPUDetector:
    """Thin facade over HardwareDetector for GPU queries."""

    @staticmethod
    def detect_backend() -> GPUBackend:
        """Detect best available GPU backend."""
        return HardwareDetector.detect_all().gpu_backend

    @staticmethod
    def get_gpu_info() -> Dict[str, Any]:
        """Get detailed GPU information as a plain dictionary."""
        return HardwareDetector.detect_all().to_dict()
class UnifiedOCRProcessor:
    """
    Unified OCR processor with multiple backend support.

    Automatically selects the best available backend based on:
    1. Hardware capabilities
    2. PyTorch DLL compatibility
    3. User preferences

    Gracefully falls through backends if one fails.
    """

    SUPPORTED_LANGUAGES = ['en', 'sv', 'latin', 'de', 'fr', 'es']

    # Default priority (can be overridden)
    DEFAULT_PRIORITY = [
        'paddleocr',    # Best accuracy if available
        'easyocr',      # Good balance
        'tesseract',    # Stable fallback
        'opencv_east',  # Fastest, always works
    ]

    # Backends that load PyTorch and must be skipped when the installed
    # PyTorch has DLL problems (seen on Windows Store Python).
    _PYTORCH_BACKENDS = ('paddleocr', 'easyocr')

    def __init__(self, use_gpu: bool = True, lang: str = 'en',
                 backend_priority: Optional[List[str]] = None,
                 auto_select: bool = True):
        """
        Initialize Unified OCR Processor.

        Args:
            use_gpu: Enable GPU acceleration if available.
            lang: Language for OCR ('en', 'sv', 'latin', etc.).
                Unsupported codes silently fall back to 'en'.
            backend_priority: Custom backend priority order.
            auto_select: Automatically select best backend on construction.
        """
        self.use_gpu = use_gpu
        self.lang = lang if lang in self.SUPPORTED_LANGUAGES else 'en'
        self.backend_priority = backend_priority or self.DEFAULT_PRIORITY

        self._backend: Optional[BaseOCRBackend] = None
        self._backend_name: str = "unknown"
        self._hardware_info: HardwareInfo = HardwareDetector.detect_all()

        if auto_select:
            self._auto_select_backend()

        logger.info(f"UnifiedOCR initialized with backend: {self._backend_name}")

    def _create_if_available(self, name: str) -> Optional[BaseOCRBackend]:
        """Instantiate backend ``name``; return it only if it reports available."""
        backend = OCRBackendFactory.create_backend(
            name,
            use_gpu=self.use_gpu,
            lang=self.lang
        )
        if backend is not None and backend.is_available():
            return backend
        return None

    def _auto_select_backend(self):
        """Automatically select the best available backend.

        Walks the priority list (minus PyTorch-based backends when a DLL
        error was detected) and keeps the first backend that initializes
        and reports itself available.
        """
        if self._hardware_info.pytorch_dll_error:
            logger.warning(
                "PyTorch DLL error detected - avoiding PyTorch-based backends"
            )
            # Remove PyTorch-dependent backends from priority
            safe_backends = [
                b for b in self.backend_priority
                if b not in self._PYTORCH_BACKENDS
            ]
        else:
            safe_backends = self.backend_priority

        # NOTE(review): a previous revision also called
        # HardwareDetector.recommend_ocr_backend() here but never used the
        # result; that dead call has been removed.
        for name in safe_backends:
            backend = self._create_if_available(name)
            if backend is not None:
                self._backend = backend
                self._backend_name = name
                logger.info(f"Selected OCR backend: {name}")
                return

        # Ultimate fallback - OpenCV EAST always works
        logger.warning("All preferred backends failed, trying OpenCV EAST...")
        backend = self._create_if_available('opencv_east')
        if backend is not None:
            self._backend = backend
            self._backend_name = 'opencv_east'
            logger.info("Using OpenCV EAST as ultimate fallback")
        else:
            logger.error("CRITICAL: No OCR backend available!")

    def set_backend(self, name: str) -> bool:
        """
        Manually set OCR backend.

        Args:
            name: Backend name ('paddleocr', 'easyocr', 'tesseract', 'opencv_east')

        Returns:
            True if successful; on failure the current backend is kept.
        """
        backend = self._create_if_available(name)
        if backend is not None:
            self._backend = backend
            self._backend_name = name
            logger.info(f"Switched to OCR backend: {name}")
            return True
        logger.error(f"Failed to switch to OCR backend: {name}")
        return False

    def extract_text(self, image: Union[str, np.ndarray, Path]) -> List[TextRegion]:
        """
        Extract text from image using selected backend.

        Args:
            image: Image path or BGR numpy array.

        Returns:
            List of detected text regions (empty on any failure).
        """
        # Load image if path provided
        if isinstance(image, (str, Path)):
            img = cv2.imread(str(image))
            if img is None:
                logger.error(f"Failed to load image: {image}")
                return []
        else:
            # Copy so the backend cannot mutate the caller's array.
            img = image.copy()

        if self._backend is None:
            logger.error("No OCR backend available")
            return []

        try:
            ocr_regions = self._backend.extract_text(img)

            # Convert to TextRegion, tagging each with the producing backend.
            regions = [
                TextRegion.from_ocr_region(r, self._backend_name)
                for r in ocr_regions
            ]

            logger.debug(f"Extracted {len(regions)} text regions using {self._backend_name}")
            return regions

        except Exception as e:
            # Best-effort by design: an OCR failure must not crash the caller.
            logger.error(f"OCR extraction failed: {e}")
            return []

    def extract_text_from_region(self, image: np.ndarray,
                                 region: Tuple[int, int, int, int]) -> List[TextRegion]:
        """Extract text from a specific (x, y, w, h) region of the image.

        Returned bounding boxes are translated back into full-image coordinates.
        """
        x, y, w, h = region
        roi = image[y:y+h, x:x+w]

        if roi.size == 0:
            return []

        regions = self.extract_text(roi)

        # Adjust coordinates back to original image
        for r in regions:
            rx, ry, rw, rh = r.bbox
            r.bbox = (x + rx, y + ry, rw, rh)

        return regions

    def get_available_backends(self) -> List[OCRBackendInfo]:
        """Get information about all available backends."""
        return OCRBackendFactory.check_all_backends(self.use_gpu, self.lang)

    def get_current_backend(self) -> str:
        """Get name of current backend."""
        return self._backend_name

    def get_backend_info(self) -> Dict[str, Any]:
        """Get information about current backend."""
        if self._backend:
            return self._backend.get_info().to_dict()
        return {"error": "No backend initialized"}

    def is_recognition_supported(self) -> bool:
        """
        Check if current backend supports text recognition.

        Note: OpenCV EAST only detects text regions, doesn't recognize text.
        """
        return self._backend_name not in ['opencv_east']
# Legacy class for backward compatibility
class OCRProcessor(UnifiedOCRProcessor):
    """Legacy OCR processor - now wraps UnifiedOCRProcessor.

    Kept so existing imports of ``OCRProcessor`` keep working; it adds no
    behavior of its own.
    """
    pass
class IconDetector:
    """Detect and extract item icons from game UI."""

    # Typical Entropia Universe loot window icon sizes as (width, height)
    ICON_SIZES = {
        'small': (32, 32),
        'medium': (48, 48),
        'large': (64, 64),
        'hud': (40, 40)
    }

    def __init__(self, template_dir: Optional[Path] = None):
        """
        Args:
            template_dir: Directory of PNG icon templates; defaults to the
                package's ``templates/icons`` folder.
        """
        self.template_dir = template_dir or Path(__file__).parent / "templates" / "icons"
        self.templates: Dict[str, np.ndarray] = {}
        self._load_templates()

    def _load_templates(self):
        """Load icon templates for matching (a missing directory is tolerated)."""
        if not self.template_dir.exists():
            logger.warning(f"Template directory not found: {self.template_dir}")
            return

        for template_file in self.template_dir.glob("*.png"):
            try:
                name = template_file.stem
                template = cv2.imread(str(template_file), cv2.IMREAD_COLOR)
                if template is not None:
                    self.templates[name] = template
                    logger.debug(f"Loaded icon template: {name}")
            except Exception as e:
                # One unreadable file must not abort loading the rest.
                logger.error(f"Failed to load template {template_file}: {e}")

    def detect_loot_window(self, image: np.ndarray) -> Optional[Tuple[int, int, int, int]]:
        """Detect loot window in screenshot.

        Returns (x, y, w, h) or None. Tries template matching first, then a
        heuristic that looks for a cluster of icon-sized bright squares.
        """
        # Method 1: direct template match, if a window template was loaded
        if 'loot_window' in self.templates:
            result = cv2.matchTemplate(
                image, self.templates['loot_window'], cv2.TM_CCOEFF_NORMED
            )
            _, max_val, _, max_loc = cv2.minMaxLoc(result)
            if max_val > 0.7:  # empirical match-confidence threshold
                h, w = self.templates['loot_window'].shape[:2]
                return (*max_loc, w, h)

        # Method 2: Detect based on typical loot window characteristics
        gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
        _, thresh = cv2.threshold(gray, 200, 255, cv2.THRESH_BINARY)

        contours, _ = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

        # Filter for icon-sized, roughly square contours
        potential_icons = []
        for cnt in contours:
            x, y, w, h = cv2.boundingRect(cnt)
            aspect = w / h if h > 0 else 0

            # Only the sizes matter here, so iterate values() directly
            # (the size names were previously fetched and never used).
            for sw, sh in self.ICON_SIZES.values():
                if abs(w - sw) < 5 and abs(h - sh) < 5 and 0.8 < aspect < 1.2:
                    potential_icons.append((x, y, w, h))
                    break

        # If we found multiple icons in a grid pattern, assume loot window
        if len(potential_icons) >= 2:
            xs = [p[0] for p in potential_icons]
            ys = [p[1] for p in potential_icons]
            ws = [p[2] for p in potential_icons]
            hs = [p[3] for p in potential_icons]

            min_x, max_x = min(xs), max(xs) + max(ws)
            min_y, max_y = min(ys), max(ys) + max(hs)

            # Pad the bounding region to capture window chrome around the icons.
            padding = 20
            return (
                max(0, min_x - padding),
                max(0, min_y - padding),
                max_x - min_x + padding * 2,
                max_y - min_y + padding * 2
            )

        return None

    def extract_icons_from_region(self, image: np.ndarray,
                                  region: Tuple[int, int, int, int],
                                  icon_size: str = 'medium') -> List["IconRegion"]:
        """Extract icons from a specific (x, y, w, h) region.

        Runs several brightness thresholds and keeps contours close to the
        requested icon size; duplicates across thresholds are removed by IoU.
        Returned bounding boxes are in full-image coordinates.
        """
        x, y, w, h = region
        roi = image[y:y+h, x:x+w]

        if roi.size == 0:
            return []

        target_size = self.ICON_SIZES.get(icon_size, (48, 48))
        gray = cv2.cvtColor(roi, cv2.COLOR_BGR2GRAY)

        icons = []
        # Progressively lower thresholds to catch darker icon borders.
        thresholds = [(200, 255), (180, 255), (150, 255)]

        for thresh_low, thresh_high in thresholds:
            _, thresh = cv2.threshold(gray, thresh_low, thresh_high, cv2.THRESH_BINARY)
            contours, _ = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

            for cnt in contours:
                cx, cy, cw, ch = cv2.boundingRect(cnt)
                aspect = cw / ch if ch > 0 else 0

                # Accept near-target-size, roughly square contours.
                if (abs(cw - target_size[0]) < 8 and
                        abs(ch - target_size[1]) < 8 and
                        0.7 < aspect < 1.3):

                    icon_img = roi[cy:cy+ch, cx:cx+cw]
                    icon_img = cv2.resize(icon_img, target_size, interpolation=cv2.INTER_AREA)

                    icons.append(IconRegion(
                        image=icon_img,
                        bbox=(x + cx, y + cy, cw, ch),
                        confidence=0.8
                    ))

        # Remove duplicates found at multiple thresholds
        unique_icons = self._remove_duplicate_icons(icons)

        return unique_icons

    def _remove_duplicate_icons(self, icons: List["IconRegion"],
                                iou_threshold: float = 0.5) -> List["IconRegion"]:
        """Remove duplicate icons based on IoU, keeping the higher-confidence one."""
        if not icons:
            return []

        sorted_icons = sorted(icons, key=lambda x: x.confidence, reverse=True)

        kept = []
        for icon in sorted_icons:
            is_duplicate = False
            for kept_icon in kept:
                if self._calculate_iou(icon.bbox, kept_icon.bbox) > iou_threshold:
                    is_duplicate = True
                    break
            if not is_duplicate:
                kept.append(icon)

        return kept

    def _calculate_iou(self, box1: Tuple[int, int, int, int],
                       box2: Tuple[int, int, int, int]) -> float:
        """Calculate Intersection over Union of two (x, y, w, h) boxes."""
        x1, y1, w1, h1 = box1
        x2, y2, w2, h2 = box2

        xi1 = max(x1, x2)
        yi1 = max(y1, y2)
        xi2 = min(x1 + w1, x2 + w2)
        yi2 = min(y1 + h1, y2 + h2)

        inter_area = max(0, xi2 - xi1) * max(0, yi2 - yi1)
        box1_area = w1 * h1
        box2_area = w2 * h2

        union_area = box1_area + box2_area - inter_area

        return inter_area / union_area if union_area > 0 else 0
class GameVisionAI:
    """
    Main AI vision interface for game screenshot analysis.
    Combines OCR and icon detection with multiple backend support.
    """

    def __init__(self, use_gpu: bool = True, ocr_lang: str = 'en',
                 ocr_backend: Optional[str] = None,
                 data_dir: Optional[Path] = None):
        """
        Initialize Game Vision AI.

        Args:
            use_gpu: Enable GPU acceleration if available
            ocr_lang: Language for OCR
            ocr_backend: Specific OCR backend to use (None for auto)
            data_dir: Directory for storing extracted data
        """
        self.use_gpu = use_gpu
        self.data_dir = data_dir or Path.home() / ".lemontropia"
        self.extracted_icons_dir = self.data_dir / "extracted_icons"
        self.extracted_icons_dir.mkdir(parents=True, exist_ok=True)

        # Detect hardware once; reused by the reporting helpers below.
        self.hardware_info = HardwareDetector.detect_all()
        self.backend = self.hardware_info.gpu_backend

        # Initialize OCR processor (auto-select unless a backend was forced)
        self.ocr = UnifiedOCRProcessor(
            use_gpu=use_gpu,
            lang=ocr_lang,
            auto_select=(ocr_backend is None)
        )

        # Set specific backend if requested
        if ocr_backend:
            self.ocr.set_backend(ocr_backend)

        # Initialize icon detector
        self.icon_detector = IconDetector()

        logger.info(f"GameVisionAI initialized (GPU: {self.backend.value}, "
                    f"OCR: {self.ocr.get_current_backend()})")

    def extract_text_from_image(self, image_path: Union[str, Path]) -> List[TextRegion]:
        """Extract all text from an image."""
        return self.ocr.extract_text(image_path)

    def extract_icons_from_image(self, image_path: Union[str, Path],
                                 auto_detect_window: bool = True) -> List[IconRegion]:
        """Extract item icons from an image file."""
        image = cv2.imread(str(image_path))
        if image is None:
            logger.error(f"Failed to load image: {image_path}")
            return []
        return self._extract_icons(image, auto_detect_window)

    def _extract_icons(self, image: np.ndarray,
                       auto_detect_window: bool = True) -> List[IconRegion]:
        """Extract icons from an already-loaded BGR image.

        Shared by extract_icons_from_image() and process_screenshot() so a
        screenshot is decoded only once per call chain; also deduplicates the
        previously copy-pasted full-image-scan branch.
        """
        if auto_detect_window:
            window_region = self.icon_detector.detect_loot_window(image)
            if window_region:
                logger.debug(f"Detected loot window: {window_region}")
                return self.icon_detector.extract_icons_from_region(
                    image, window_region
                )
            logger.debug("No loot window detected, scanning full image")

        # Fall back to scanning the whole frame.
        h, w = image.shape[:2]
        return self.icon_detector.extract_icons_from_region(
            image, (0, 0, w, h)
        )

    def process_screenshot(self, image_path: Union[str, Path],
                           extract_text: bool = True,
                           extract_icons: bool = True) -> VisionResult:
        """
        Process screenshot with all vision capabilities.

        Args:
            image_path: Path to screenshot
            extract_text: Enable text extraction
            extract_icons: Enable icon extraction

        Returns:
            VisionResult with all detections (empty result if load fails)
        """
        start_time = time.time()

        result = VisionResult(
            gpu_backend=self.backend.value,
            ocr_backend=self.ocr.get_current_backend()
        )

        # Load image once and reuse it for both text and icon passes
        # (a previous revision re-read the file from disk for icons).
        image = cv2.imread(str(image_path))
        if image is None:
            logger.error(f"Failed to load image: {image_path}")
            return result

        # Extract text
        if extract_text:
            result.text_regions = self.ocr.extract_text(image)
            logger.debug(f"Extracted {len(result.text_regions)} text regions")

        # Extract icons
        if extract_icons:
            result.icon_regions = self._extract_icons(image)
            logger.debug(f"Extracted {len(result.icon_regions)} icons")

            # Save extracted icons
            self._save_extracted_icons(result.icon_regions)

        result.processing_time_ms = (time.time() - start_time) * 1000

        return result

    def _save_extracted_icons(self, icons: List[IconRegion]):
        """Save extracted icons to disk, named by hash prefix + timestamp + index."""
        for i, icon in enumerate(icons):
            filename = f"icon_{icon.icon_hash[:16]}_{int(time.time())}_{i}.png"
            filepath = self.extracted_icons_dir / filename
            cv2.imwrite(str(filepath), icon.image)
            logger.debug(f"Saved icon: {filepath}")

    def get_gpu_info(self) -> Dict[str, Any]:
        """Get GPU information."""
        return self.hardware_info.to_dict()

    def is_gpu_available(self) -> bool:
        """Check if GPU acceleration is available."""
        return self.backend != GPUBackend.CPU

    def get_ocr_backends(self) -> List[Dict[str, Any]]:
        """Get information about all available OCR backends."""
        backends = self.ocr.get_available_backends()
        return [b.to_dict() for b in backends]

    def switch_ocr_backend(self, name: str) -> bool:
        """Switch to a different OCR backend."""
        return self.ocr.set_backend(name)

    def calibrate_for_game(self, sample_screenshots: List[Path]) -> Dict[str, Any]:
        """Calibrate vision system using sample screenshots.

        Processes each screenshot and aggregates detection counts and timing;
        failed screenshots are logged and skipped.
        """
        calibration = {
            'screenshots_processed': 0,
            'text_regions_detected': 0,
            'icons_detected': 0,
            'average_processing_time_ms': 0,
            'detected_regions': {},
            'ocr_backend': self.ocr.get_current_backend(),
            'gpu_backend': self.backend.value,
        }

        total_time = 0

        for screenshot_path in sample_screenshots:
            try:
                start = time.time()
                result = self.process_screenshot(screenshot_path)
                elapsed = (time.time() - start) * 1000

                calibration['screenshots_processed'] += 1
                calibration['text_regions_detected'] += len(result.text_regions)
                calibration['icons_detected'] += len(result.icon_regions)
                total_time += elapsed

            except Exception as e:
                logger.error(f"Failed to process {screenshot_path}: {e}")

        if calibration['screenshots_processed'] > 0:
            calibration['average_processing_time_ms'] = (
                total_time / calibration['screenshots_processed']
            )

        return calibration

    @staticmethod
    def diagnose() -> Dict[str, Any]:
        """Run full diagnostic on vision system."""
        return {
            'hardware': HardwareDetector.detect_all().to_dict(),
            'ocr_backends': [
                b.to_dict() for b in
                OCRBackendFactory.check_all_backends()
            ],
            'recommendations': {
                'ocr_backend': HardwareDetector.recommend_ocr_backend(),
                'gpu': GPUDetector.detect_backend().value,
            }
        }
# Public API of this module. Hardware/backend types are re-exported here
# for caller convenience so importers don't need the submodules directly.
__all__ = [
    'GameVisionAI',
    'UnifiedOCRProcessor',
    'OCRProcessor',  # Legacy alias kept for backward compatibility
    'TextRegion',
    'IconRegion',
    'ItemMatch',
    'VisionResult',
    'GPUBackend',
    'GPUDetector',
    'IconDetector',
    'HardwareDetector',
    'OCRBackendFactory',
    'BaseOCRBackend',
]