# Lemontropia-Suite/modules/game_vision_ai.py
"""
Lemontropia Suite - Game Vision AI Module
Advanced computer vision with multiple OCR backends and GPU acceleration.
OCR Backends (in priority order):
1. OpenCV EAST - Fastest, no dependencies (primary fallback)
2. EasyOCR - Good accuracy, lighter than PaddleOCR
3. Tesseract OCR - Traditional, stable
4. PaddleOCR - Best accuracy (requires working PyTorch)
Handles PyTorch DLL errors on Windows Store Python gracefully.
"""
import cv2
import numpy as np
import logging
import time
from pathlib import Path
from dataclasses import dataclass, field
from typing import Optional, Tuple, List, Dict, Any, Union
from enum import Enum
import json
import hashlib
logger = logging.getLogger(__name__)
# Import hardware detection
from .hardware_detection import (
HardwareDetector, HardwareInfo, GPUBackend,
recommend_ocr_backend, get_hardware_info
)
# Import OCR backends
from .ocr_backends import (
BaseOCRBackend, OCRTextRegion, OCRBackendInfo,
OCRBackendFactory
)
@dataclass
class TextRegion:
    """One piece of recognized text, with its location and provenance."""
    text: str
    confidence: float
    bbox: Tuple[int, int, int, int]  # x, y, w, h
    language: str = "en"
    backend: str = "unknown"  # name of the OCR backend that produced this region

    def to_dict(self) -> Dict[str, Any]:
        """Serialize to a plain dict (for JSON output / logging)."""
        return dict(
            text=self.text,
            confidence=self.confidence,
            bbox=self.bbox,
            language=self.language,
            backend=self.backend,
        )

    @classmethod
    def from_ocr_region(cls, region: OCRTextRegion, backend: str = "unknown"):
        """Adapt a backend-level OCRTextRegion into a TextRegion."""
        return cls(
            region.text,
            region.confidence,
            region.bbox,
            region.language,
            backend,
        )
@dataclass
class IconRegion:
    """A candidate item-icon crop plus its location and a perceptual hash."""
    image: np.ndarray
    bbox: Tuple[int, int, int, int]  # x, y, w, h
    confidence: float
    icon_hash: str = ""  # filled lazily from the image if not supplied

    def __post_init__(self):
        # Only compute the hash when the caller did not provide one.
        if not self.icon_hash:
            self.icon_hash = self._compute_hash()

    def _compute_hash(self) -> str:
        """Return a 16x16 average-hash of the icon as a '0'/'1' bitstring."""
        if self.image is None or self.image.size == 0:
            return ""
        # Normalize to a small grayscale patch, then threshold on the mean.
        small = cv2.resize(self.image, (16, 16), interpolation=cv2.INTER_AREA)
        if len(small.shape) == 3:
            small = cv2.cvtColor(small, cv2.COLOR_BGR2GRAY)
        bits = (small > small.mean()).flatten()
        return ''.join('1' if bit else '0' for bit in bits)
@dataclass
class ItemMatch:
    """Outcome of looking up a detected icon against the item database."""
    name: str                        # matched item name
    confidence: float                # match score from the lookup
    item_id: Optional[str] = None    # database id, when known
    category: Optional[str] = None   # item category, when known
    matched_hash: str = ""           # perceptual hash that produced the match
@dataclass
class VisionResult:
    """Aggregate output of one full screenshot-processing pass."""
    text_regions: List[TextRegion] = field(default_factory=list)
    icon_regions: List[IconRegion] = field(default_factory=list)
    processing_time_ms: float = 0.0
    gpu_backend: str = "cpu"
    ocr_backend: str = "unknown"
    timestamp: float = field(default_factory=time.time)

    def to_dict(self) -> Dict[str, Any]:
        """Summarize as a plain dict; icons are reported by count only."""
        return dict(
            text_regions=[region.to_dict() for region in self.text_regions],
            icon_count=len(self.icon_regions),
            processing_time_ms=self.processing_time_ms,
            gpu_backend=self.gpu_backend,
            ocr_backend=self.ocr_backend,
            timestamp=self.timestamp,
        )
class GPUDetector:
    """Thin facade over HardwareDetector for GPU-related queries."""

    @staticmethod
    def detect_backend() -> GPUBackend:
        """Return the best available GPU backend per hardware detection."""
        return HardwareDetector.detect_all().gpu_backend

    @staticmethod
    def get_gpu_info() -> Dict[str, Any]:
        """Return the full detected-hardware report as a dict."""
        return HardwareDetector.detect_all().to_dict()
class UnifiedOCRProcessor:
    """
    Unified OCR processor with multiple backend support.

    Automatically selects the best available backend based on:
    1. Hardware capabilities
    2. PyTorch DLL compatibility
    3. User preferences

    Gracefully falls through backends if one fails.
    """

    SUPPORTED_LANGUAGES = ['en', 'sv', 'latin', 'de', 'fr', 'es']

    # Default priority (can be overridden via ``backend_priority``)
    DEFAULT_PRIORITY = [
        'paddleocr',    # Best accuracy if available
        'easyocr',      # Good balance
        'tesseract',    # Stable fallback
        'opencv_east',  # Fastest, always works
    ]

    def __init__(self, use_gpu: bool = True, lang: str = 'en',
                 backend_priority: Optional[List[str]] = None,
                 auto_select: bool = True):
        """
        Initialize Unified OCR Processor.

        Args:
            use_gpu: Enable GPU acceleration if available
            lang: Language for OCR ('en', 'sv', 'latin', etc.);
                unsupported values silently fall back to 'en'
            backend_priority: Custom backend priority order
            auto_select: Automatically select best backend
        """
        self.use_gpu = use_gpu
        self.lang = lang if lang in self.SUPPORTED_LANGUAGES else 'en'
        self.backend_priority = backend_priority or self.DEFAULT_PRIORITY
        self._backend: Optional["BaseOCRBackend"] = None
        self._backend_name: str = "unknown"
        self._hardware_info: "HardwareInfo" = HardwareDetector.detect_all()
        if auto_select:
            self._auto_select_backend()
        logger.info(f"UnifiedOCR initialized with backend: {self._backend_name}")

    def _activate_backend(self, name: str) -> bool:
        """Create backend *name* and make it current if it is usable.

        Shared by auto-selection, the ultimate fallback and set_backend().
        Returns True on success; on failure the current backend is untouched.
        """
        backend = OCRBackendFactory.create_backend(
            name,
            use_gpu=self.use_gpu,
            lang=self.lang
        )
        if backend is not None and backend.is_available():
            self._backend = backend
            self._backend_name = name
            return True
        return False

    def _auto_select_backend(self):
        """Automatically select the best available backend."""
        # PyTorch DLL errors (seen on Windows Store Python) make the
        # torch-based backends unusable - skip them entirely in that case.
        if self._hardware_info.pytorch_dll_error:
            logger.warning(
                "PyTorch DLL error detected - avoiding PyTorch-based backends"
            )
            safe_backends = [
                b for b in self.backend_priority
                if b not in ['paddleocr', 'easyocr']
            ]
        else:
            safe_backends = self.backend_priority

        for name in safe_backends:
            if self._activate_backend(name):
                logger.info(f"Selected OCR backend: {name}")
                return

        # Ultimate fallback - OpenCV EAST has no heavy dependencies.
        logger.warning("All preferred backends failed, trying OpenCV EAST...")
        if self._activate_backend('opencv_east'):
            logger.info("Using OpenCV EAST as ultimate fallback")
        else:
            logger.error("CRITICAL: No OCR backend available!")

    def set_backend(self, name: str) -> bool:
        """
        Manually set OCR backend.

        Args:
            name: Backend name ('paddleocr', 'easyocr', 'tesseract', 'opencv_east')
        Returns:
            True if successful
        """
        if self._activate_backend(name):
            logger.info(f"Switched to OCR backend: {name}")
            return True
        logger.error(f"Failed to switch to OCR backend: {name}")
        return False

    def extract_text(self, image: Union[str, np.ndarray, Path]) -> List["TextRegion"]:
        """
        Extract text from image using selected backend.

        Args:
            image: Image path or numpy array (BGR, as loaded by cv2.imread)
        Returns:
            List of detected text regions; empty list on any failure
        """
        if isinstance(image, (str, Path)):
            img = cv2.imread(str(image))
            if img is None:
                logger.error(f"Failed to load image: {image}")
                return []
        else:
            # Copy so the backend cannot mutate the caller's array.
            img = image.copy()
        if self._backend is None:
            logger.error("No OCR backend available")
            return []
        try:
            ocr_regions = self._backend.extract_text(img)
            # Tag each region with the backend that produced it.
            regions = [
                TextRegion.from_ocr_region(r, self._backend_name)
                for r in ocr_regions
            ]
            logger.debug(f"Extracted {len(regions)} text regions using {self._backend_name}")
            return regions
        except Exception as e:
            logger.error(f"OCR extraction failed: {e}")
            return []

    def extract_text_from_region(self, image: np.ndarray,
                                 region: Tuple[int, int, int, int]) -> List["TextRegion"]:
        """Extract text from a specific (x, y, w, h) region of the image."""
        x, y, w, h = region
        roi = image[y:y+h, x:x+w]
        if roi.size == 0:
            return []
        regions = self.extract_text(roi)
        # Shift bboxes from ROI-local back to full-image coordinates.
        for r in regions:
            rx, ry, rw, rh = r.bbox
            r.bbox = (x + rx, y + ry, rw, rh)
        return regions

    def get_available_backends(self) -> List["OCRBackendInfo"]:
        """Get information about all available backends."""
        return OCRBackendFactory.check_all_backends(self.use_gpu, self.lang)

    def get_current_backend(self) -> str:
        """Get name of current backend."""
        return self._backend_name

    def get_backend_info(self) -> Dict[str, Any]:
        """Get information about current backend."""
        if self._backend:
            return self._backend.get_info().to_dict()
        return {"error": "No backend initialized"}

    def is_recognition_supported(self) -> bool:
        """
        Check if current backend supports text recognition.
        Note: OpenCV EAST only detects text regions, doesn't recognize text.
        """
        return self._backend_name not in ['opencv_east']
# Legacy class for backward compatibility
class OCRProcessor(UnifiedOCRProcessor):
    """Legacy OCR processor - now wraps UnifiedOCRProcessor."""
class IconDetector:
    """Detect and extract item icons from game UI screenshots."""

    # Typical Entropia Universe loot window icon sizes (width, height) in px
    ICON_SIZES = {
        'small': (32, 32),
        'medium': (48, 48),
        'large': (64, 64),
        'hud': (40, 40)
    }

    def __init__(self, template_dir: Optional[Path] = None):
        """
        Args:
            template_dir: Directory of PNG icon templates; defaults to
                ``<module dir>/templates/icons``.
        """
        self.template_dir = template_dir or Path(__file__).parent / "templates" / "icons"
        self.templates: Dict[str, np.ndarray] = {}
        self._load_templates()

    def _load_templates(self):
        """Load icon templates for matching (one per PNG, keyed by file stem)."""
        if not self.template_dir.exists():
            logger.warning(f"Template directory not found: {self.template_dir}")
            return
        for template_file in self.template_dir.glob("*.png"):
            try:
                name = template_file.stem
                template = cv2.imread(str(template_file), cv2.IMREAD_COLOR)
                if template is not None:
                    self.templates[name] = template
                    logger.debug(f"Loaded icon template: {name}")
            except Exception as e:
                logger.error(f"Failed to load template {template_file}: {e}")

    def detect_loot_window(self, image: np.ndarray) -> Optional[Tuple[int, int, int, int]]:
        """
        Detect the loot window in a screenshot.

        Tries template matching first, then falls back to a heuristic that
        looks for a cluster of icon-sized bright squares.

        Returns:
            (x, y, w, h) of the window, or None if nothing was found.
        """
        # Method 1: direct template match, if a template is available
        if 'loot_window' in self.templates:
            result = cv2.matchTemplate(
                image, self.templates['loot_window'], cv2.TM_CCOEFF_NORMED
            )
            _, max_val, _, max_loc = cv2.minMaxLoc(result)
            if max_val > 0.7:  # empirical match-confidence cutoff
                h, w = self.templates['loot_window'].shape[:2]
                return (*max_loc, w, h)
        # Method 2: detect based on typical loot window characteristics
        gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
        _, thresh = cv2.threshold(gray, 200, 255, cv2.THRESH_BINARY)
        contours, _ = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
        # Keep near-square contours within 5 px of any known icon size.
        potential_icons = []
        for cnt in contours:
            x, y, w, h = cv2.boundingRect(cnt)
            aspect = w / h if h > 0 else 0
            for sw, sh in self.ICON_SIZES.values():
                if abs(w - sw) < 5 and abs(h - sh) < 5 and 0.8 < aspect < 1.2:
                    potential_icons.append((x, y, w, h))
                    break
        # Two or more icon-like squares => assume they form the loot grid and
        # return their padded bounding box.
        # NOTE(review): max(ws)/max(hs) approximates the extent of the
        # right/bottom-most icon; exact only when icons share one size.
        if len(potential_icons) >= 2:
            xs = [p[0] for p in potential_icons]
            ys = [p[1] for p in potential_icons]
            ws = [p[2] for p in potential_icons]
            hs = [p[3] for p in potential_icons]
            min_x, max_x = min(xs), max(xs) + max(ws)
            min_y, max_y = min(ys), max(ys) + max(hs)
            padding = 20
            return (
                max(0, min_x - padding),
                max(0, min_y - padding),
                max_x - min_x + padding * 2,
                max_y - min_y + padding * 2
            )
        return None

    def extract_icons_from_region(self, image: np.ndarray,
                                  region: Tuple[int, int, int, int],
                                  icon_size: str = 'medium') -> List["IconRegion"]:
        """
        Extract icon candidates from a region of the image.

        Args:
            image: Full BGR screenshot.
            region: (x, y, w, h) region to scan.
            icon_size: Key into ICON_SIZES; unknown keys default to (48, 48).

        Returns:
            De-duplicated IconRegions, bboxes in full-image coordinates.
        """
        x, y, w, h = region
        roi = image[y:y+h, x:x+w]
        if roi.size == 0:
            return []
        target_size = self.ICON_SIZES.get(icon_size, (48, 48))
        gray = cv2.cvtColor(roi, cv2.COLOR_BGR2GRAY)
        icons = []
        # Try progressively lower binarization cutoffs. The second tuple
        # element is cv2.threshold's maxval (value assigned above the cutoff),
        # not an upper bound.
        thresholds = [(200, 255), (180, 255), (150, 255)]
        for cutoff, maxval in thresholds:
            _, thresh = cv2.threshold(gray, cutoff, maxval, cv2.THRESH_BINARY)
            contours, _ = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
            for cnt in contours:
                cx, cy, cw, ch = cv2.boundingRect(cnt)
                aspect = cw / ch if ch > 0 else 0
                # Accept near-square contours within 8 px of the target size.
                if (abs(cw - target_size[0]) < 8 and
                        abs(ch - target_size[1]) < 8 and
                        0.7 < aspect < 1.3):
                    icon_img = roi[cy:cy+ch, cx:cx+cw]
                    icon_img = cv2.resize(icon_img, target_size, interpolation=cv2.INTER_AREA)
                    icons.append(IconRegion(
                        image=icon_img,
                        bbox=(x + cx, y + cy, cw, ch),
                        confidence=0.8
                    ))
        # Multiple thresholds find the same icon repeatedly - collapse by IoU.
        return self._remove_duplicate_icons(icons)

    def _remove_duplicate_icons(self, icons: List["IconRegion"],
                                iou_threshold: float = 0.5) -> List["IconRegion"]:
        """Greedy non-max suppression: keep highest-confidence icon per spot."""
        if not icons:
            return []
        sorted_icons = sorted(icons, key=lambda x: x.confidence, reverse=True)
        kept = []
        for icon in sorted_icons:
            is_duplicate = False
            for kept_icon in kept:
                if self._calculate_iou(icon.bbox, kept_icon.bbox) > iou_threshold:
                    is_duplicate = True
                    break
            if not is_duplicate:
                kept.append(icon)
        return kept

    def _calculate_iou(self, box1: Tuple[int, int, int, int],
                       box2: Tuple[int, int, int, int]) -> float:
        """Intersection over Union of two (x, y, w, h) boxes; 0 when disjoint."""
        x1, y1, w1, h1 = box1
        x2, y2, w2, h2 = box2
        xi1 = max(x1, x2)
        yi1 = max(y1, y2)
        xi2 = min(x1 + w1, x2 + w2)
        yi2 = min(y1 + h1, y2 + h2)
        inter_area = max(0, xi2 - xi1) * max(0, yi2 - yi1)
        box1_area = w1 * h1
        box2_area = w2 * h2
        union_area = box1_area + box2_area - inter_area
        return inter_area / union_area if union_area > 0 else 0
class GameVisionAI:
    """
    Main AI vision interface for game screenshot analysis.
    Combines OCR and icon detection with multiple backend support.
    """

    def __init__(self, use_gpu: bool = True, ocr_lang: str = 'en',
                 ocr_backend: Optional[str] = None,
                 data_dir: Optional[Path] = None):
        """
        Initialize Game Vision AI.

        Args:
            use_gpu: Enable GPU acceleration if available
            ocr_lang: Language for OCR
            ocr_backend: Specific OCR backend to use (None for auto)
            data_dir: Directory for storing extracted data
        """
        self.use_gpu = use_gpu
        self.data_dir = data_dir or Path.home() / ".lemontropia"
        self.extracted_icons_dir = self.data_dir / "extracted_icons"
        self.extracted_icons_dir.mkdir(parents=True, exist_ok=True)
        # Detect hardware once and cache the result.
        self.hardware_info = HardwareDetector.detect_all()
        self.backend = self.hardware_info.gpu_backend
        # Initialize OCR processor; skip auto-select when a backend is forced.
        self.ocr = UnifiedOCRProcessor(
            use_gpu=use_gpu,
            lang=ocr_lang,
            auto_select=(ocr_backend is None)
        )
        if ocr_backend:
            self.ocr.set_backend(ocr_backend)
        self.icon_detector = IconDetector()
        logger.info(f"GameVisionAI initialized (GPU: {self.backend.value}, "
                    f"OCR: {self.ocr.get_current_backend()})")

    def extract_text_from_image(self, image_path: Union[str, Path]) -> List["TextRegion"]:
        """Extract all text from an image."""
        return self.ocr.extract_text(image_path)

    def _extract_icons(self, image: np.ndarray,
                       auto_detect_window: bool = True) -> List["IconRegion"]:
        """Icon extraction on an already-loaded BGR image (shared helper).

        Falls back to a full-image scan when no loot window is detected or
        when window detection is disabled.
        """
        if auto_detect_window:
            window_region = self.icon_detector.detect_loot_window(image)
            if window_region:
                logger.debug(f"Detected loot window: {window_region}")
                return self.icon_detector.extract_icons_from_region(
                    image, window_region
                )
            logger.debug("No loot window detected, scanning full image")
        h, w = image.shape[:2]
        return self.icon_detector.extract_icons_from_region(
            image, (0, 0, w, h)
        )

    def extract_icons_from_image(self, image_path: Union[str, Path],
                                 auto_detect_window: bool = True) -> List["IconRegion"]:
        """Extract item icons from an image file."""
        image = cv2.imread(str(image_path))
        if image is None:
            logger.error(f"Failed to load image: {image_path}")
            return []
        return self._extract_icons(image, auto_detect_window)

    def process_screenshot(self, image_path: Union[str, Path],
                           extract_text: bool = True,
                           extract_icons: bool = True) -> "VisionResult":
        """
        Process screenshot with all vision capabilities.

        Args:
            image_path: Path to screenshot
            extract_text: Enable text extraction
            extract_icons: Enable icon extraction
        Returns:
            VisionResult with all detections (empty result if load fails)
        """
        start_time = time.time()
        result = VisionResult(
            gpu_backend=self.backend.value,
            ocr_backend=self.ocr.get_current_backend()
        )
        # Load the image once and share it between the text and icon passes
        # (previously the icon pass re-read the file from disk).
        image = cv2.imread(str(image_path))
        if image is None:
            logger.error(f"Failed to load image: {image_path}")
            return result
        if extract_text:
            result.text_regions = self.ocr.extract_text(image)
            logger.debug(f"Extracted {len(result.text_regions)} text regions")
        if extract_icons:
            result.icon_regions = self._extract_icons(image)
            logger.debug(f"Extracted {len(result.icon_regions)} icons")
            # Persist icon crops for later database matching.
            self._save_extracted_icons(result.icon_regions)
        result.processing_time_ms = (time.time() - start_time) * 1000
        return result

    def _save_extracted_icons(self, icons: List["IconRegion"]):
        """Save extracted icon crops to disk, named by hash + timestamp."""
        for i, icon in enumerate(icons):
            filename = f"icon_{icon.icon_hash[:16]}_{int(time.time())}_{i}.png"
            filepath = self.extracted_icons_dir / filename
            cv2.imwrite(str(filepath), icon.image)
            logger.debug(f"Saved icon: {filepath}")

    def get_gpu_info(self) -> Dict[str, Any]:
        """Get GPU information."""
        return self.hardware_info.to_dict()

    def is_gpu_available(self) -> bool:
        """Check if GPU acceleration is available."""
        return self.backend != GPUBackend.CPU

    def get_ocr_backends(self) -> List[Dict[str, Any]]:
        """Get information about all available OCR backends."""
        backends = self.ocr.get_available_backends()
        return [b.to_dict() for b in backends]

    def switch_ocr_backend(self, name: str) -> bool:
        """Switch to a different OCR backend."""
        return self.ocr.set_backend(name)

    def calibrate_for_game(self, sample_screenshots: List[Path]) -> Dict[str, Any]:
        """Calibrate vision system using sample screenshots.

        Returns aggregate counts and the average per-screenshot time in ms;
        screenshots that raise are logged and skipped.
        """
        calibration = {
            'screenshots_processed': 0,
            'text_regions_detected': 0,
            'icons_detected': 0,
            'average_processing_time_ms': 0,
            'detected_regions': {},
            'ocr_backend': self.ocr.get_current_backend(),
            'gpu_backend': self.backend.value,
        }
        total_time = 0
        for screenshot_path in sample_screenshots:
            try:
                start = time.time()
                result = self.process_screenshot(screenshot_path)
                elapsed = (time.time() - start) * 1000
                calibration['screenshots_processed'] += 1
                calibration['text_regions_detected'] += len(result.text_regions)
                calibration['icons_detected'] += len(result.icon_regions)
                total_time += elapsed
            except Exception as e:
                logger.error(f"Failed to process {screenshot_path}: {e}")
        if calibration['screenshots_processed'] > 0:
            calibration['average_processing_time_ms'] = (
                total_time / calibration['screenshots_processed']
            )
        return calibration

    @staticmethod
    def diagnose() -> Dict[str, Any]:
        """Run full diagnostic on vision system."""
        return {
            'hardware': HardwareDetector.detect_all().to_dict(),
            'ocr_backends': [
                b.to_dict() for b in
                OCRBackendFactory.check_all_backends()
            ],
            'recommendations': {
                'ocr_backend': HardwareDetector.recommend_ocr_backend(),
                'gpu': GPUDetector.detect_backend().value,
            }
        }
# Export main classes
# Names re-exported via `from ... import *`; keep in sync with definitions above.
__all__ = [
    'GameVisionAI',
    'UnifiedOCRProcessor',
    'OCRProcessor',  # Legacy
    'TextRegion',
    'IconRegion',
    'ItemMatch',
    'VisionResult',
    'GPUBackend',
    'GPUDetector',
    'IconDetector',
    'HardwareDetector',
    'OCRBackendFactory',
    'BaseOCRBackend',
]