# Lemontropia-Suite/modules/icon_matcher.py

"""
Lemontropia Suite - Icon Matcher Module
Icon similarity matching using multiple algorithms.
Supports perceptual hashing, template matching, and feature-based matching.
"""
import json
import logging
import pickle
import sqlite3
from dataclasses import asdict, dataclass, field
from pathlib import Path
from typing import Any, Dict, List, Optional, Tuple

import cv2
import numpy as np
logger = logging.getLogger(__name__)
@dataclass
class MatchResult:
    """Result of an icon match attempt.

    Attributes:
        item_name: Display name of the matched item.
        confidence: Match confidence in [0, 1].
        match_method: Algorithm that produced the match ('hash', 'feature', 'template').
        item_id: Optional stable identifier of the item.
        category: Optional item category.
        metadata: Free-form extra data; guaranteed to be a dict (never None)
            after construction.
    """
    item_name: str
    confidence: float
    match_method: str
    item_id: Optional[str] = None
    category: Optional[str] = None
    # default_factory gives each instance its own dict (the annotation said
    # Dict but the default was None); __post_init__ still normalizes an
    # explicitly passed metadata=None for backward compatibility.
    metadata: Dict[str, Any] = field(default_factory=dict)

    def __post_init__(self):
        if self.metadata is None:
            self.metadata = {}
class PerceptualHash:
    """Perceptual hash implementations (aHash, dHash, wHash) for icon matching.

    All hashes are returned as '0'/'1' bit strings. average_hash and
    wavelet_hash both produce hash_size**2 bits so the two stay comparable
    via hamming_distance regardless of which algorithm produced the stored
    hash (important because wavelet_hash falls back to average_hash).
    """

    @staticmethod
    def _to_gray(image: np.ndarray) -> np.ndarray:
        """Collapse a 3-channel BGR image to grayscale; pass grayscale through."""
        if len(image.shape) == 3:
            return cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
        return image

    @staticmethod
    def average_hash(image: np.ndarray, hash_size: int = 16) -> str:
        """Compute average hash (aHash): one bit per pixel, set when > mean."""
        gray = PerceptualHash._to_gray(image)
        resized = cv2.resize(gray, (hash_size, hash_size), interpolation=cv2.INTER_AREA)
        avg = resized.mean()
        hash_bits = (resized > avg).flatten()
        return ''.join(['1' if b else '0' for b in hash_bits])

    @staticmethod
    def difference_hash(image: np.ndarray, hash_size: int = 16) -> str:
        """Compute difference hash (dHash): one bit per horizontal neighbor pair."""
        gray = PerceptualHash._to_gray(image)
        # hash_size+1 columns yield exactly hash_size differences per row.
        resized = cv2.resize(gray, (hash_size + 1, hash_size), interpolation=cv2.INTER_AREA)
        diff = resized[:, 1:] > resized[:, :-1]
        return ''.join(['1' if b else '0' for b in diff.flatten()])

    @staticmethod
    def wavelet_hash(image: np.ndarray, hash_size: int = 16) -> str:
        """Compute wavelet hash (wHash) from Haar approximation coefficients.

        Falls back to average_hash when PyWavelets is unavailable. Both
        branches return hash_size**2 bits, so stored wHashes remain
        comparable no matter which branch produced them.
        """
        try:
            import pywt
        except ImportError:
            logger.debug("PyWavelets not available, falling back to average hash")
            return PerceptualHash.average_hash(image, hash_size)
        gray = PerceptualHash._to_gray(image)
        # Resize to the next power of two >= 2*hash_size: one dwt2 level
        # halves each dimension, so cA comes out at least hash_size square.
        # (The original resized to roughly hash_size itself, so cA was only
        # (hash_size/2)**2 bits -- a length hamming_distance rejects against
        # the other hashes and against the fallback branch above.)
        size = 2 ** (2 * hash_size - 1).bit_length()
        resized = cv2.resize(gray, (size, size), interpolation=cv2.INTER_AREA)
        # Apply one level of the Haar wavelet transform.
        cA, (cH, cV, cD) = pywt.dwt2(resized, 'haar')
        # Crop to exactly hash_size x hash_size (cA may be larger when
        # hash_size is not itself a power of two).
        cA = cA[:hash_size, :hash_size]
        avg = cA.mean()
        hash_bits = (cA > avg).flatten()
        return ''.join(['1' if b else '0' for b in hash_bits])

    @staticmethod
    def hamming_distance(hash1: str, hash2: str) -> int:
        """Count differing bit positions.

        Raises:
            ValueError: if the hashes have different lengths.
        """
        if len(hash1) != len(hash2):
            raise ValueError("Hashes must be same length")
        return sum(c1 != c2 for c1, c2 in zip(hash1, hash2))

    @staticmethod
    def similarity(hash1: str, hash2: str) -> float:
        """Return similarity in [0, 1]: 1.0 for identical hashes."""
        distance = PerceptualHash.hamming_distance(hash1, hash2)
        max_distance = len(hash1)
        return 1.0 - (distance / max_distance)
class FeatureMatcher:
    """ORB-feature based icon matching with brute-force Hamming matching."""

    def __init__(self):
        # 500 keypoints is plenty for small icon crops; crossCheck keeps only
        # mutually-best descriptor pairs.
        self.orb = cv2.ORB_create(nfeatures=500)
        self.matcher = cv2.BFMatcher(cv2.NORM_HAMMING, crossCheck=True)

    def extract_features(self, image: np.ndarray) -> Tuple[List, np.ndarray]:
        """Return (keypoints, descriptors) for a BGR or grayscale image."""
        gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY) if len(image.shape) == 3 else image
        return self.orb.detectAndCompute(gray, None)

    def match_features(self, desc1: np.ndarray, desc2: np.ndarray,
                       threshold: float = 0.7) -> float:
        """Score descriptor-set similarity as a confidence in [0, 1].

        Args:
            desc1: Query descriptors (may be None).
            desc2: Candidate descriptors (may be None).
            threshold: Kept for interface compatibility; the current scoring
                uses a fixed Hamming-distance cutoff instead.

        Returns:
            0.0 when either side is missing, too few pairs match, or matching
            fails; otherwise the fraction of strong matches capped at 1.0.
        """
        if desc1 is None or desc2 is None:
            return 0.0
        try:
            pairs = sorted(self.matcher.match(desc1, desc2),
                           key=lambda m: m.distance)
            # Fewer than 4 total matches is too weak to trust at all.
            if len(pairs) < 4:
                return 0.0
            # "Strong" pairs sit below a fixed Hamming distance of 50.
            strong = [m for m in pairs if m.distance < 50]
            if not strong:
                return 0.0
            # Saturate at 20 strong matches -> full confidence.
            return min(len(strong) / 20, 1.0)
        except Exception as e:
            logger.debug(f"Feature matching failed: {e}")
            return 0.0
class TemplateMatcher:
    """Template matching for icons."""

    @staticmethod
    def match(template: np.ndarray, image: np.ndarray,
              methods: List[int] = None) -> float:
        """Match *template* against *image* using several cv2 methods.

        The image is resized to the template's size first, which collapses
        each result map to a single score. Scores are normalized so higher is
        always better, and the best score across all methods is returned.

        Args:
            template: Reference icon image.
            image: Candidate image (resized to the template's size).
            methods: cv2 matchTemplate method constants; defaults to
                CCOEFF/CCORR/SQDIFF normalized variants.

        Returns:
            Best confidence score in [0, 1] (0.0 if every method failed).
        """
        if methods is None:
            methods = [
                cv2.TM_CCOEFF_NORMED,
                cv2.TM_CCORR_NORMED,
                cv2.TM_SQDIFF_NORMED
            ]
        # Force equal sizes so matchTemplate yields a single value.
        h, w = template.shape[:2]
        image = cv2.resize(image, (w, h), interpolation=cv2.INTER_AREA)
        best_score = 0.0
        for method in methods:
            try:
                result = cv2.matchTemplate(image, template, method)
                min_val, max_val, _, _ = cv2.minMaxLoc(result)
                # SQDIFF is a dissimilarity: its best match is the *minimum*,
                # so invert min_val. (The original inverted max_val, which was
                # only equivalent because the 1x1 result makes min == max.)
                if method == cv2.TM_SQDIFF_NORMED:
                    score = 1.0 - min_val
                else:
                    score = max_val
                best_score = max(best_score, score)
            except Exception as e:
                logger.debug(f"Template matching failed: {e}")
                continue
        return best_score
class IconDatabase:
    """SQLite-backed store mapping icons to perceptual hashes and ORB features."""

    def __init__(self, db_path: Optional[Path] = None):
        """Open the database, creating the file and schema if needed.

        Args:
            db_path: Database file location; defaults to
                ~/.lemontropia/icon_database.db.
        """
        self.db_path = db_path or Path.home() / ".lemontropia" / "icon_database.db"
        self.db_path.parent.mkdir(parents=True, exist_ok=True)
        self._init_database()

    def _connect(self) -> sqlite3.Connection:
        """Open a new connection to the backing database file."""
        return sqlite3.connect(str(self.db_path))

    def _init_database(self):
        """Create the icons table and its indexes if they do not exist."""
        conn = self._connect()
        try:
            cursor = conn.cursor()
            cursor.execute('''
                CREATE TABLE IF NOT EXISTS icons (
                    id INTEGER PRIMARY KEY AUTOINCREMENT,
                    item_name TEXT NOT NULL,
                    item_id TEXT,
                    category TEXT,
                    avg_hash TEXT,
                    diff_hash TEXT,
                    wavelet_hash TEXT,
                    features BLOB,
                    metadata TEXT,
                    created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
                )
            ''')
            cursor.execute('''
                CREATE INDEX IF NOT EXISTS idx_avg_hash ON icons(avg_hash)
            ''')
            cursor.execute('''
                CREATE INDEX IF NOT EXISTS idx_item_name ON icons(item_name)
            ''')
            conn.commit()
        finally:
            # Always release the handle; the original leaked it on any error.
            conn.close()

    def add_icon(self, item_name: str, image: np.ndarray,
                 item_id: Optional[str] = None,
                 category: Optional[str] = None,
                 metadata: Optional[Dict] = None) -> bool:
        """Hash *image*, extract its ORB features, and insert one row.

        Returns:
            True on success, False if hashing or the insert failed (the
            error is logged, matching the original best-effort contract).
        """
        try:
            # Compute all three perceptual hashes up front.
            avg_hash = PerceptualHash.average_hash(image)
            diff_hash = PerceptualHash.difference_hash(image)
            wavelet_hash = PerceptualHash.wavelet_hash(image)
            # ORB descriptors may legitimately be None for featureless icons.
            feature_matcher = FeatureMatcher()
            _, features = feature_matcher.extract_features(image)
            # NOTE: pickle is acceptable here only because the blob is written
            # and read locally; never load a database file from an untrusted
            # source through this class.
            features_blob = pickle.dumps(features) if features is not None else None
            conn = self._connect()
            try:
                cursor = conn.cursor()
                cursor.execute('''
                    INSERT INTO icons
                    (item_name, item_id, category, avg_hash, diff_hash, wavelet_hash, features, metadata)
                    VALUES (?, ?, ?, ?, ?, ?, ?, ?)
                ''', (
                    item_name, item_id, category,
                    avg_hash, diff_hash, wavelet_hash,
                    features_blob,
                    json.dumps(metadata) if metadata else None
                ))
                conn.commit()
            finally:
                conn.close()
            logger.debug(f"Added icon to database: {item_name}")
            return True
        except Exception as e:
            logger.error(f"Failed to add icon: {e}")
            return False

    def find_by_hash(self, avg_hash: str, max_distance: int = 10) -> List[Tuple[str, float, Dict]]:
        """Find icons whose average hash is within *max_distance* bits.

        Args:
            avg_hash: Query aHash bit string.
            max_distance: Maximum Hamming distance to accept.

        Returns:
            (item_name, similarity, info) tuples sorted by descending
            similarity, where info holds item_id/category/metadata.
        """
        results = []
        conn = self._connect()
        try:
            cursor = conn.cursor()
            cursor.execute('SELECT item_name, avg_hash, diff_hash, item_id, category, metadata FROM icons')
            for row in cursor.fetchall():
                item_name, db_avg_hash, db_diff_hash, item_id, category, metadata_json = row
                # Rows hashed with a different hash_size are not comparable;
                # the original let hamming_distance raise ValueError here
                # (and leaked the connection). Skip them instead.
                if not db_avg_hash or len(db_avg_hash) != len(avg_hash):
                    continue
                distance = PerceptualHash.hamming_distance(avg_hash, db_avg_hash)
                if distance <= max_distance:
                    similarity = 1.0 - (distance / len(avg_hash))
                    metadata = json.loads(metadata_json) if metadata_json else {}
                    results.append((item_name, similarity, {
                        'item_id': item_id,
                        'category': category,
                        'metadata': metadata
                    }))
        finally:
            conn.close()
        # Best match first.
        results.sort(key=lambda x: x[1], reverse=True)
        return results

    def get_all_icons(self) -> List[Dict]:
        """Return every icon row as a plain dict (feature blobs excluded)."""
        results = []
        conn = self._connect()
        try:
            cursor = conn.cursor()
            cursor.execute('''
                SELECT item_name, item_id, category, avg_hash, metadata
                FROM icons
            ''')
            for row in cursor.fetchall():
                results.append({
                    'item_name': row[0],
                    'item_id': row[1],
                    'category': row[2],
                    'avg_hash': row[3],
                    'metadata': json.loads(row[4]) if row[4] else {}
                })
        finally:
            conn.close()
        return results

    def get_icon_count(self) -> int:
        """Return the total number of icons stored."""
        conn = self._connect()
        try:
            cursor = conn.cursor()
            cursor.execute('SELECT COUNT(*) FROM icons')
            return cursor.fetchone()[0]
        finally:
            conn.close()

    def delete_icon(self, item_name: str) -> bool:
        """Delete all rows named *item_name*; True if anything was removed."""
        conn = self._connect()
        try:
            cursor = conn.cursor()
            cursor.execute('DELETE FROM icons WHERE item_name = ?', (item_name,))
            conn.commit()
            return cursor.rowcount > 0
        finally:
            conn.close()
class IconMatcher:
    """
    Main icon matching interface.
    Combines perceptual hashing, ORB feature matching and file-based template
    matching, returning the single highest-confidence result.
    """
    # Confidence thresholds (fractions in [0, 1]).
    CONFIDENCE_HIGH = 0.85
    CONFIDENCE_MEDIUM = 0.70
    CONFIDENCE_LOW = 0.50

    def __init__(self, database_path: Optional[Path] = None,
                 icons_dir: Optional[Path] = None):
        """
        Initialize icon matcher.
        Args:
            database_path: Path to icon database
            icons_dir: Directory containing icon images for matching
        """
        self.database = IconDatabase(database_path)
        self.icons_dir = icons_dir or Path.home() / ".lemontropia" / "icons"
        self.feature_matcher = FeatureMatcher()
        # Cache of pre-resized template images keyed by file path, filled
        # lazily by _match_by_template. Entries go stale if the file changes
        # on disk during the matcher's lifetime.
        self._icon_cache: Dict[str, np.ndarray] = {}

    def match_icon(self, image: np.ndarray,
                   match_methods: List[str] = None) -> Optional[MatchResult]:
        """
        Match an icon image against the database.
        Args:
            image: Icon image (numpy array)
            match_methods: List of methods to use ('hash', 'feature', 'template')
        Returns:
            MatchResult if match found, None otherwise
        """
        if match_methods is None:
            match_methods = ['hash', 'feature', 'template']
        results = []
        # Method 1: perceptual hash lookup against the database.
        if 'hash' in match_methods:
            hash_result = self._match_by_hash(image)
            if hash_result:
                results.append(hash_result)
        # Method 2: ORB feature matching against stored descriptors.
        if 'feature' in match_methods:
            feature_result = self._match_by_features(image)
            if feature_result:
                results.append(feature_result)
        # Method 3: template matching against icon files on disk.
        if 'template' in match_methods:
            template_result = self._match_by_template(image)
            if template_result:
                results.append(template_result)
        if not results:
            return None
        # Whichever method reported the highest confidence wins.
        return max(results, key=lambda x: x.confidence)

    def _match_by_hash(self, image: np.ndarray) -> Optional[MatchResult]:
        """Match using perceptual hashing; None below CONFIDENCE_LOW."""
        avg_hash = PerceptualHash.average_hash(image)
        # A wider distance here than the DB default; results are ranked anyway.
        matches = self.database.find_by_hash(avg_hash, max_distance=15)
        if not matches:
            return None
        item_name, similarity, meta = matches[0]
        if similarity >= self.CONFIDENCE_LOW:
            return MatchResult(
                item_name=item_name,
                confidence=similarity,
                match_method='hash',
                item_id=meta.get('item_id'),
                category=meta.get('category'),
                metadata=meta.get('metadata', {})
            )
        return None

    def _match_by_features(self, image: np.ndarray) -> Optional[MatchResult]:
        """Match using ORB features stored in the database."""
        _, query_desc = self.feature_matcher.extract_features(image)
        if query_desc is None:
            return None
        best_match = None
        best_score = 0.0
        best_meta: Dict[str, Any] = {}
        conn = sqlite3.connect(str(self.database.db_path))
        try:
            cursor = conn.cursor()
            cursor.execute('''
                SELECT item_name, features, item_id, category, metadata
                FROM icons WHERE features IS NOT NULL
            ''')
            for row in cursor.fetchall():
                item_name, features_blob, item_id, category, metadata_json = row
                try:
                    # Blobs are written locally by IconDatabase.add_icon;
                    # pickle on an untrusted database file would be unsafe.
                    db_desc = pickle.loads(features_blob)
                except Exception as e:
                    # One corrupt blob should not abort the whole scan.
                    logger.debug(f"Skipping corrupt feature blob for {item_name}: {e}")
                    continue
                score = self.feature_matcher.match_features(query_desc, db_desc)
                if score > best_score:
                    best_score = score
                    best_match = item_name
                    best_meta = {
                        'item_id': item_id,
                        'category': category,
                        'metadata': json.loads(metadata_json) if metadata_json else {}
                    }
        finally:
            # Close even when a query fails; the original leaked the handle.
            conn.close()
        if best_match and best_score >= self.CONFIDENCE_LOW:
            return MatchResult(
                item_name=best_match,
                confidence=best_score,
                match_method='feature',
                item_id=best_meta.get('item_id'),
                category=best_meta.get('category'),
                metadata=best_meta.get('metadata', {})
            )
        return None

    def _match_by_template(self, image: np.ndarray) -> Optional[MatchResult]:
        """Match using template matching against icon files on disk."""
        if not self.icons_dir.exists():
            return None
        # Resize query to the standard comparison size.
        standard_size = (64, 64)
        query_resized = cv2.resize(image, standard_size, interpolation=cv2.INTER_AREA)
        best_match = None
        best_score = 0.0
        for icon_file in self.icons_dir.glob("**/*.png"):
            try:
                key = str(icon_file)
                template_resized = self._icon_cache.get(key)
                if template_resized is None:
                    template = cv2.imread(key, cv2.IMREAD_COLOR)
                    if template is None:
                        continue
                    template_resized = cv2.resize(template, standard_size,
                                                  interpolation=cv2.INTER_AREA)
                    # Cache the resized template so repeated match calls do
                    # not re-read and re-decode every icon file.
                    self._icon_cache[key] = template_resized
                score = TemplateMatcher.match(query_resized, template_resized)
                if score > best_score:
                    best_score = score
                    best_match = icon_file.stem
            except Exception as e:
                logger.debug(f"Template matching failed for {icon_file}: {e}")
                continue
        if best_match and best_score >= self.CONFIDENCE_MEDIUM:
            return MatchResult(
                item_name=best_match,
                confidence=best_score,
                match_method='template'
            )
        return None

    def add_icon_to_database(self, item_name: str, image: np.ndarray,
                             item_id: Optional[str] = None,
                             category: Optional[str] = None,
                             metadata: Optional[Dict] = None) -> bool:
        """Add a new icon to the database. Returns True on success."""
        return self.database.add_icon(item_name, image, item_id, category, metadata)

    def batch_add_icons(self, icons_dir: Path,
                        category: Optional[str] = None) -> Tuple[int, int]:
        """
        Batch add icons from directory (recursively, *.png only).
        Item names are derived from file stems ('fire_sword' -> 'Fire Sword').
        Returns:
            Tuple of (success_count, fail_count)
        """
        success = 0
        failed = 0
        for icon_file in icons_dir.glob("**/*.png"):
            try:
                image = cv2.imread(str(icon_file), cv2.IMREAD_COLOR)
                if image is None:
                    failed += 1
                    continue
                item_name = icon_file.stem.replace('_', ' ').title()
                if self.add_icon_to_database(item_name, image, category=category):
                    success += 1
                else:
                    failed += 1
            except Exception as e:
                logger.error(f"Failed to add icon {icon_file}: {e}")
                failed += 1
        logger.info(f"Batch add complete: {success} success, {failed} failed")
        return success, failed

    def get_database_stats(self) -> Dict[str, Any]:
        """Get database statistics (icon count and storage locations)."""
        return {
            'total_icons': self.database.get_icon_count(),
            'database_path': str(self.database.db_path),
            'icons_directory': str(self.icons_dir)
        }

    def find_similar_icons(self, image: np.ndarray,
                           top_k: int = 5) -> List[MatchResult]:
        """Find up to top_k similar icons, ranked by hash similarity."""
        avg_hash = PerceptualHash.average_hash(image)
        # Wider distance than single-match lookups: this is a ranked list.
        matches = self.database.find_by_hash(avg_hash, max_distance=20)
        results = []
        for item_name, similarity, meta in matches[:top_k]:
            results.append(MatchResult(
                item_name=item_name,
                confidence=similarity,
                match_method='hash',
                item_id=meta.get('item_id'),
                category=meta.get('category'),
                metadata=meta.get('metadata', {})
            ))
        return results
# Public API of this module: the combined matcher, its result type, and the
# individual algorithm/storage classes for direct use.
__all__ = [
    'IconMatcher',
    'MatchResult',
    'PerceptualHash',
    'FeatureMatcher',
    'TemplateMatcher',
    'IconDatabase'
]