"""
Lemontropia Suite - Tesseract OCR Backend

Traditional OCR using Tesseract - stable, no ML dependencies.
"""

import logging
import os
import shutil
from pathlib import Path
from typing import List, Optional, Tuple

import numpy as np

from . import BaseOCRBackend, OCRTextRegion

logger = logging.getLogger(__name__)


class TesseractBackend(BaseOCRBackend):
    """
    OCR backend using Tesseract OCR.

    Pros:
    - Very stable and mature
    - No PyTorch/TensorFlow dependencies
    - Fast on CPU
    - Works with Windows Store Python

    Cons:
    - Lower accuracy on game UI text than neural OCR
    - Requires Tesseract binary installation

    Installation:
    - Windows: choco install tesseract or download from UB Mannheim
    - Linux: sudo apt-get install tesseract-ocr
    - macOS: brew install tesseract
    - Python: pip install pytesseract
    """

    NAME = "tesseract"
    SUPPORTS_GPU = False  # Tesseract is CPU-only

    # Words whose Tesseract confidence (0-100) is not strictly above this
    # value are dropped by extract_text().
    MIN_CONFIDENCE = 30

    def __init__(self, use_gpu: bool = True, lang: str = 'en', **kwargs):
        """
        Store the backend configuration.

        Args:
            use_gpu: Forwarded unchanged to the base class. Tesseract itself
                is CPU-only (see SUPPORTS_GPU).
            lang: Short language code; translated to a Tesseract language
                name through ``self.lang_map`` when OCR is run.
            **kwargs: Forwarded to the base class. Two keys are also read
                here: ``tesseract_cmd`` (path to the Tesseract executable)
                and ``config`` (extra Tesseract command-line options).
        """
        super().__init__(use_gpu=use_gpu, lang=lang, **kwargs)

        self.tesseract_cmd = kwargs.get('tesseract_cmd', None)
        self._version = None

        # Language mapping for Tesseract
        self.lang_map = {
            'en': 'eng',
            'sv': 'swe',  # Swedish
            'de': 'deu',
            'fr': 'fra',
            'es': 'spa',
            'latin': 'eng+deu+fra+spa',  # Multi-language
        }

        # Tesseract configuration
        # --psm 6: assume a single uniform block of text
        self.config = kwargs.get('config', '--psm 6')

    def _initialize(self) -> bool:
        """
        Initialize Tesseract OCR.

        Imports pytesseract, applies the custom executable path if one was
        given, and verifies the installation by querying the Tesseract
        version. If that query fails, the executable is searched for with
        ``find_tesseract()`` (PATH plus common Windows install locations)
        and the query is retried once.

        Returns:
            True when Tesseract is usable; False otherwise, in which case
            ``self._error_msg`` holds the reason.
        """
        try:
            import pytesseract
        except ImportError:
            self._error_msg = "pytesseract not installed. Run: pip install pytesseract"
            logger.warning(self._error_msg)
            return False

        try:
            # Set custom path if provided
            if self.tesseract_cmd:
                pytesseract.pytesseract.tesseract_cmd = self.tesseract_cmd

            # Try to get version to verify installation
            try:
                version = pytesseract.get_tesseract_version()
                logger.info(f"Tesseract version: {version}")
            except Exception:
                # The configured command did not work; look for the binary
                # ourselves before giving up.
                tesseract_path = self.find_tesseract()
                if not tesseract_path:
                    # Re-raise the original failure with its traceback intact.
                    raise
                pytesseract.pytesseract.tesseract_cmd = tesseract_path
                version = pytesseract.get_tesseract_version()
                logger.info(f"Tesseract found at: {tesseract_path}, version: {version}")

            self._version = str(version)
        except Exception as e:
            self._error_msg = f"Tesseract not found: {e}. Please install Tesseract OCR."
            logger.warning(self._error_msg)
            logger.info("Download from: https://github.com/UB-Mannheim/tesseract/wiki")
            return False

        self._available = True
        logger.info("Tesseract OCR initialized successfully")
        return True

    def extract_text(self, image: np.ndarray) -> List[OCRTextRegion]:
        """
        Extract text from image using Tesseract.

        The image is converted to grayscale, preprocessed, and passed to
        Tesseract once; Tesseract returns per-word text, confidence and
        bounding boxes. Low-confidence and empty words are dropped and the
        remaining regions are merged with ``_merge_nearby_regions``.

        Bounding boxes are reported in the coordinate space of the input
        image, even when preprocessing upscaled the image for recognition.

        Args:
            image: Input image (BGR format from OpenCV)

        Returns:
            List of detected text regions with recognized text. Empty when
            the backend is not initialized, the image is empty, or
            extraction fails.
        """
        if not self._available:
            logger.error("Tesseract backend not initialized")
            return []

        if image is None or image.size == 0:
            return []

        try:
            import pytesseract

            # Preprocess image
            gray = self._to_grayscale(image)
            processed = self._preprocess_for_tesseract(gray)

            # Preprocessing may enlarge small images, so Tesseract's boxes
            # must be scaled back to the original resolution.
            src_h, src_w = gray.shape[:2]
            proc_h, proc_w = processed.shape[:2]
            scale_x = src_w / proc_w if proc_w else 1.0
            scale_y = src_h / proc_h if proc_h else 1.0

            # Get data including bounding boxes
            tesseract_lang = self.lang_map.get(self.lang, 'eng')
            data = pytesseract.image_to_data(
                processed,
                lang=tesseract_lang,
                config=self.config,
                output_type=pytesseract.Output.DICT
            )

            regions = []
            rows = zip(
                data['text'], data['conf'],
                data['left'], data['top'], data['width'], data['height'],
            )
            for raw_text, raw_conf, left, top, width, height in rows:
                text = str(raw_text).strip()

                # Confidence may arrive as an int, a float, or a string such
                # as '96.53' or '-1' depending on the Tesseract/pytesseract
                # versions in use.
                try:
                    conf = int(float(raw_conf))
                except (TypeError, ValueError):
                    continue

                # Filter low confidence and empty text
                if conf > self.MIN_CONFIDENCE and text:
                    regions.append(OCRTextRegion(
                        text=text,
                        confidence=conf / 100.0,  # Normalize to 0-1
                        bbox=(
                            int(round(left * scale_x)),
                            int(round(top * scale_y)),
                            int(round(width * scale_x)),
                            int(round(height * scale_y)),
                        ),
                        language=self.lang
                    ))

            # Merge overlapping regions that are likely the same text
            regions = self._merge_nearby_regions(regions)

            logger.debug(f"Tesseract detected {len(regions)} text regions")
            return regions

        except Exception as e:
            logger.error(f"Tesseract extraction failed: {e}")
            return []

    def _preprocess_for_tesseract(self, gray: np.ndarray) -> np.ndarray:
        """
        Preprocess image specifically for Tesseract.

        Upscales images shorter than 100 px, binarizes them with adaptive
        Gaussian thresholding, and denoises the result.

        Args:
            gray: Grayscale image.

        Returns:
            The binarized, denoised image. Its size differs from the input
            when upscaling was applied.
        """
        import cv2

        # Resize small images (Tesseract works better with larger text)
        h, w = gray.shape[:2]
        min_height = 100
        # h > 0 guards the division below against an empty image.
        if 0 < h < min_height:
            scale = min_height / h
            gray = cv2.resize(gray, None, fx=scale, fy=scale, interpolation=cv2.INTER_CUBIC)

        # Apply adaptive thresholding
        processed = cv2.adaptiveThreshold(
            gray, 255,
            cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
            cv2.THRESH_BINARY,
            11, 2
        )

        # Denoise
        processed = cv2.fastNlMeansDenoising(processed, None, 10, 7, 21)

        return processed

    def _merge_nearby_regions(self, regions: List[OCRTextRegion],
                              max_distance: int = 10) -> List[OCRTextRegion]:
        """
        Merge text regions that are close to each other.

        Regions are sorted by (top, left) and scanned once. A region is
        folded into the running one when their top edges are less than
        ``max_distance`` apart AND their horizontal extents overlap. The
        merged region covers both boxes, joins the texts with a space, and
        takes the mean of the two confidences.

        Args:
            regions: Regions to merge; bboxes are (x, y, width, height).
            max_distance: Exclusive upper bound on the difference between
                top edges for two regions to be merged.

        Returns:
            A new list of merged regions; empty when ``regions`` is empty.
        """
        if not regions:
            return []

        # Sort by y position
        sorted_regions = sorted(regions, key=lambda r: (r.bbox[1], r.bbox[0]))

        merged = []
        current = sorted_regions[0]

        for next_region in sorted_regions[1:]:
            # Check if regions are close enough to merge
            cx, cy, cw, ch = current.bbox
            nx, ny, nw, nh = next_region.bbox

            # Calculate distance
            distance = abs(ny - cy)
            x_overlap = not (cx + cw < nx or nx + nw < cx)

            if distance < max_distance and x_overlap:
                # Merge regions
                min_x = min(cx, nx)
                min_y = min(cy, ny)
                max_x = max(cx + cw, nx + nw)
                max_y = max(cy + ch, ny + nh)

                # Combine text
                combined_text = current.text + " " + next_region.text
                avg_conf = (current.confidence + next_region.confidence) / 2

                current = OCRTextRegion(
                    text=combined_text.strip(),
                    confidence=avg_conf,
                    bbox=(min_x, min_y, max_x - min_x, max_y - min_y),
                    language=self.lang
                )
            else:
                merged.append(current)
                current = next_region

        merged.append(current)
        return merged

    def extract_text_simple(self, image: np.ndarray) -> str:
        """
        Simple text extraction without region detection.

        Color images are converted to RGB and handed to Tesseract as-is
        (no thresholding or resizing).

        Args:
            image: Input image; BGR or BGRA when it has a channel axis.

        Returns:
            All text found in image as single string. Empty when the
            backend is not initialized, the image is empty, or extraction
            fails.
        """
        if not self._available:
            return ""

        if image is None or image.size == 0:
            return ""

        try:
            import pytesseract
            import cv2

            # Convert to RGB if needed
            if len(image.shape) == 3:
                if image.shape[2] == 4:
                    # Screen captures frequently carry an alpha channel.
                    image = cv2.cvtColor(image, cv2.COLOR_BGRA2RGB)
                else:
                    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

            tesseract_lang = self.lang_map.get(self.lang, 'eng')
            text = pytesseract.image_to_string(
                image,
                lang=tesseract_lang,
                config=self.config
            )

            return text.strip()

        except Exception as e:
            logger.error(f"Tesseract simple extraction failed: {e}")
            return ""

    @staticmethod
    def find_tesseract() -> Optional[str]:
        """
        Find Tesseract installation path.

        Looks on PATH first, then in a fixed list of common Windows
        install locations.

        Returns:
            Path to the Tesseract executable, or None if it was not found.
        """
        path = shutil.which('tesseract')
        if path:
            return path

        # Common Windows paths
        common_paths = [
            r"C:\Program Files\Tesseract-OCR\tesseract.exe",
            r"C:\Program Files (x86)\Tesseract-OCR\tesseract.exe",
            r"C:\Users\%USERNAME%\AppData\Local\Tesseract-OCR\tesseract.exe",
            r"C:\Tesseract-OCR\tesseract.exe",
        ]

        for p in common_paths:
            # %USERNAME% is expanded by expandvars on Windows only.
            expanded = os.path.expandvars(p)
            if Path(expanded).exists():
                return expanded

        return None