393 lines
13 KiB
Python
393 lines
13 KiB
Python
"""
|
|
EU-Utility - OCR Service Core Module
|
|
|
|
Screen capture and OCR functionality for all plugins.
|
|
Part of core - not a plugin. Plugins access via PluginAPI.
|
|
"""
|
|
|
|
import io
|
|
import base64
|
|
from typing import Dict, List, Tuple, Optional, Any
|
|
from dataclasses import dataclass
|
|
from pathlib import Path
|
|
|
|
# numpy is an optional dependency: when it cannot be imported, ``np`` is
# bound to None and NUMPY_AVAILABLE records that it is missing.
try:
    import numpy as np
except ImportError:
    np = None

NUMPY_AVAILABLE = np is not None
|
|
|
|
|
|
@dataclass
class OCRResult:
    """One recognised piece of text together with where it was found.

    Instances are produced by the OCR backends in this module and returned
    in the ``'results'`` list of a recognition result.
    """

    # The recognised text.
    text: str

    # Confidence score for ``text`` as reported by the backend.
    confidence: float

    # Location of the text as (x, y, width, height).
    bounding_box: Tuple[int, int, int, int]

    # Optional backend-specific extras; defaults to None.
    raw_data: Any = None
|
|
|
|
|
|
class OCRService:
    """
    Core OCR service with multiple backend support.

    Backends are probed in the order EasyOCR -> Tesseract -> PaddleOCR and
    the first one that loads is used for all later calls.

    LAZY INITIALIZATION - backends are only loaded on first use
    (``is_available`` / ``recognize``), never in ``__init__``.
    """

    def __init__(self):
        # Reader object of the active backend (EasyOCR / PaddleOCR).
        # Stays None for Tesseract, which is called through module functions.
        self._ocr_reader = None
        # Name of the active backend: 'easyocr', 'tesseract', 'paddle' or None.
        self._backend = None
        self._initialized = False
        # Re-entrancy guard while backends are being probed.
        self._initializing = False

    # ------------------------------------------------------------------
    # Backend initialisation
    # ------------------------------------------------------------------

    def _init_backends(self):
        """Initialize available OCR backends (lazy - called on first use).

        Does nothing when a backend is already active or a probe is already
        in progress. Otherwise tries each backend in turn and stops at the
        first one that loads. Prints a warning when none is usable.
        """
        if self._initialized or self._initializing:
            return

        self._initializing = True
        try:
            print("[OCR] Initializing backends...")
            for probe in (self._try_easyocr, self._try_tesseract, self._try_paddle):
                if probe():
                    self._initialized = True
                    break
        finally:
            # Always release the guard, even if a probe raised something
            # unexpected, so later calls are not blocked forever.
            self._initializing = False

        if not self._initialized:
            print("[OCR] WARNING: No OCR backend available!")
            print("[OCR] Install one of: easyocr, pytesseract, paddleocr")

    def _try_easyocr(self) -> bool:
        """Try to activate EasyOCR; return True on success."""
        try:
            import easyocr
            reader = easyocr.Reader(['en'], gpu=False, verbose=False)
        except ImportError:
            # Not installed: move on to the next backend silently.
            return False
        except Exception as e:
            print(f"[OCR] EasyOCR failed: {e}")
            return False

        self._ocr_reader = reader
        self._backend = 'easyocr'
        print("[OCR] Using EasyOCR backend")
        return True

    def _try_tesseract(self) -> bool:
        """Try to activate Tesseract (via pytesseract); return True on success."""
        try:
            import pytesseract
            from PIL import Image  # noqa: F401 - only checks that Pillow is importable
            # Raises when the tesseract executable cannot be found.
            pytesseract.get_tesseract_version()
        except Exception as e:
            print(f"[OCR] Tesseract failed: {e}")
            return False

        self._backend = 'tesseract'
        print("[OCR] Using Tesseract backend")
        return True

    def _try_paddle(self) -> bool:
        """Try to activate PaddleOCR with a minimal config; return True on success."""
        try:
            from paddleocr import PaddleOCR
            import os
            os.environ['PADDLE_PDX_DISABLE_MODEL_SOURCE_CHECK'] = 'True'

            try:
                # ``use_gpu`` is not accepted by every PaddleOCR version.
                reader = PaddleOCR(lang='en', show_log=False, use_gpu=False)
                message = "[OCR] Using PaddleOCR backend"
            except TypeError:
                # Retry without the parameter that was rejected.
                reader = PaddleOCR(lang='en', show_log=False)
                message = "[OCR] Using PaddleOCR backend (no GPU)"
        except Exception as e:
            print(f"[OCR] PaddleOCR failed: {e}")
            return False

        self._ocr_reader = reader
        self._backend = 'paddle'
        print(message)
        return True

    def is_available(self) -> bool:
        """Check if OCR is available (triggers lazy init if needed)."""
        self._init_backends()
        return self._initialized

    # ------------------------------------------------------------------
    # Screen capture
    # ------------------------------------------------------------------

    def capture_screen(self, region: Tuple[int, int, int, int] = None) -> 'Image.Image':
        """
        Capture screen or region using the ScreenshotService.

        Falls back to a direct pyautogui capture when the screenshot
        service cannot be imported or raises.

        Args:
            region: (x, y, width, height) or None for full screen

        Returns:
            The captured image (a PIL Image on the pyautogui path).

        Raises:
            RuntimeError: if the fallback is needed and pyautogui (or one of
                its imports) is missing.
        """
        try:
            from core.screenshot import get_screenshot_service
            screenshot_service = get_screenshot_service()

            if region:
                x, y, width, height = region
                return screenshot_service.capture_region(x, y, width, height)
            return screenshot_service.capture(full_screen=True)
        except Exception as e:
            print(f"[OCR] Screenshot service failed, falling back: {e}")

        # Fallback to direct pyautogui capture
        try:
            import pyautogui

            if region:
                x, y, width, height = region
                return pyautogui.screenshot(region=(x, y, width, height))
            return pyautogui.screenshot()
        except ImportError as import_error:
            raise RuntimeError(
                "pyautogui not installed. Run: pip install pyautogui"
            ) from import_error

    # ------------------------------------------------------------------
    # Recognition entry points
    # ------------------------------------------------------------------

    def recognize(self, image=None, region: Tuple[int, int, int, int] = None) -> Dict[str, Any]:
        """
        Perform OCR on image or screen region.

        Args:
            image: PIL Image, numpy array, or None to capture screen
            region: Screen region to capture (only used if image is None)

        Returns:
            Dict with 'text', 'confidence', 'results', 'image_size'.
            On failure the dict additionally carries an 'error' message,
            'text' is empty and 'results' is an empty list; no exception
            is propagated to the caller.
        """
        # Lazy initialization (no-op when already initialised / in progress)
        self._init_backends()

        if not self._initialized:
            return self._error_result('OCR not initialized - no backend available')

        try:
            # Capture if needed
            if image is None:
                image = self.capture_screen(region)

            handlers = {
                'easyocr': self._ocr_easyocr,
                'tesseract': self._ocr_tesseract,
                'paddle': self._ocr_paddle,
            }
            handler = handlers.get(self._backend)
            if handler is None:
                return self._error_result('Unknown backend')
            return handler(image)
        except Exception as e:
            # Best-effort API: report the failure in the result dict.
            return self._error_result(str(e))

    def recognize_image(self, image) -> Dict[str, Any]:
        """
        Perform OCR on a PIL Image.
        Convenience alias for recognize(image=image).

        Args:
            image: PIL Image to OCR

        Returns:
            Dict with 'text', 'confidence', 'results', 'image_size'
        """
        return self.recognize(image=image)

    # ------------------------------------------------------------------
    # Shared helpers
    # ------------------------------------------------------------------

    @staticmethod
    def _error_result(message: str) -> Dict[str, Any]:
        """Build the result dict returned when recognition cannot proceed."""
        return {
            'text': '',
            'confidence': 0,
            'error': message,
            'results': [],
            'image_size': None,
        }

    @staticmethod
    def _image_size(image):
        """Return (width, height) for array-like or PIL-like images, else None.

        Array-likes are detected via ``shape`` because their ``size``
        attribute is an element count rather than a (width, height) pair.
        """
        shape = getattr(image, 'shape', None)
        if shape is not None and len(shape) >= 2:
            return (int(shape[1]), int(shape[0]))
        return getattr(image, 'size', None)

    @staticmethod
    def _to_numpy(image):
        """Convert a PIL-like image (anything with ``convert``) to a numpy array.

        Other inputs are returned unchanged.
        """
        if hasattr(image, 'convert'):
            import numpy as np
            return np.array(image)
        return image

    @staticmethod
    def _bbox_from_points(points) -> Tuple[int, int, int, int]:
        """Collapse a sequence of (x, y) corner points into (x, y, width, height)."""
        x_coords = [p[0] for p in points]
        y_coords = [p[1] for p in points]
        x_min, x_max = min(x_coords), max(x_coords)
        y_min, y_max = min(y_coords), max(y_coords)
        return (int(x_min), int(y_min), int(x_max - x_min), int(y_max - y_min))

    def _build_result(self, text: str, parsed_results: list, image) -> Dict[str, Any]:
        """Assemble the success dict; confidence is the mean over all results (0 if none)."""
        if parsed_results:
            avg_confidence = sum(r.confidence for r in parsed_results) / len(parsed_results)
        else:
            avg_confidence = 0
        return {
            'text': text,
            'confidence': avg_confidence,
            'results': parsed_results,
            'image_size': self._image_size(image),
        }

    # ------------------------------------------------------------------
    # Backend implementations
    # ------------------------------------------------------------------

    def _ocr_easyocr(self, image) -> Dict[str, Any]:
        """OCR using EasyOCR."""
        detections = self._ocr_reader.readtext(self._to_numpy(image))

        # Each detection is unpacked as (corner points, text, confidence).
        parsed_results = [
            OCRResult(
                text=text,
                confidence=conf,
                bounding_box=self._bbox_from_points(bbox),
                raw_data={'bbox': bbox},
            )
            for (bbox, text, conf) in detections
        ]
        joined = ' '.join(r.text for r in parsed_results)
        return self._build_result(joined, parsed_results, image)

    @staticmethod
    def _parse_tesseract_data(data: Dict[str, list]) -> list:
        """Turn pytesseract's column-oriented DICT output into OCRResult objects.

        Rows with blank text or a non-positive confidence are skipped.
        Confidence values are scaled from percent to a fraction.
        """
        parsed_results = []
        for i, word in enumerate(data['text']):
            if not word.strip():
                continue
            # float() rather than int(): the value may arrive as a decimal
            # string such as '96.06', which int() cannot parse.
            conf = float(data['conf'][i])
            if conf <= 0:  # not a valid confidence
                continue
            parsed_results.append(OCRResult(
                text=word,
                confidence=conf / 100.0,
                bounding_box=(
                    data['left'][i],
                    data['top'][i],
                    data['width'][i],
                    data['height'][i],
                ),
                raw_data={'block_num': data['block_num'][i]},
            ))
        return parsed_results

    def _ocr_tesseract(self, image) -> Dict[str, Any]:
        """OCR using Tesseract."""
        import pytesseract

        # Full text comes from a separate call, so it keeps Tesseract's own
        # layout instead of being re-joined from individual words.
        text = pytesseract.image_to_string(image).strip()

        # Per-word details (confidence and boxes)
        data = pytesseract.image_to_data(image, output_type=pytesseract.Output.DICT)
        parsed_results = self._parse_tesseract_data(data)

        return self._build_result(text, parsed_results, image)

    def _ocr_paddle(self, image) -> Dict[str, Any]:
        """OCR using PaddleOCR."""
        result = self._ocr_reader.ocr(self._to_numpy(image), cls=True)

        # Only the first entry of the result is read; it is falsy when
        # nothing was detected.
        lines = result[0] if result and result[0] else []

        parsed_results = [
            OCRResult(
                text=text,
                confidence=conf,
                bounding_box=self._bbox_from_points(bbox),
                raw_data={'bbox': bbox},
            )
            for bbox, (text, conf) in lines
        ]
        joined = ' '.join(r.text for r in parsed_results)
        return self._build_result(joined, parsed_results, image)

    # ------------------------------------------------------------------
    # Convenience queries
    # ------------------------------------------------------------------

    def recognize_region(self, x: int, y: int, width: int, height: int) -> Dict[str, Any]:
        """Convenience method for region OCR."""
        return self.recognize(region=(x, y, width, height))

    def find_text(self, target_text: str, image=None, region: Tuple[int, int, int, int] = None) -> List['OCRResult']:
        """
        Find specific text in image.

        The match is a case-insensitive substring test against each
        individual result's text.

        Returns list of OCRResult where target_text is found.
        """
        result = self.recognize(image, region)
        needle = target_text.lower()
        return [r for r in result.get('results', []) if needle in r.text.lower()]

    @staticmethod
    def _crop_around(image, x: int, y: int, half_width: int, half_height: int):
        """Crop ``image`` to the window centred on (x, y), clipped to the image.

        Supports array-likes (via ``shape`` and slicing) and PIL-like
        objects (via ``size`` and ``crop``). Unknown image types are returned
        unchanged. Returns None when the window lies entirely outside the
        image.
        """
        shape = getattr(image, 'shape', None)
        is_array = shape is not None and len(shape) >= 2
        if is_array:
            img_height, img_width = shape[0], shape[1]
        elif hasattr(image, 'crop') and hasattr(image, 'size'):
            img_width, img_height = image.size
        else:
            return image

        left = max(0, x - half_width)
        top = max(0, y - half_height)
        right = min(img_width, x + half_width)
        bottom = min(img_height, y + half_height)
        if right <= left or bottom <= top:
            return None

        if is_array:
            return image[top:bottom, left:right]
        return image.crop((left, top, right, bottom))

    def get_text_at_position(self, x: int, y: int, image=None) -> Optional[str]:
        """Get text at specific screen position.

        A 100x20 window centred on (x, y) is examined. Without ``image`` that
        window is captured from the screen. With ``image`` the coordinates
        are taken as image coordinates and the image is cropped to the
        window, so the position is honoured there too.

        Returns:
            The recognised text, or None when nothing was recognised.
        """
        half_width, half_height = 50, 10

        if image is None:
            region = (x - half_width, y - half_height, 2 * half_width, 2 * half_height)
            result = self.recognize(region=region)
        else:
            patch = self._crop_around(image, x, y, half_width, half_height)
            if patch is None:
                # The point is outside the supplied image.
                return None
            result = self.recognize(image=patch)

        return result.get('text') or None
|
|
|
|
|
|
# Module-wide OCRService singleton; created on first request.
_ocr_service = None


def get_ocr_service() -> OCRService:
    """Return the shared OCRService, creating it on the first call."""
    global _ocr_service
    if _ocr_service is not None:
        return _ocr_service

    _ocr_service = OCRService()
    return _ocr_service
|
|
|
|
|
|
# One-call helper: capture + recognise + return just the text
def quick_ocr(region: Tuple[int, int, int, int] = None) -> str:
    """
    Capture the screen (or a region of it) and return the recognised text.

    Args:
        region: (x, y, width, height) to capture, or None for the full screen.

    Returns:
        The 'text' entry of the recognition result, or '' when it is absent.

    Usage:
        text = quick_ocr()  # Full screen
        text = quick_ocr((100, 100, 200, 50))  # Region
    """
    return get_ocr_service().recognize(region=region).get('text', '')
|