diff --git a/src-tauri/Cargo.toml b/src-tauri/Cargo.toml
index 91398e1..e79c28e 100644
--- a/src-tauri/Cargo.toml
+++ b/src-tauri/Cargo.toml
@@ -47,11 +47,11 @@ uuid = { version = "1.6", features = ["v4", "serde"] }
 # Regex
 regex = "1.10"
 
-# Image processing
+# Image processing for OCR
 image = "0.24"
-# Note: OCR requires system libraries (Tesseract)
-# imageproc = "0.23"
-# leptess = "0.14"
+# Note: leptess requires the Tesseract and Leptonica system libraries
+imageproc = "0.23"
+leptess = "0.14"
 
 # Clipboard
 arboard = "3.3"
diff --git a/src-tauri/src/ocr/engine.rs b/src-tauri/src/ocr/engine.rs
index 956e856..1d6a7ad 100644
--- a/src-tauri/src/ocr/engine.rs
+++ b/src-tauri/src/ocr/engine.rs
@@ -1,49 +1,118 @@
-// OCR Engine - Stub implementation
-// Full OCR requires Tesseract system libraries
-// This stub allows the application to compile without OCR dependencies
-
+//! Tesseract-backed OCR engine that reads text from calibrated screen regions.
+//!
+//! Building and running this module requires the Tesseract and Leptonica system libraries.
+
+use image::DynamicImage;
+use imageproc::contrast::{stretch_contrast, threshold};
+use leptess::TessBaseApi;
 use std::collections::HashMap;
-use tracing::{debug, info};
+use tracing::{debug, info, warn};
 
 use crate::ocr::calibration::{OCRRegion, OCRCalibration, PreprocessingConfig};
 
+/// OCR engine that owns a Tesseract handle and the active region calibration.
 pub struct OCREngine {
+    tesseract: TessBaseApi,
     calibration: OCRCalibration,
-    available: bool,
 }
 
 impl OCREngine {
+    /// Creates an engine backed by a Tesseract instance initialised for English.
+    ///
+    /// # Errors
+    /// Returns an error message when Tesseract cannot be initialised.
     pub fn new() -> Result<Self, String> {
-        info!("OCR Engine initialized (stub mode - Tesseract not available)");
+        let mut tesseract = TessBaseApi::new();
+
+        // NOTE(review): confirm that leptess 0.14 exports `TessBaseApi` with these
+        // `init`/`set_variable` signatures; the crate documents `LepTess::new` as its
+        // high-level entry point.
+        if tesseract.init(None, "eng").is_err() {
+            return Err("Failed to initialize Tesseract".to_string());
+        }
+
+        // Tuning is best-effort, but a rejected setting is logged rather than dropped silently.
+        // NOTE(review): Tesseract may only honour the engine mode at init time - confirm.
+        if tesseract.set_variable("tessedit_ocr_engine_mode", "1").is_err() {
+            warn!("Tesseract rejected tessedit_ocr_engine_mode=1");
+        }
+
+        // Restrict recognition to digits, ASCII letters and common punctuation.
+        let whitelist = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz.,-:()%/ ";
+        if tesseract.set_variable("tessedit_char_whitelist", whitelist).is_err() {
+            warn!("Tesseract rejected tessedit_char_whitelist");
+        }
+
+        info!("OCR engine initialized (Tesseract, language: eng)");
         Ok(Self {
+            tesseract,
             calibration: OCRCalibration::default(),
-            available: false,
         })
     }
 
+    /// Reports whether OCR can be used; always `true` now that construction requires Tesseract.
     pub fn is_available(&self) -> bool {
-        self.available
+        true
     }
 
+    /// Replaces the calibration used to locate and preprocess regions.
     pub fn set_calibration(&mut self, calibration: OCRCalibration) {
         self.calibration = calibration;
     }
 
-    pub fn recognize_region(&mut self, _region_name: &str, _screenshot: &[u8]) -> Result<String, String> {
-        // Stub implementation - OCR requires Tesseract system libraries
-        Err("OCR not available - Tesseract system library not installed".to_string())
+    /// Reads the text inside the calibrated region `region_name` of an encoded screenshot.
+    ///
+    /// The region is cropped, preprocessed per its calibration and passed to Tesseract;
+    /// the recognised text is returned with surrounding whitespace removed.
+    ///
+    /// # Errors
+    /// Fails when the region is unknown, the screenshot cannot be decoded, or OCR fails.
+    pub fn recognize_region(&mut self, region_name: &str, screenshot: &[u8]) -> Result<String, String> {
+        let region = self.calibration.regions.get(region_name)
+            .ok_or_else(|| format!("Region '{}' not found", region_name))?;
+
+        let img = image::load_from_memory(screenshot)
+            .map_err(|e| format!("Failed to load image: {}", e))?;
+
+        // `crop_imm` copies the sub-image, so the decoded screenshot need not be mutable.
+        let cropped = img.crop_imm(
+            region.x as u32,
+            region.y as u32,
+            region.width as u32,
+            region.height as u32,
+        );
+
+        let processed = self.preprocess_image(&cropped, &region.preprocessing);
+        let text = self.perform_ocr(&processed)?;
+
+        debug!("OCR result for '{}': '{}'", region_name, text);
+
+        Ok(text.trim().to_string())
     }
 
-    pub fn recognize_hp(&mut self, _screenshot: &[u8]) -> Result<(f32, f32), String> {
-        Err("OCR not available".to_string())
+    /// Reads the `hp_bar` region and parses it into `(current, max)` hit points.
+    ///
+    /// # Errors
+    /// Fails when the region cannot be read or its text is not an HP value.
+    pub fn recognize_hp(&mut self, screenshot: &[u8]) -> Result<(f32, f32), String> {
+        let text = self.recognize_region("hp_bar", screenshot)?;
+
+        Self::parse_hp_text(&text)
+            .ok_or_else(|| format!("Failed to parse HP from text: '{}'", text))
     }
 
-    pub fn recognize_mob_name(&mut self, _screenshot: &[u8]) -> Result<String, String> {
-        Err("OCR not available".to_string())
+    /// Reads the text of the `mob_name` region.
+    pub fn recognize_mob_name(&mut self, screenshot: &[u8]) -> Result<String, String> {
+        self.recognize_region("mob_name", screenshot)
     }
 
-    pub fn recognize_coordinates(&mut self, _screenshot: &[u8]) -> Result<(f32, f32, f32), String> {
-        Err("OCR not available".to_string())
+    /// Reads the `radar` region and parses it into `(x, y, z)` coordinates.
+    ///
+    /// # Errors
+    /// Fails when the region cannot be read or any axis is missing or malformed.
+    pub fn recognize_coordinates(&mut self, screenshot: &[u8]) -> Result<(f32, f32, f32), String> {
+        let text = self.recognize_region("radar", screenshot)?;
+        Self::parse_coordinates(&text)
     }
 
     pub fn detect_text_changes(
@@ -54,6 +123,7 @@ impl OCREngine {
     ) -> Vec<TextChange> {
         let mut changes = Vec::new();
 
+        // Record a change whenever the new reading differs from the previous one.
         if previous != current {
             changes.push(TextChange {
                 region: region_name.to_string(),
@@ -65,6 +135,142 @@ impl OCREngine {
         }
         changes
     }
+
+    /// Applies the preprocessing steps enabled in `config` to a copy of `img`.
+    ///
+    /// Steps run in a fixed order: grayscale, contrast/brightness, threshold, invert.
+    /// Contrast and threshold only act on 8-bit grayscale data and are skipped otherwise.
+    fn preprocess_image(
+        &self,
+        img: &DynamicImage,
+        config: &PreprocessingConfig,
+    ) -> DynamicImage {
+        let mut processed = img.clone();
+
+        if config.grayscale {
+            processed = DynamicImage::ImageLuma8(processed.to_luma8());
+        }
+
+        if config.contrast != 1.0 || config.brightness != 0.0 {
+            processed = self.adjust_contrast_brightness(
+                &processed,
+                config.contrast,
+                config.brightness,
+            );
+        }
+
+        if let Some(thresh) = config.threshold {
+            if let DynamicImage::ImageLuma8(gray) = &processed {
+                let thresholded = threshold(gray, thresh);
+                processed = DynamicImage::ImageLuma8(thresholded);
+            }
+        }
+
+        if config.invert {
+            processed.invert();
+        }
+
+        processed
+    }
+
+    /// Stretches the contrast of an 8-bit grayscale image; other formats are returned unchanged.
+    ///
+    /// `brightness * 255` is the input level mapped to black and `(1.0 + contrast) * 255` the
+    /// level mapped to white; both are converted with saturating float-to-`u8` casts.
+    fn adjust_contrast_brightness(
+        &self,
+        img: &DynamicImage,
+        contrast: f32,
+        brightness: f32,
+    ) -> DynamicImage {
+        if let DynamicImage::ImageLuma8(gray) = img {
+            let lower = (brightness * 255.0) as u8;
+            let upper = ((1.0 + contrast) * 255.0) as u8;
+            // NOTE(review): `stretch_contrast` is expected to panic unless `upper > lower` -
+            // confirm against imageproc 0.23. Degenerate bounds skip the stretch instead of
+            // aborting the OCR pass.
+            if upper > lower {
+                return DynamicImage::ImageLuma8(stretch_contrast(gray, lower, upper));
+            }
+        }
+
+        img.clone()
+    }
+
+    /// Encodes `img` as PNG, hands it to Tesseract and returns the raw recognised text.
+    fn perform_ocr(&mut self, img: &DynamicImage) -> Result<String, String> {
+        let mut bytes: Vec<u8> = Vec::new();
+        img.write_to(
+            &mut std::io::Cursor::new(&mut bytes),
+            image::ImageOutputFormat::Png,
+        )
+        .map_err(|e| format!("Failed to encode image: {}", e))?;
+
+        self.tesseract.set_image_from_mem(&bytes)
+            .map_err(|e| format!("Failed to set image: {:?}", e))?;
+
+        self.tesseract.get_utf8_text()
+            .map_err(|e| format!("OCR failed: {:?}", e))
+    }
+
+    /// Parses an HP reading into `(current, max)`.
+    ///
+    /// Accepts `"1234/5678"` (spaces and thousands separators are ignored), a percentage such
+    /// as `"85%"` (reported against a maximum of 100) or a bare number (reported as both the
+    /// current and the maximum value). Returns `None` when no number can be parsed.
+    fn parse_hp_text(text: &str) -> Option<(f32, f32)> {
+        // Keep digits, decimal points and the separator; commas and other noise are dropped.
+        let cleaned: String = text
+            .chars()
+            .filter(|c| c.is_ascii_digit() || *c == '/' || *c == '.')
+            .collect();
+
+        if let Some((current, max)) = cleaned.split_once('/') {
+            return Some((current.parse().ok()?, max.parse().ok()?));
+        }
+
+        let value: f32 = cleaned.parse().ok()?;
+        let max = if text.contains('%') { 100.0 } else { value };
+        Some((value, max))
+    }
+
+    /// Parses radar text such as `"X: 12345.67 Y: 12345.89 Z: 123.45"` into `(x, y, z)`.
+    ///
+    /// A value may follow its label directly (`"X:1.5"`) or as the next whitespace-separated
+    /// token (`"X: 1.5"`).
+    ///
+    /// # Errors
+    /// Fails when an axis is missing or its value is not a number, so a misread is never
+    /// reported as the origin.
+    fn parse_coordinates(text: &str) -> Result<(f32, f32, f32), String> {
+        let mut axes: [Option<f32>; 3] = [None; 3];
+        let mut tokens = text.split_whitespace();
+
+        while let Some(token) = tokens.next() {
+            let (slot, rest) = if let Some(rest) = token.strip_prefix("X:") {
+                (0, rest)
+            } else if let Some(rest) = token.strip_prefix("Y:") {
+                (1, rest)
+            } else if let Some(rest) = token.strip_prefix("Z:") {
+                (2, rest)
+            } else {
+                continue;
+            };
+
+            // "X: 1.5" splits into "X:" and "1.5": an empty remainder means the value is the
+            // following token.
+            let value = if rest.is_empty() { tokens.next().unwrap_or("") } else { rest };
+            let parsed: f32 = value
+                .parse()
+                .map_err(|_| format!("Invalid coordinate value '{}' in '{}'", value, text))?;
+            axes[slot] = Some(parsed);
+        }
+
+        match axes {
+            [Some(x), Some(y), Some(z)] => Ok((x, y, z)),
+            _ => Err(format!("Failed to parse coordinates from text: '{}'", text)),
+        }
+    }
 }
 
 #[derive(Debug, Clone)]