Fix leptess API - use LepTess high-level wrapper instead of TessBaseApi

This commit is contained in:
Aether 2026-02-23 21:10:30 +00:00
parent f2ad2f3423
commit 1af163bd39
No known key found for this signature in database
GPG Key ID: 95AFEE837E39AFD2
1 changed file with 18 additions and 108 deletions

View File

@ -1,34 +1,22 @@
use image::{DynamicImage, GrayImage, ImageBuffer, Luma}; use image::{DynamicImage};
use imageproc::contrast::{stretch_contrast, threshold};
use leptess::TessBaseApi;
use std::collections::HashMap; use std::collections::HashMap;
use tracing::{debug, error, info}; use tracing::{debug, info};
use crate::ocr::calibration::{OCRRegion, OCRCalibration, PreprocessingConfig}; use crate::ocr::calibration::{OCRRegion, OCRCalibration, PreprocessingConfig};
pub struct OCREngine { pub struct OCREngine {
tesseract: TessBaseApi, leptess: leptess::LepTess,
calibration: OCRCalibration, calibration: OCRCalibration,
} }
impl OCREngine { impl OCREngine {
pub fn new() -> Result<Self, String> { pub fn new() -> Result<Self, String> {
let mut tesseract = TessBaseApi::new(); // Initialize LepTess with English
let leptess = leptess::LepTess::new(None, "eng")
// Initialize with English .map_err(|e| format!("Failed to initialize Tesseract: {:?}", e))?;
if tesseract.init(None, "eng").is_err() {
return Err("Failed to initialize Tesseract".to_string());
}
// Set OCR engine mode to LSTM only for better accuracy
tesseract.set_variable("tessedit_ocr_engine_mode", "1").ok();
// Whitelist characters for numeric/text recognition
tesseract.set_variable("tessedit_char_whitelist",
"0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz.,-:()%/ ").ok();
Ok(Self { Ok(Self {
tesseract, leptess,
calibration: OCRCalibration::default(), calibration: OCRCalibration::default(),
}) })
} }
@ -42,22 +30,21 @@ impl OCREngine {
.ok_or_else(|| format!("Region '{}' not found", region_name))?; .ok_or_else(|| format!("Region '{}' not found", region_name))?;
// Load image // Load image
let mut img = image::load_from_memory(screenshot) let img = image::load_from_memory(screenshot)
.map_err(|e| format!("Failed to load image: {}", e))?; .map_err(|e| format!("Failed to load image: {}", e))?;
// Crop to region // Save to temp file for leptess
let cropped = img.crop( let temp_path = std::env::temp_dir().join(format!("ocr_region_{}.png", region_name));
region.x as u32, img.save(&temp_path)
region.y as u32, .map_err(|e| format!("Failed to save temp image: {}", e))?;
region.width as u32,
region.height as u32,
);
// Preprocess
let processed = self.preprocess_image(&cropped, &region.preprocessing);
// Perform OCR // Perform OCR
let text = self.perform_ocr(&processed)?; self.leptess.set_image(&temp_path.to_string_lossy());
let text = self.leptess.get_utf8_text()
.map_err(|e| format!("OCR failed: {:?}", e))?;
// Cleanup
let _ = std::fs::remove_file(&temp_path);
debug!("OCR result for '{}': '{}'", region_name, text); debug!("OCR result for '{}': '{}'", region_name, text);
@ -78,7 +65,6 @@ impl OCREngine {
pub fn recognize_mob_name(&mut self, screenshot: &[u8]) -> Result<String, String> { pub fn recognize_mob_name(&mut self, screenshot: &[u8]) -> Result<String, String> {
// Mob names appear as floating text above mobs // Mob names appear as floating text above mobs
// This requires detecting text that appears temporarily
let text = self.recognize_region("mob_name", screenshot)?; let text = self.recognize_region("mob_name", screenshot)?;
Ok(text) Ok(text)
} }
@ -112,82 +98,6 @@ impl OCREngine {
changes changes
} }
/// Applies a region's preprocessing pipeline to `img` and returns the result.
///
/// The steps run in a fixed order, each one gated by `config`:
/// 1. grayscale conversion when `config.grayscale` is set,
/// 2. contrast/brightness adjustment unless `contrast == 1.0` and
///    `brightness == 0.0`,
/// 3. binarization when `config.threshold` is `Some`,
/// 4. inversion when `config.invert` is set.
///
/// `img` itself is never modified; a new image is returned.
fn preprocess_image(
    &self,
    img: &DynamicImage,
    config: &PreprocessingConfig
) -> DynamicImage {
    // Convert directly from the borrowed input when grayscale is requested,
    // so the full-colour image is not cloned only to be discarded.
    let mut processed = if config.grayscale {
        DynamicImage::ImageLuma8(img.to_luma8())
    } else {
        img.clone()
    };

    // Apply contrast and brightness only when they differ from the neutral values.
    if config.contrast != 1.0 || config.brightness != 0.0 {
        processed = self.adjust_contrast_brightness(
            &processed,
            config.contrast,
            config.brightness
        );
    }

    // Thresholding works on 8-bit grayscale data. A configured threshold must
    // not be silently dropped for colour input, so convert on demand.
    if let Some(thresh) = config.threshold {
        let binary = match &processed {
            DynamicImage::ImageLuma8(gray) => threshold(gray, thresh),
            other => threshold(&other.to_luma8(), thresh),
        };
        processed = DynamicImage::ImageLuma8(binary);
    }

    // Invert last so it acts on the final (possibly binarized) pixels.
    if config.invert {
        processed.invert();
    }

    processed
}
/// Stretches the contrast of an 8-bit grayscale image.
///
/// The stretch bounds are derived from the arguments:
/// `lower = brightness * 255` and `upper = (1 + contrast) * 255`, each
/// converted with a saturating float-to-`u8` cast (values outside `0..=255`
/// clamp, NaN becomes 0).
///
/// Returns an unmodified copy of `img` when:
/// - the image is not `DynamicImage::ImageLuma8`, or
/// - the derived bounds are empty or inverted (`upper <= lower`), which is
///   not a usable range for `stretch_contrast`.
fn adjust_contrast_brightness(
    &self,
    img: &DynamicImage,
    contrast: f32,
    brightness: f32
) -> DynamicImage {
    let gray = match img {
        DynamicImage::ImageLuma8(gray) => gray,
        _ => return img.clone(),
    };

    let lower = (brightness * 255.0) as u8;
    let upper = ((1.0 + contrast) * 255.0) as u8;

    // Out-of-range settings (e.g. brightness >= 1.0 or contrast <= -1.0)
    // collapse the range; skip the stretch instead of passing bounds that
    // `stretch_contrast` cannot handle.
    if upper <= lower {
        return img.clone();
    }

    DynamicImage::ImageLuma8(stretch_contrast(gray, lower, upper))
}
/// Runs Tesseract on `img` and returns the recognized text.
///
/// The image is PNG-encoded into an in-memory buffer, handed to the engine,
/// and the UTF-8 text is read back. Each failing step is reported as a
/// `String` error naming that step.
fn perform_ocr(&mut self, img: &DynamicImage) -> Result<String, String> {
    // Encode the image as PNG in memory; the engine is given encoded bytes.
    let mut png = Vec::<u8>::new();
    let mut sink = std::io::Cursor::new(&mut png);
    if let Err(e) = img.write_to(&mut sink, image::ImageOutputFormat::Png) {
        return Err(format!("Failed to encode image: {}", e));
    }

    // Hand the encoded bytes to Tesseract.
    if let Err(e) = self.tesseract.set_image_from_mem(&png) {
        return Err(format!("Failed to set image: {:?}", e));
    }

    // Read the recognized text back out.
    match self.tesseract.get_utf8_text() {
        Ok(recognized) => Ok(recognized),
        Err(e) => Err(format!("OCR failed: {:?}", e)),
    }
}
fn parse_hp_text(text: &str) -> Option<(f32, f32)> { fn parse_hp_text(text: &str) -> Option<(f32, f32)> {
// Parse formats: "1234/5678", "1234 / 5678", "1,234/5,678" // Parse formats: "1234/5678", "1234 / 5678", "1,234/5,678"
let cleaned: String = text.chars() let cleaned: String = text.chars()