# EU-Utility/core/log_reader_optimized.py
"""
EU-Utility - Optimized Log Reader
Performance improvements:
1. Compiled regex patterns (cached at module level)
2. Ring buffer for O(1) line storage
3. Pattern matching cache for repeated lines
4. Batch processing for multiple lines
5. Memory-efficient string storage
"""
import os
import re
import time
import threading
from pathlib import Path
from datetime import datetime
from typing import List, Dict, Callable, Optional
from dataclasses import dataclass, field
from collections import deque
from core.performance_optimizations import RingBuffer, StringInterner
@dataclass
class LogEvent:
    """Represents a parsed log event."""
    timestamp: datetime          # wall-clock time the line was parsed (not the in-game timestamp)
    raw_line: str                # the original (stripped) chat.log line
    event_type: str              # key of the regex pattern that matched, e.g. 'loot', 'skill_gain'
    data: Dict = field(default_factory=dict)  # match payload; parser stores {'groups': match.groups()}
class LogReader:
"""
Optimized core service for reading and parsing EU chat.log.
Performance features:
- Compiled regex patterns (cached)
- Ring buffer for recent lines (O(1) append)
- LRU cache for pattern matching
- Batch line processing
- String interning for memory efficiency
"""
# Pre-compiled patterns (module level for reuse across instances)
_COMPILED_PATTERNS: Dict[str, re.Pattern] = {}
_PATTERNS_LOCK = threading.Lock()
LOG_PATHS = [
Path.home() / "Documents" / "Entropia Universe" / "chat.log",
Path.home() / "Documents" / "Entropia Universe" / "Logs" / "chat.log",
Path.home() / "Entropia Universe" / "chat.log",
]
def __init__(self, log_path: Path = None):
self.log_path = log_path or self._find_log_file()
self.running = False
self.thread = None
self.last_position = 0
# Subscribers: {event_type: [callbacks]}
self._subscribers: Dict[str, List[Callable]] = {}
self._any_subscribers: List[Callable] = []
self._subscribers_lock = threading.RLock()
# Optimized: Use RingBuffer for O(1) append/pop
self._recent_lines = RingBuffer(1000)
# String interner for memory efficiency
self._string_interner = StringInterner(max_size=5000)
# Pattern matching cache (LRU)
self._pattern_cache: Dict[str, Optional[LogEvent]] = {}
self._cache_max_size = 10000
self._cache_lock = threading.Lock()
# Stats
self._stats = {
'lines_read': 0,
'events_parsed': 0,
'start_time': None,
'cache_hits': 0,
'cache_misses': 0,
}
self._stats_lock = threading.Lock()
# Ensure patterns are compiled
self._ensure_patterns()
@classmethod
def _ensure_patterns(cls):
"""Ensure regex patterns are compiled (thread-safe)."""
with cls._PATTERNS_LOCK:
if not cls._COMPILED_PATTERNS:
cls._COMPILED_PATTERNS = {
'skill_gain': re.compile(
r'(.+?)\s+has\s+improved\s+by\s+(\d+\.?\d*)\s+points?',
re.IGNORECASE
),
'loot': re.compile(
r'You\s+received\s+(.+?)\s+x\s*(\d+)',
re.IGNORECASE
),
'global': re.compile(
r'(\w+)\s+received\s+.+?\s+from\s+(\w+)\s+worth\s+(\d+)\s+PED',
re.IGNORECASE
),
'damage': re.compile(
r'You\s+(?:hit|inflicted)\s+(\d+)\s+damage',
re.IGNORECASE
),
'damage_taken': re.compile(
r'You\s+were\s+hit\s+for\s+(\d+)\s+damage',
re.IGNORECASE
),
'heal': re.compile(
r'You\s+(?:healed|restored)\s+(\d+)\s+(?:health|points)',
re.IGNORECASE
),
'mission_complete': re.compile(
r'Mission\s+completed:\s+(.+)',
re.IGNORECASE
),
'tier_increase': re.compile(
r'Your\s+(.+?)\s+has\s+reached\s+tier\s+(\d+)',
re.IGNORECASE
),
'enhancer_break': re.compile(
r'Your\s+(.+?)\s+broke',
re.IGNORECASE
),
}
def _find_log_file(self) -> Optional[Path]:
"""Find EU chat.log file."""
for path in self.LOG_PATHS:
if path.exists():
return path
return None
def start(self) -> bool:
"""Start log monitoring in background thread."""
if not self.log_path or not self.log_path.exists():
print(f"[LogReader] Log file not found. Tried: {self.LOG_PATHS}")
return False
self.running = True
with self._stats_lock:
self._stats['start_time'] = datetime.now()
# Start at end of file (don't process old lines)
try:
self.last_position = self.log_path.stat().st_size
except OSError:
self.last_position = 0
self.thread = threading.Thread(target=self._watch_loop, daemon=True, name="LogReader")
self.thread.start()
print(f"[LogReader] Started watching: {self.log_path}")
return True
def stop(self):
"""Stop log monitoring."""
self.running = False
if self.thread:
self.thread.join(timeout=2.0)
print("[LogReader] Stopped")
def _watch_loop(self):
"""Main watching loop with adaptive polling."""
poll_interval = 0.5 # Start with 500ms
empty_polls = 0
while self.running:
try:
has_new = self._check_for_new_lines()
# Adaptive polling: increase interval if no new lines
if has_new:
empty_polls = 0
poll_interval = 0.1 # Fast poll when active
else:
empty_polls += 1
# Gradually slow down to 1 second
if empty_polls > 10:
poll_interval = min(1.0, poll_interval * 1.1)
except Exception as e:
print(f"[LogReader] Error: {e}")
poll_interval = 1.0 # Slow down on error
time.sleep(poll_interval)
def _check_for_new_lines(self) -> bool:
"""Check for and process new log lines. Returns True if new lines found."""
try:
current_size = self.log_path.stat().st_size
except OSError:
return False
if current_size < self.last_position:
# Log was rotated/truncated
self.last_position = 0
if current_size == self.last_position:
return False
# Read new lines
lines = []
try:
with open(self.log_path, 'r', encoding='utf-8', errors='ignore') as f:
f.seek(self.last_position)
lines = f.readlines()
self.last_position = f.tell()
except Exception as e:
print(f"[LogReader] Read error: {e}")
return False
if lines:
self._process_lines_batch(lines)
return True
return False
def _process_lines_batch(self, lines: List[str]):
"""Process multiple lines in batch (more efficient)."""
patterns = self._COMPILED_PATTERNS
events = []
for line in lines:
line = line.strip()
if not line:
continue
# Intern the line for memory efficiency
line = self._string_interner.intern(line)
with self._stats_lock:
self._stats['lines_read'] += 1
self._recent_lines.append(line)
# Try cache first
with self._cache_lock:
cached = self._pattern_cache.get(line)
if cached is not None:
with self._stats_lock:
self._stats['cache_hits'] += 1
if cached: # Not None (which means no match)
events.append(cached)
continue
with self._stats_lock:
self._stats['cache_misses'] += 1
# Parse event
event = self._parse_event(line, patterns)
# Cache result
with self._cache_lock:
if len(self._pattern_cache) >= self._cache_max_size:
# Simple eviction: clear half the cache
keys = list(self._pattern_cache.keys())[:self._cache_max_size // 2]
for k in keys:
del self._pattern_cache[k]
self._pattern_cache[line] = event
if event:
with self._stats_lock:
self._stats['events_parsed'] += 1
events.append(event)
# Batch notify (outside parsing loop)
for event in events:
self._notify_subscribers(event)
def _parse_event(self, line: str, patterns: Dict[str, re.Pattern]) -> Optional[LogEvent]:
"""Parse a log line into a LogEvent."""
for event_type, pattern in patterns.items():
match = pattern.search(line)
if match:
return LogEvent(
timestamp=datetime.now(),
raw_line=line,
event_type=event_type,
data={'groups': match.groups()}
)
return None
def _notify_subscribers(self, event: LogEvent):
"""Notify all subscribers of an event."""
with self._subscribers_lock:
callbacks = self._subscribers.get(event.event_type, []).copy()
any_callbacks = self._any_subscribers.copy()
# Type-specific subscribers
for callback in callbacks:
try:
callback(event)
except Exception as e:
print(f"[LogReader] Subscriber error: {e}")
# "Any" subscribers
for callback in any_callbacks:
try:
callback(event)
except Exception as e:
print(f"[LogReader] Subscriber error: {e}")
# ========== Public API ==========
def subscribe(self, event_type: str, callback: Callable):
"""Subscribe to specific event type."""
with self._subscribers_lock:
if event_type not in self._subscribers:
self._subscribers[event_type] = []
self._subscribers[event_type].append(callback)
def subscribe_all(self, callback: Callable):
"""Subscribe to all events."""
with self._subscribers_lock:
self._any_subscribers.append(callback)
def unsubscribe(self, event_type: str, callback: Callable):
"""Unsubscribe from events."""
with self._subscribers_lock:
if event_type in self._subscribers:
self._subscribers[event_type] = [
cb for cb in self._subscribers[event_type] if cb != callback
]
def read_lines(self, count: int = 50, filter_text: str = None) -> List[str]:
"""Read recent lines (API method)."""
# Convert ring buffer to list (most recent last)
lines = list(self._recent_lines)
lines = lines[-count:] if count < len(lines) else lines
if filter_text:
filter_lower = filter_text.lower()
lines = [l for l in lines if filter_lower in l.lower()]
return lines
def get_stats(self) -> Dict:
"""Get reader statistics."""
with self._stats_lock:
stats = self._stats.copy()
total_cache = stats['cache_hits'] + stats['cache_misses']
stats['cache_hit_rate'] = (stats['cache_hits'] / total_cache * 100) if total_cache > 0 else 0
stats['cache_size'] = len(self._pattern_cache)
return stats
def is_available(self) -> bool:
"""Check if log file is available."""
return self.log_path is not None and self.log_path.exists()
def clear_cache(self):
"""Clear the pattern cache."""
with self._cache_lock:
self._pattern_cache.clear()
# Module-level singleton state
_log_reader = None
_log_reader_lock = threading.Lock()


def get_log_reader() -> LogReader:
    """Return the global LogReader instance, creating it lazily (thread-safe)."""
    global _log_reader
    if _log_reader is not None:
        return _log_reader
    with _log_reader_lock:
        # Double-checked: another thread may have built the instance between
        # our unlocked check and acquiring the lock.
        if _log_reader is None:
            _log_reader = LogReader()
    return _log_reader