def _parse_skills(self, text):
    """Parse ``<skill name> <rank> <points>`` rows out of OCR text.

    Args:
        text: Raw OCR output. Empty or ``None`` input yields an empty dict.

    Returns:
        A dict mapping each skill name to a dict with the keys ``'rank'``
        (spelled as in the rank list below), ``'points'`` (positive int)
        and ``'scanned_at'`` (ISO-format timestamp string).
    """
    # Ranks in Entropia Universe (in order)
    RANKS = [
        'Newbie', 'Inept', 'Beginner', 'Amateur', 'Average',
        'Skilled', 'Expert', 'Professional', 'Master', 'Grand Master',
        'Champion', 'Legendary', 'Guru', 'Astonishing', 'Remarkable',
        'Outstanding', 'Marvelous', 'Prodigious', 'Amazing', 'Incredible',
        'Awesome',
    ]
    # Window labels / captions that are removed before rows are matched.
    HEADERS = (
        'SKILLS', 'ALL CATEGORIES', 'SKILL NAME', 'RANK', 'POINTS',
        'Attributes', 'COMBAT', 'Design', 'Construction', 'Defense',
        'General', 'Handgun', 'Heavy Melee Weapons', 'Information',
        'Inflict Melee Damage', 'Inflict Ranged Damage',
        'Light Melee Weapons', 'Longblades', 'Medical', 'Mining',
        'Science', 'Social', 'Beauty', 'Mindforce',
    )

    skills = {}
    if not text:
        return skills

    # Longest alternative first so 'Grand Master' is never read as 'Master'.
    rank_pattern = '|'.join(
        re.escape(rank) for rank in sorted(RANKS, key=len, reverse=True)
    )
    canonical_rank = {rank.lower(): rank for rank in RANKS}

    # Remove header phrases (case-sensitively, anywhere in the text), EXCEPT
    # when the phrase is directly followed by "<rank> <digits>": in that
    # position it is the skill name of a row, and deleting it would lose
    # the row. A phrase embedded in a longer name is still removed.
    header_pattern = '|'.join(
        re.escape(header)
        for header in sorted(HEADERS, key=len, reverse=True)
    )
    header_re = re.compile(
        rf'(?:{header_pattern})(?!\s+(?i:{rank_pattern})\s+\d)'
    )
    text = header_re.sub('', text)

    # Pattern: SkillName Rank Points (points must end at whitespace/EOL).
    row_re = re.compile(
        rf'([A-Za-z][A-Za-z\s]{{2,50}}?)\s+({rank_pattern})'
        rf'\s+(\d{{1,6}})(?:\s|$)',
        re.IGNORECASE,
    )
    scanned_at = datetime.now().isoformat()

    for line in text.split('\n'):
        line = line.strip()
        # Skip blanks and leftovers too short to hold a full row.
        if len(line) < 10:
            continue

        # finditer rather than search: several rows may have been merged
        # onto one OCR line, and every one of them should be recorded.
        for match in row_re.finditer(line):
            # Collapse runs of whitespace inside the captured name.
            skill_name = ' '.join(match.group(1).split())
            matched_rank = match.group(2)
            rank = canonical_rank.get(matched_rank.lower(), matched_rank)
            points = int(match.group(3))

            # Validate - points should be reasonable (not too small)
            if points > 0:
                skills[skill_name] = {
                    'rank': rank,
                    'points': points,
                    'scanned_at': scanned_at,
                }
                print(f"[SkillScanner] Parsed: {skill_name} = {rank} ({points})")

    # Alternative parsing: try to find skill-rank-points triplets
    if not skills:
        skills = self._parse_skills_alternative(text, RANKS)

    return skills


def _parse_skills_alternative(self, text, ranks):
    """Alternative parser for when text is heavily merged.

    Scans the whole text (not line by line, so a row may be split across
    line breaks) for ``<1-4 words> <rank> <number>`` triplets.

    Args:
        text: OCR text to scan. Empty or ``None`` yields an empty dict.
        ranks: Iterable of rank names to recognise.

    Returns:
        A dict with the same shape as the one returned by
        ``_parse_skills``.
    """
    skills = {}
    if not text or not ranks:
        return skills

    # One combined pattern instead of one pass per rank. Longest rank first
    # plus a lazy name means "X Grand Master 10" is read as skill "X" with
    # rank "Grand Master", never as skill "X Grand" with rank "Master".
    rank_pattern = '|'.join(
        re.escape(rank) for rank in sorted(ranks, key=len, reverse=True)
    )
    canonical_rank = {rank.lower(): rank for rank in ranks}
    triplet_re = re.compile(
        rf'\b([A-Z][a-z]{{2,}}(?:\s+[A-Z][a-z]{{2,}}){{0,3}}?)'
        rf'\s+({rank_pattern})'
        # (?!\d): reject over-long numbers instead of truncating them.
        rf'\s+(\d{{1,6}})(?!\d)',
        re.IGNORECASE,
    )
    scanned_at = datetime.now().isoformat()

    for match in triplet_re.finditer(text):
        # The name may span line breaks here; store it on a single line.
        skill_name = ' '.join(match.group(1).split())
        matched_rank = match.group(2)
        points = int(match.group(3))

        if points > 0:
            skills[skill_name] = {
                'rank': canonical_rank.get(matched_rank.lower(), matched_rank),
                'points': points,
                'scanned_at': scanned_at,
            }

    return skills