diff --git a/plugins/skill_scanner/plugin.py b/plugins/skill_scanner/plugin.py index 7ef8909..d3affb2 100644 --- a/plugins/skill_scanner/plugin.py +++ b/plugins/skill_scanner/plugin.py @@ -75,44 +75,40 @@ class SkillOCRThread(QThread): text = text.replace('SKILLS', '').replace('ALL CATEGORIES', '') text = text.replace('SKILL NAME', '').replace('RANK', '').replace('POINTS', '') - lines = text.split('\n') + # Remove category names that appear as standalone words + for category in ['Attributes', 'COMBAT', 'Combat', 'Design', 'Construction', + 'Defense', 'General', 'Handgun', 'Heavy Melee Weapons', + 'Heavy Weapons', 'Information', 'Inflict Melee Damage', + 'Inflict Ranged Damage', 'Light Melee Weapons', 'Longblades', + 'Medical', 'Mining', 'Science', 'Social', 'Beauty', 'Mindforce']: + text = text.replace(category, ' ') - for line in lines: - line = line.strip() - if not line: - continue - - # Skip category headers and short lines - if len(line) < 10: - continue - - # Try pattern: SkillName Rank Points - # More flexible pattern to handle merged text - # Skill name can be 2-50 chars, rank from our list, points 1-6 digits - match = re.search( - rf'([A-Za-z][A-Za-z\s]{{2,50}}?)\s+({rank_pattern})\s+(\d{{1,6}})(?:\s|$)', - line, re.IGNORECASE - ) - - if match: - skill_name = match.group(1).strip() - rank = match.group(2) - points = int(match.group(3)) - - # Clean up skill name - remove common words that might be prepended - skill_name = re.sub(r'^(Skill|SKILL)\s*', '', skill_name, flags=re.IGNORECASE) - skill_name = skill_name.strip() - - # Validate - points should be reasonable (not too small) - if points > 0 and skill_name: - skills[skill_name] = { - 'rank': rank, - 'points': points, - 'scanned_at': datetime.now().isoformat() - } - print(f"[SkillScanner] Parsed: {skill_name} = {rank} ({points})") + # Remove extra whitespace + text = ' '.join(text.split()) - # Alternative parsing: try to find skill-rank-points triplets + # Find all skills in the text using finditer + for match in re.finditer( + rf'([A-Za-z][A-Za-z\s]{{2,50}}?)\s+({rank_pattern})\s+(\d{{1,6}})(?:\s|$)', + text, re.IGNORECASE + ): + skill_name = match.group(1).strip() + rank = match.group(2) + points = int(match.group(3)) + + # Clean up skill name - remove common words that might be prepended + skill_name = re.sub(r'^(Skill|SKILL)\s*', '', skill_name, flags=re.IGNORECASE) + skill_name = skill_name.strip() + + # Validate - points should be reasonable (not too small) + if points > 0 and skill_name and len(skill_name) > 2: + skills[skill_name] = { + 'rank': rank, + 'points': points, + 'scanned_at': datetime.now().isoformat() + } + print(f"[SkillScanner] Parsed: {skill_name} = {rank} ({points})") + + # If no skills found with primary method, try alternative if not skills: skills = self._parse_skills_alternative(text, ALL_RANKS)