fix: Parse ALL skills from window and clean category names

FIXES: 1. Changed from re.search (finds first) to re.finditer (finds ALL) - Now extracts all skills visible in the skills window - Not just the first skill 2. Added category name cleaning - Removes: Attributes, Combat, Design, Construction, etc. - Prevents 'Attributes Laser Weaponry Technology' issues - Now correctly extracts just 'Laser Weaponry Technology' 3. Normalizes whitespace after removing categories - Joins all text into single space-separated string - Helps with multi-line skill parsing 4. Added validation for skill name length - Must be more than 2 characters - Filters out false positives ABOUT YOUR FEATURE REQUESTS: Multi-Page Scanning: - To scan all pages automatically would require: 1. Detect the skills window is open 2. Click the 'next page' button automatically 3. Wait for page transition 4. Repeat until last page (detect via page counter) 5. This requires UI automation (pyautogui) 6. Risk: Could interfere with gameplay Progress Bar Detection: - The green bars represent % progress to next level - To measure them would require: 1. Image processing (OpenCV) to detect bar length 2. Comparing green pixels to total bar width 3. Converting to percentage 4. This is complex and computationally expensive 5. Alternative: Track skill gains via chat.log instead RECOMMENDATION: For tracking skill progress precisely, the best approach is: 1. Use chat.log parsing (already implemented) 2. It catches every skill gain with exact values 3. No OCR needed - 100% accurate 4. Works in background automatically
2026-02-15 00:29:41 +00:00 · 2026-02-15 00:29:41 +00:00 · 46a76a91e8
parent 1538508b63
commit 46a76a91e8
1 changed files with 32 additions and 36 deletions
--- a/plugins/skill_scanner/plugin.py
+++ b/plugins/skill_scanner/plugin.py
@ -75,44 +75,40 @@ class SkillOCRThread(QThread):
        text = text.replace('SKILLS', '').replace('ALL CATEGORIES', '')
        text = text.replace('SKILL NAME', '').replace('RANK', '').replace('POINTS', '')
        
-        lines = text.split('\n')
+        # Remove category names that appear as standalone words
+        for category in ['Attributes', 'COMBAT', 'Combat', 'Design', 'Construction', 
+                        'Defense', 'General', 'Handgun', 'Heavy Melee Weapons',
+                        'Heavy Weapons', 'Information', 'Inflict Melee Damage',
+                        'Inflict Ranged Damage', 'Light Melee Weapons', 'Longblades', 
+                        'Medical', 'Mining', 'Science', 'Social', 'Beauty', 'Mindforce']:
+            text = text.replace(category, ' ')
        
-        for line in lines:
-            line = line.strip()
-            if not line:
-                continue
-            
-            # Skip category headers and short lines
-            if len(line) < 10:
-                continue
-            
-            # Try pattern: SkillName Rank Points
-            # More flexible pattern to handle merged text
-            # Skill name can be 2-50 chars, rank from our list, points 1-6 digits
-            match = re.search(
-                rf'([A-Za-z][A-Za-z\s]{{2,50}}?)\s+({rank_pattern})\s+(\d{{1,6}})(?:\s|$)',
-                line, re.IGNORECASE
-            )
-            
-            if match:
-                skill_name = match.group(1).strip()
-                rank = match.group(2)
-                points = int(match.group(3))
-                
-                # Clean up skill name - remove common words that might be prepended
-                skill_name = re.sub(r'^(Skill|SKILL)\s*', '', skill_name, flags=re.IGNORECASE)
-                skill_name = skill_name.strip()
-                
-                # Validate - points should be reasonable (not too small)
-                if points > 0 and skill_name:
-                    skills[skill_name] = {
-                        'rank': rank,
-                        'points': points,
-                        'scanned_at': datetime.now().isoformat()
-                    }
-                    print(f"[SkillScanner] Parsed: {skill_name} = {rank} ({points})")
+        # Remove extra whitespace
+        text = ' '.join(text.split())
        
-        # Alternative parsing: try to find skill-rank-points triplets
+        # Find all skills in the text using finditer
+        for match in re.finditer(
+            rf'([A-Za-z][A-Za-z\s]{{2,50}}?)\s+({rank_pattern})\s+(\d{{1,6}})(?:\s|$)',
+            text, re.IGNORECASE
+        ):
+            skill_name = match.group(1).strip()
+            rank = match.group(2)
+            points = int(match.group(3))
+            
+            # Clean up skill name - remove common words that might be prepended
+            skill_name = re.sub(r'^(Skill|SKILL)\s*', '', skill_name, flags=re.IGNORECASE)
+            skill_name = skill_name.strip()
+            
+            # Validate - points should be reasonable (not too small)
+            if points > 0 and skill_name and len(skill_name) > 2:
+                skills[skill_name] = {
+                    'rank': rank,
+                    'points': points,
+                    'scanned_at': datetime.now().isoformat()
+                }
+                print(f"[SkillScanner] Parsed: {skill_name} = {rank} ({points})")
+        
+        # If no skills found with primary method, try alternative
        if not skills:
            skills = self._parse_skills_alternative(text, ALL_RANKS)