Add auth, SSH support, and web config

This commit is contained in:
devmatrix 2026-02-20 15:15:11 +00:00
parent 5b9ec7b351
commit b1c2264cc6
5 changed files with 1260 additions and 323 deletions

Binary file not shown.

View File

@ -8,6 +8,7 @@ import time
import json import json
import logging import logging
import threading import threading
import paramiko
from dataclasses import dataclass, asdict from dataclasses import dataclass, asdict
from typing import List, Dict, Optional, Tuple from typing import List, Dict, Optional, Tuple
from datetime import datetime from datetime import datetime
@ -104,7 +105,7 @@ class IPMIFanController:
return False, str(e) return False, str(e)
def test_connection(self) -> bool: def test_connection(self) -> bool:
"""Test if we can connect to the server.""" """Test IPMI connection."""
success, _ = self._run_ipmi(["mc", "info"], timeout=10) success, _ = self._run_ipmi(["mc", "info"], timeout=10)
return success return success
@ -257,12 +258,139 @@ class IPMIFanController:
return self.consecutive_failures < self.max_failures return self.consecutive_failures < self.max_failures
class SSHSensorClient:
    """SSH client that collects lm-sensors temperature readings from a remote host.

    The client runs ``sensors -u`` over SSH and turns every ``temp*_input``
    line into a ``TemperatureReading``. It connects lazily on the first read
    and drops the connection after a failed read so the next read reconnects.
    """

    def __init__(self, host: str, username: str, password: Optional[str] = None,
                 key_file: Optional[str] = None, port: int = 22):
        self.host = host
        self.username = username
        self.password = password
        self.key_file = key_file
        self.port = port
        self.client: Optional[paramiko.SSHClient] = None
        # Number of connect/read failures since the last successful read.
        self.consecutive_failures = 0

    def connect(self) -> bool:
        """Open the SSH connection.

        An existing key file is preferred over the password. A leading ``~``
        in ``key_file`` is expanded before the existence check.

        Returns:
            True when connected. False when no authentication method is
            configured or the connection attempt failed; in the latter case
            ``consecutive_failures`` is incremented and ``self.client`` is
            left as None so the next read retries the connection.
        """
        new_client = None
        try:
            connect_kwargs = {
                "hostname": self.host,
                "port": self.port,
                "username": self.username,
                "timeout": 10
            }

            key_path = Path(self.key_file).expanduser() if self.key_file else None
            if key_path is not None and key_path.exists():
                connect_kwargs["key_filename"] = str(key_path)
            elif self.password:
                connect_kwargs["password"] = self.password
            else:
                logger.error("No authentication method available for SSH")
                return False

            # Close any earlier connection instead of overwriting (and leaking) it.
            self.disconnect()

            new_client = paramiko.SSHClient()
            # NOTE(review): AutoAddPolicy trusts any unknown host key, which
            # leaves the connection open to man-in-the-middle attacks. Kept
            # for backward compatibility; consider loading known host keys
            # and rejecting unknown hosts.
            new_client.set_missing_host_key_policy(paramiko.AutoAddPolicy())
            new_client.connect(**connect_kwargs)
        except Exception as e:
            logger.error(f"SSH connection failed: {e}")
            self.consecutive_failures += 1
            # Never keep a half-initialised client around: a truthy but
            # unconnected client would make the next read skip reconnecting.
            if new_client is not None:
                new_client.close()
            self.client = None
            return False

        self.client = new_client
        logger.info(f"SSH connected to {self.host}")
        return True

    def disconnect(self):
        """Close the SSH connection, if any, and forget the client."""
        if self.client:
            self.client.close()
            self.client = None

    def get_lm_sensors_data(self) -> List["TemperatureReading"]:
        """Run ``sensors -u`` on the remote host and return its temperatures.

        Returns:
            The parsed readings, or an empty list when the connection or the
            command failed. A failure increments ``consecutive_failures`` and
            closes the connection so the next call reconnects.
        """
        if not self.client and not self.connect():
            return []

        try:
            _stdin, stdout, stderr = self.client.exec_command("sensors -u", timeout=15)
            # errors="replace": a stray non-UTF-8 byte must not discard the whole poll.
            output = stdout.read().decode(errors="replace")
            error = stderr.read().decode(errors="replace")

            if error:
                logger.warning(f"sensors command stderr: {error}")

            temps = self._parse_sensors_output(output)
            self.consecutive_failures = 0
            return temps
        except Exception as e:
            logger.error(f"Failed to get sensors data: {e}")
            self.consecutive_failures += 1
            self.disconnect()  # Force reconnect on next attempt
            return []

    @staticmethod
    def _iter_temperature_inputs(output: str):
        """Yield ``(label, name, value)`` for each ``temp*_input`` line.

        ``label`` is the most recent unindented line ending in ``:`` (in
        ``sensors -u`` output that is the feature label such as ``Core 0``),
        ``name`` is the sub-feature name (e.g. ``temp2_input``) and ``value``
        is the reading as a float. Lines that do not parse are skipped.
        """
        label = ""
        for raw_line in output.splitlines():
            line = raw_line.strip()

            # Section header: test the *raw* line for indentation; testing the
            # stripped line (as before) could never detect it.
            if line.endswith(":") and not raw_line.startswith((" ", "\t")):
                label = line.rstrip(":")
                continue

            if "_input:" in line and "temp" in line.lower():
                parts = line.split(":")
                if len(parts) != 2:
                    continue
                try:
                    value = float(parts[1].strip())
                except ValueError:
                    # Non-numeric value: ignore this line only.
                    continue
                yield label, parts[0].strip(), value

    def _parse_sensors_output(self, output: str) -> List["TemperatureReading"]:
        """Parse ``sensors -u`` output into temperature readings.

        Each reading is named ``"<section header>/<sub-feature name>"`` and is
        given a location by ``_classify_sensor_name``.
        """
        return [
            TemperatureReading(
                name=f"{label}/{name}",
                location=self._classify_sensor_name(name, label),
                value=value,
                status="ok"
            )
            for label, name, value in self._iter_temperature_inputs(output)
        ]

    def _classify_sensor_name(self, name: str, chip: str) -> str:
        """Classify a sensor location from its name and section header.

        ``name`` is checked first and ``chip`` (the section header the reading
        was found under) second. The header matters because in ``sensors -u``
        output the sub-feature name is just ``tempN_input``; the words
        "Core", "Package", "Tctl"/"Tdie" appear on the header line.

        Returns:
            "cpu1", "cpu2" or "cpu" for CPU sensors, otherwise "other".
        """
        for text in (name, chip):
            lowered = text.lower()
            if "core" in lowered:
                # NOTE(review): substring digit matching is kept from the
                # original ("Core 10" maps to cpu1, "Core 4" to plain cpu);
                # confirm the intended core-to-socket mapping.
                if "0" in text or "1" in text:
                    return "cpu1"
                if "2" in text or "3" in text:
                    return "cpu2"
                return "cpu"
            if "package" in lowered:
                return "cpu"
            if "tdie" in lowered or "tctl" in lowered:
                return "cpu"
        return "other"

    def is_healthy(self) -> bool:
        """Return True while fewer than three consecutive failures occurred."""
        return self.consecutive_failures < 3
class FanControlService: class FanControlService:
"""Background service for automatic fan control.""" """Background service for automatic fan control."""
def __init__(self, config_path: str = "/etc/ipmi-fan-controller/config.json"): def __init__(self, config_path: str = "/etc/ipmi-fan-controller/config.json"):
self.config_path = config_path self.config_path = config_path
self.controller: Optional[IPMIFanController] = None self.controller: Optional[IPMIFanController] = None
self.ssh_client: Optional[SSHSensorClient] = None
self.running = False self.running = False
self.thread: Optional[threading.Thread] = None self.thread: Optional[threading.Thread] = None
self.current_speed = 0 self.current_speed = 0
@ -271,14 +399,26 @@ class FanControlService:
self.last_fans: List[FanReading] = [] self.last_fans: List[FanReading] = []
self.lock = threading.Lock() self.lock = threading.Lock()
# Default config # Default config with new structure
self.config = { self.config = {
"host": "", # IPMI Settings
"username": "", "ipmi_host": "",
"password": "", "ipmi_username": "",
"port": 623, "ipmi_password": "",
"ipmi_port": 623,
# SSH Settings
"ssh_enabled": False,
"ssh_host": None,
"ssh_username": None,
"ssh_password": None,
"ssh_use_key": False,
"ssh_key_file": None,
"ssh_port": 22,
# Fan Control Settings
"enabled": False, "enabled": False,
"interval": 10, # seconds "interval": 10,
"min_speed": 10, "min_speed": 10,
"max_speed": 100, "max_speed": 100,
"fan_curve": [ "fan_curve": [
@ -298,8 +438,9 @@ class FanControlService:
def _load_config(self): def _load_config(self):
"""Load configuration from file.""" """Load configuration from file."""
try: try:
if Path(self.config_path).exists(): config_file = Path(self.config_path)
with open(self.config_path, 'r') as f: if config_file.exists():
with open(config_file) as f:
loaded = json.load(f) loaded = json.load(f)
self.config.update(loaded) self.config.update(loaded)
logger.info(f"Loaded config from {self.config_path}") logger.info(f"Loaded config from {self.config_path}")
@ -309,8 +450,9 @@ class FanControlService:
def _save_config(self): def _save_config(self):
"""Save configuration to file.""" """Save configuration to file."""
try: try:
Path(self.config_path).parent.mkdir(parents=True, exist_ok=True) config_file = Path(self.config_path)
with open(self.config_path, 'w') as f: config_file.parent.mkdir(parents=True, exist_ok=True)
with open(config_file, 'w') as f:
json.dump(self.config, f, indent=2) json.dump(self.config, f, indent=2)
logger.info(f"Saved config to {self.config_path}") logger.info(f"Saved config to {self.config_path}")
except Exception as e: except Exception as e:
@ -321,40 +463,70 @@ class FanControlService:
self.config.update(kwargs) self.config.update(kwargs)
self._save_config() self._save_config()
# Reinitialize controller if connection params changed # Reinitialize controllers if connection params changed
if any(k in kwargs for k in ['host', 'username', 'password', 'port']): ipmi_changed = any(k in kwargs for k in ['ipmi_host', 'ipmi_username', 'ipmi_password', 'ipmi_port'])
self._init_controller() ssh_changed = any(k in kwargs for k in ['ssh_host', 'ssh_username', 'ssh_password', 'ssh_key_file', 'ssh_port'])
def _init_controller(self): if ipmi_changed:
self._init_ipmi_controller()
if ssh_changed or (kwargs.get('ssh_enabled') and not self.ssh_client):
self._init_ssh_client()
def _init_ipmi_controller(self) -> bool:
"""Initialize the IPMI controller.""" """Initialize the IPMI controller."""
if not all([self.config.get('host'), self.config.get('username'), self.config.get('password')]): if not all([self.config.get('ipmi_host'), self.config.get('ipmi_username')]):
logger.warning("Missing IPMI credentials") logger.warning("Missing IPMI credentials")
return False return False
self.controller = IPMIFanController( self.controller = IPMIFanController(
host=self.config['host'], host=self.config['ipmi_host'],
username=self.config['username'], username=self.config['ipmi_username'],
password=self.config['password'], password=self.config.get('ipmi_password', ''),
port=self.config.get('port', 623) port=self.config.get('ipmi_port', 623)
) )
if self.controller.test_connection(): if self.controller.test_connection():
logger.info(f"Connected to IPMI at {self.config['host']}") logger.info(f"Connected to IPMI at {self.config['ipmi_host']}")
return True return True
else: else:
logger.error(f"Failed to connect to IPMI at {self.config['host']}") logger.error(f"Failed to connect to IPMI at {self.config['ipmi_host']}")
self.controller = None self.controller = None
return False return False
def _init_ssh_client(self) -> bool:
    """Initialize the SSH client used for lm-sensors readings.

    Host, username and password fall back to the IPMI values when the
    SSH-specific settings are empty. No connection is opened here; this
    only builds the ``SSHSensorClient`` from the current config.

    Returns:
        True when a client was created; False when SSH is disabled or the
        host/username are missing (the existing client, if any, is left
        untouched in that case).
    """
    if not self.config.get('ssh_enabled'):
        return False

    host = self.config.get('ssh_host') or self.config.get('ipmi_host')
    username = self.config.get('ssh_username') or self.config.get('ipmi_username')

    if not all([host, username]):
        logger.warning("Missing SSH credentials")
        return False

    previous_client = self.ssh_client
    # NOTE(review): ssh_key_file is passed regardless of the 'ssh_use_key'
    # setting — confirm whether that flag is meant to gate key authentication.
    self.ssh_client = SSHSensorClient(
        host=host,
        username=username,
        password=self.config.get('ssh_password') or self.config.get('ipmi_password'),
        key_file=self.config.get('ssh_key_file'),
        port=self.config.get('ssh_port', 22)
    )

    # Close the client being replaced; otherwise each SSH-related config
    # update leaves its open connection behind. Done after the swap so the
    # new client is already in place.
    if previous_client is not None:
        previous_client.disconnect()

    return True
def start(self) -> bool:
"""Start the fan control service.""" """Start the fan control service."""
if self.running: if self.running:
return return True
if not self._init_controller(): if not self._init_ipmi_controller():
logger.error("Cannot start service - IPMI connection failed") logger.error("Cannot start service - IPMI connection failed")
return False return False
if self.config.get('ssh_enabled'):
self._init_ssh_client()
self.running = True self.running = True
self.thread = threading.Thread(target=self._control_loop, daemon=True) self.thread = threading.Thread(target=self._control_loop, daemon=True)
self.thread.start() self.thread.start()
@ -371,6 +543,9 @@ class FanControlService:
if self.controller: if self.controller:
self.controller.disable_manual_fan_control() self.controller.disable_manual_fan_control()
if self.ssh_client:
self.ssh_client.disconnect()
logger.info("Fan control service stopped") logger.info("Fan control service stopped")
def _control_loop(self): def _control_loop(self):
@ -385,16 +560,17 @@ class FanControlService:
time.sleep(1) time.sleep(1)
continue continue
# Ensure controllers are healthy
if not self.controller or not self.controller.is_healthy(): if not self.controller or not self.controller.is_healthy():
logger.warning("Controller unhealthy, attempting reconnect...") logger.warning("IPMI controller unhealthy, attempting reconnect...")
if not self._init_controller(): if not self._init_ipmi_controller():
time.sleep(30) time.sleep(30)
continue continue
self.controller.enable_manual_fan_control() self.controller.enable_manual_fan_control()
# Get sensor data # Get temperature data
temps = self.controller.get_temperatures() temps = self._get_temperatures()
fans = self.controller.get_fan_speeds() fans = self.controller.get_fan_speeds() if self.controller else []
with self.lock: with self.lock:
self.last_temps = temps self.last_temps = temps
@ -406,7 +582,9 @@ class FanControlService:
continue continue
# Check for panic temperature # Check for panic temperature
max_temp = max((t.value for t in temps if t.location.startswith('cpu')), default=0) cpu_temps = [t for t in temps if t.location.startswith('cpu')]
max_temp = max((t.value for t in cpu_temps), default=0)
if max_temp >= self.config.get('panic_temp', 85): if max_temp >= self.config.get('panic_temp', 85):
self.target_speed = self.config.get('panic_speed', 100) self.target_speed = self.config.get('panic_speed', 100)
logger.warning(f"PANIC MODE: CPU temp {max_temp}°C, setting fans to {self.target_speed}%") logger.warning(f"PANIC MODE: CPU temp {max_temp}°C, setting fans to {self.target_speed}%")
@ -431,10 +609,27 @@ class FanControlService:
logger.error(f"Control loop error: {e}") logger.error(f"Control loop error: {e}")
time.sleep(10) time.sleep(10)
def _get_temperatures(self) -> List[TemperatureReading]:
    """Collect temperatures, using IPMI first and SSH lm-sensors as needed.

    SSH readings replace the IPMI readings only when SSH is enabled, an SSH
    client exists, the SSH read returned data, and either IPMI produced
    nothing or the 'prefer_ssh_temps' setting is on.
    """
    # IPMI is the primary source when a controller is available.
    readings = self.controller.get_temperatures() if self.controller else []

    ssh_available = self.config.get('ssh_enabled') and self.ssh_client
    if not ssh_available:
        return readings

    ssh_wanted = not readings or self.config.get('prefer_ssh_temps', False)
    if not ssh_wanted:
        return readings

    # An empty SSH result never overrides what IPMI returned.
    ssh_readings = self.ssh_client.get_lm_sensors_data()
    return ssh_readings if ssh_readings else readings
def get_status(self) -> Dict: def get_status(self) -> Dict:
"""Get current status.""" """Get current status."""
with self.lock: with self.lock:
return { status = {
"running": self.running, "running": self.running,
"enabled": self.config.get('enabled', False), "enabled": self.config.get('enabled', False),
"connected": self.controller is not None and self.controller.is_healthy(), "connected": self.controller is not None and self.controller.is_healthy(),
@ -444,10 +639,25 @@ class FanControlService:
"temperatures": [asdict(t) for t in self.last_temps], "temperatures": [asdict(t) for t in self.last_temps],
"fans": [asdict(f) for f in self.last_fans], "fans": [asdict(f) for f in self.last_fans],
"config": { "config": {
k: v for k, v in self.config.items() # IPMI
if k != 'password' # Don't expose password "ipmi_host": self.config.get('ipmi_host'),
"ipmi_port": self.config.get('ipmi_port'),
"ipmi_username": self.config.get('ipmi_username'),
# SSH
"ssh_enabled": self.config.get('ssh_enabled'),
"ssh_host": self.config.get('ssh_host'),
"ssh_port": self.config.get('ssh_port'),
"ssh_username": self.config.get('ssh_username'),
"ssh_use_key": self.config.get('ssh_use_key'),
# Settings
"min_speed": self.config.get('min_speed'),
"max_speed": self.config.get('max_speed'),
"panic_temp": self.config.get('panic_temp'),
"interval": self.config.get('interval'),
"fan_curve": self.config.get('fan_curve')
} }
} }
return status
def set_manual_speed(self, speed: int) -> bool: def set_manual_speed(self, speed: int) -> bool:
"""Set manual fan speed.""" """Set manual fan speed."""
@ -473,16 +683,15 @@ class FanControlService:
self.controller.disable_manual_fan_control() self.controller.disable_manual_fan_control()
# Global service instance # Global service instances
_service: Optional[FanControlService] = None _service_instances: Dict[str, FanControlService] = {}
def get_service(config_path: str = "/etc/ipmi-fan-controller/config.json") -> FanControlService:
    """Return the service bound to config_path, creating it on first request.

    Instances are cached in the module-level ``_service_instances`` dict,
    keyed by the exact ``config_path`` string.
    """
    service = _service_instances.get(config_path)
    if service is None:
        service = FanControlService(config_path)
        _service_instances[config_path] = service
    return service
if __name__ == "__main__": if __name__ == "__main__":

View File

@ -2,3 +2,5 @@ fastapi==0.109.0
uvicorn[standard]==0.27.0 uvicorn[standard]==0.27.0
pydantic==2.5.3 pydantic==2.5.3
pydantic-settings==2.1.0 pydantic-settings==2.1.0
python-multipart==0.0.6
paramiko==3.4.0

7
server.log Normal file
View File

@ -0,0 +1,7 @@
/home/devmatrix/projects/fan-controller-v2/web_server.py:49: PydanticDeprecatedSince20: Pydantic V1 style `@validator` validators are deprecated. You should migrate to Pydantic V2 style `@field_validator` validators, see the migration guide for more details. Deprecated in Pydantic V2.0 to be removed in V3.0. See Pydantic V2 Migration Guide at https://errors.pydantic.dev/2.5/migration/
@validator('new_password')
INFO: Started server process [888347]
INFO: Waiting for application startup.
INFO: Application startup complete.
INFO: Uvicorn running on http://0.0.0.0:8000 (Press CTRL+C to quit)
INFO: 127.0.0.1:44244 - "GET /api/status HTTP/1.1" 401 Unauthorized

File diff suppressed because it is too large Load Diff