Add auth, SSH support, and web config
This commit is contained in:
parent
5b9ec7b351
commit
b1c2264cc6
Binary file not shown.
|
|
@ -8,6 +8,7 @@ import time
|
||||||
import json
|
import json
|
||||||
import logging
|
import logging
|
||||||
import threading
|
import threading
|
||||||
|
import paramiko
|
||||||
from dataclasses import dataclass, asdict
|
from dataclasses import dataclass, asdict
|
||||||
from typing import List, Dict, Optional, Tuple
|
from typing import List, Dict, Optional, Tuple
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
|
|
@ -104,7 +105,7 @@ class IPMIFanController:
|
||||||
return False, str(e)
|
return False, str(e)
|
||||||
|
|
||||||
def test_connection(self) -> bool:
|
def test_connection(self) -> bool:
|
||||||
"""Test if we can connect to the server."""
|
"""Test IPMI connection."""
|
||||||
success, _ = self._run_ipmi(["mc", "info"], timeout=10)
|
success, _ = self._run_ipmi(["mc", "info"], timeout=10)
|
||||||
return success
|
return success
|
||||||
|
|
||||||
|
|
@ -257,12 +258,139 @@ class IPMIFanController:
|
||||||
return self.consecutive_failures < self.max_failures
|
return self.consecutive_failures < self.max_failures
|
||||||
|
|
||||||
|
|
||||||
|
class SSHSensorClient:
    """SSH client that collects temperature readings by running lm-sensors.

    The client connects lazily: the first call to ``get_lm_sensors_data``
    opens the connection, and any failure drops it so the next call
    reconnects. Failures are counted in ``consecutive_failures`` and
    surfaced through ``is_healthy``.
    """

    def __init__(self, host: str, username: str, password: Optional[str] = None,
                 key_file: Optional[str] = None, port: int = 22,
                 max_failures: int = 3):
        """Store connection parameters; no network I/O happens here.

        Args:
            host: Hostname or address of the SSH server.
            username: Login name.
            password: Password, used when no usable key file is configured.
            key_file: Path to a private key; preferred over the password
                when the file exists.
            port: SSH port.
            max_failures: Number of consecutive failures at which
                ``is_healthy`` starts returning False.
        """
        self.host = host
        self.username = username
        self.password = password
        self.key_file = key_file
        self.port = port
        self.max_failures = max_failures
        # Only set while a connection is believed to be open.
        self.client: Optional["paramiko.SSHClient"] = None
        self.consecutive_failures = 0

    def connect(self) -> bool:
        """Open the SSH connection.

        Returns:
            True when connected. False when no authentication method is
            available or the connection attempt failed; in both cases
            ``self.client`` is left as None so the next poll retries.
        """
        client = None
        try:
            # Drop any previous connection so it is not leaked.
            self.disconnect()

            connect_kwargs = {
                "hostname": self.host,
                "port": self.port,
                "username": self.username,
                "timeout": 10
            }

            if self.key_file and Path(self.key_file).exists():
                connect_kwargs["key_filename"] = self.key_file
            elif self.password:
                connect_kwargs["password"] = self.password
            else:
                logger.error("No authentication method available for SSH")
                return False

            client = paramiko.SSHClient()
            # NOTE(security): AutoAddPolicy trusts any unknown host key, so
            # the first connection is not protected against a
            # man-in-the-middle. Kept for compatibility with existing setups.
            client.set_missing_host_key_policy(paramiko.AutoAddPolicy())
            client.connect(**connect_kwargs)

        except Exception as e:
            logger.error(f"SSH connection failed: {e}")
            self.consecutive_failures += 1
            if client is not None:
                client.close()
            return False

        # Publish the client only once it is actually connected.
        self.client = client
        logger.info(f"SSH connected to {self.host}")
        return True

    def disconnect(self):
        """Close the SSH connection, if any, and forget the client."""
        if self.client:
            self.client.close()
            self.client = None

    def get_lm_sensors_data(self) -> List["TemperatureReading"]:
        """Run ``sensors -u`` on the remote host and parse its temperatures.

        Returns:
            The parsed readings, or an empty list when the connection or
            the command fails (the failure is logged and counted).
        """
        if not self.client and not self.connect():
            return []

        try:
            _stdin, stdout, stderr = self.client.exec_command("sensors -u", timeout=15)
            # errors="replace": a stray non-UTF-8 byte must not discard the poll.
            output = stdout.read().decode(errors="replace")
            error = stderr.read().decode(errors="replace")

            if error:
                logger.warning(f"sensors command stderr: {error}")

            temps = self._parse_sensors_output(output)
            self.consecutive_failures = 0
            return temps

        except Exception as e:
            logger.error(f"Failed to get sensors data: {e}")
            self.consecutive_failures += 1
            self.disconnect()  # Force reconnect on next attempt
            return []

    @staticmethod
    def _iter_temp_inputs(output: str) -> List[Tuple[str, str, float]]:
        """Extract ``(label, sub-feature, value)`` for each temperature input.

        ``label`` is the most recent line that ends with a colon. In
        ``sensors -u`` output that is the feature label (e.g. "Core 0"),
        not the chip name, because chip lines carry no trailing colon.
        """
        found = []
        label = ""

        for raw_line in output.splitlines():
            line = raw_line.strip()

            # Section header: remember it for the sub-features that follow.
            if line.endswith(":"):
                label = line.rstrip(":")
                continue

            if "_input:" not in line or "temp" not in line.lower():
                continue

            parts = line.split(":")
            if len(parts) != 2:
                continue

            try:
                value = float(parts[1].strip())
            except ValueError:
                logger.debug(f"Skipping unparsable sensor line: {line}")
                continue

            found.append((label, parts[0].strip(), value))

        return found

    def _parse_sensors_output(self, output: str) -> List["TemperatureReading"]:
        """Parse ``sensors -u`` output into temperature readings.

        Each reading is named ``"<label>/<sub-feature>"`` and classified
        from both parts.
        """
        return [
            TemperatureReading(
                name=f"{label}/{name}",
                location=self._classify_sensor_name(name, label),
                value=value,
                status="ok"
            )
            for label, name, value in self._iter_temp_inputs(output)
        ]

    def _classify_sensor_name(self, name: str, chip: str) -> str:
        """Classify a sensor location from its name and section label.

        ``name`` is checked first, then ``chip``. The parser passes the
        raw sub-feature (e.g. "temp2_input") as ``name`` and the feature
        label (e.g. "Core 0") as ``chip``, so the label usually carries
        the useful text.

        Returns:
            "cpu1", "cpu2" or "cpu" for CPU sensors, otherwise "other".
        """
        for text in (name, chip):
            lowered = text.lower()

            if "core" in lowered:
                # Digit heuristic kept from the original mapping.
                if "0" in text or "1" in text:
                    return "cpu1"
                if "2" in text or "3" in text:
                    return "cpu2"
                return "cpu"

            if "package" in lowered or "tdie" in lowered or "tctl" in lowered:
                return "cpu"

        return "other"

    def is_healthy(self) -> bool:
        """Return True while consecutive failures stay below ``max_failures``."""
        return self.consecutive_failures < self.max_failures
|
||||||
|
|
||||||
|
|
||||||
class FanControlService:
|
class FanControlService:
|
||||||
"""Background service for automatic fan control."""
|
"""Background service for automatic fan control."""
|
||||||
|
|
||||||
def __init__(self, config_path: str = "/etc/ipmi-fan-controller/config.json"):
|
def __init__(self, config_path: str = "/etc/ipmi-fan-controller/config.json"):
|
||||||
self.config_path = config_path
|
self.config_path = config_path
|
||||||
self.controller: Optional[IPMIFanController] = None
|
self.controller: Optional[IPMIFanController] = None
|
||||||
|
self.ssh_client: Optional[SSHSensorClient] = None
|
||||||
self.running = False
|
self.running = False
|
||||||
self.thread: Optional[threading.Thread] = None
|
self.thread: Optional[threading.Thread] = None
|
||||||
self.current_speed = 0
|
self.current_speed = 0
|
||||||
|
|
@ -271,14 +399,26 @@ class FanControlService:
|
||||||
self.last_fans: List[FanReading] = []
|
self.last_fans: List[FanReading] = []
|
||||||
self.lock = threading.Lock()
|
self.lock = threading.Lock()
|
||||||
|
|
||||||
# Default config
|
# Default config with new structure
|
||||||
self.config = {
|
self.config = {
|
||||||
"host": "",
|
# IPMI Settings
|
||||||
"username": "",
|
"ipmi_host": "",
|
||||||
"password": "",
|
"ipmi_username": "",
|
||||||
"port": 623,
|
"ipmi_password": "",
|
||||||
|
"ipmi_port": 623,
|
||||||
|
|
||||||
|
# SSH Settings
|
||||||
|
"ssh_enabled": False,
|
||||||
|
"ssh_host": None,
|
||||||
|
"ssh_username": None,
|
||||||
|
"ssh_password": None,
|
||||||
|
"ssh_use_key": False,
|
||||||
|
"ssh_key_file": None,
|
||||||
|
"ssh_port": 22,
|
||||||
|
|
||||||
|
# Fan Control Settings
|
||||||
"enabled": False,
|
"enabled": False,
|
||||||
"interval": 10, # seconds
|
"interval": 10,
|
||||||
"min_speed": 10,
|
"min_speed": 10,
|
||||||
"max_speed": 100,
|
"max_speed": 100,
|
||||||
"fan_curve": [
|
"fan_curve": [
|
||||||
|
|
@ -298,8 +438,9 @@ class FanControlService:
|
||||||
def _load_config(self):
|
def _load_config(self):
|
||||||
"""Load configuration from file."""
|
"""Load configuration from file."""
|
||||||
try:
|
try:
|
||||||
if Path(self.config_path).exists():
|
config_file = Path(self.config_path)
|
||||||
with open(self.config_path, 'r') as f:
|
if config_file.exists():
|
||||||
|
with open(config_file) as f:
|
||||||
loaded = json.load(f)
|
loaded = json.load(f)
|
||||||
self.config.update(loaded)
|
self.config.update(loaded)
|
||||||
logger.info(f"Loaded config from {self.config_path}")
|
logger.info(f"Loaded config from {self.config_path}")
|
||||||
|
|
@ -309,8 +450,9 @@ class FanControlService:
|
||||||
def _save_config(self):
|
def _save_config(self):
|
||||||
"""Save configuration to file."""
|
"""Save configuration to file."""
|
||||||
try:
|
try:
|
||||||
Path(self.config_path).parent.mkdir(parents=True, exist_ok=True)
|
config_file = Path(self.config_path)
|
||||||
with open(self.config_path, 'w') as f:
|
config_file.parent.mkdir(parents=True, exist_ok=True)
|
||||||
|
with open(config_file, 'w') as f:
|
||||||
json.dump(self.config, f, indent=2)
|
json.dump(self.config, f, indent=2)
|
||||||
logger.info(f"Saved config to {self.config_path}")
|
logger.info(f"Saved config to {self.config_path}")
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
|
|
@ -321,40 +463,70 @@ class FanControlService:
|
||||||
self.config.update(kwargs)
|
self.config.update(kwargs)
|
||||||
self._save_config()
|
self._save_config()
|
||||||
|
|
||||||
# Reinitialize controller if connection params changed
|
# Reinitialize controllers if connection params changed
|
||||||
if any(k in kwargs for k in ['host', 'username', 'password', 'port']):
|
ipmi_changed = any(k in kwargs for k in ['ipmi_host', 'ipmi_username', 'ipmi_password', 'ipmi_port'])
|
||||||
self._init_controller()
|
ssh_changed = any(k in kwargs for k in ['ssh_host', 'ssh_username', 'ssh_password', 'ssh_key_file', 'ssh_port'])
|
||||||
|
|
||||||
def _init_controller(self):
|
if ipmi_changed:
|
||||||
|
self._init_ipmi_controller()
|
||||||
|
if ssh_changed or (kwargs.get('ssh_enabled') and not self.ssh_client):
|
||||||
|
self._init_ssh_client()
|
||||||
|
|
||||||
|
def _init_ipmi_controller(self) -> bool:
|
||||||
"""Initialize the IPMI controller."""
|
"""Initialize the IPMI controller."""
|
||||||
if not all([self.config.get('host'), self.config.get('username'), self.config.get('password')]):
|
if not all([self.config.get('ipmi_host'), self.config.get('ipmi_username')]):
|
||||||
logger.warning("Missing IPMI credentials")
|
logger.warning("Missing IPMI credentials")
|
||||||
return False
|
return False
|
||||||
|
|
||||||
self.controller = IPMIFanController(
|
self.controller = IPMIFanController(
|
||||||
host=self.config['host'],
|
host=self.config['ipmi_host'],
|
||||||
username=self.config['username'],
|
username=self.config['ipmi_username'],
|
||||||
password=self.config['password'],
|
password=self.config.get('ipmi_password', ''),
|
||||||
port=self.config.get('port', 623)
|
port=self.config.get('ipmi_port', 623)
|
||||||
)
|
)
|
||||||
|
|
||||||
if self.controller.test_connection():
|
if self.controller.test_connection():
|
||||||
logger.info(f"Connected to IPMI at {self.config['host']}")
|
logger.info(f"Connected to IPMI at {self.config['ipmi_host']}")
|
||||||
return True
|
return True
|
||||||
else:
|
else:
|
||||||
logger.error(f"Failed to connect to IPMI at {self.config['host']}")
|
logger.error(f"Failed to connect to IPMI at {self.config['ipmi_host']}")
|
||||||
self.controller = None
|
self.controller = None
|
||||||
return False
|
return False
|
||||||
|
|
||||||
def start(self):
|
def _init_ssh_client(self) -> bool:
    """Create the SSH client used for lm-sensors readings.

    Host, username and password fall back to the IPMI values when the
    SSH-specific settings are empty. No connection is opened here; the
    client connects lazily on its first poll.

    Returns:
        True when a client was created. False when SSH is disabled or
        the host/username cannot be determined.
    """
    if not self.config.get('ssh_enabled'):
        return False

    host = self.config.get('ssh_host') or self.config.get('ipmi_host')
    username = self.config.get('ssh_username') or self.config.get('ipmi_username')

    if not all([host, username]):
        logger.warning("Missing SSH credentials")
        return False

    # Close the client being replaced; otherwise its connection stays
    # open after the reference is dropped.
    if self.ssh_client:
        self.ssh_client.disconnect()

    self.ssh_client = SSHSensorClient(
        host=host,
        username=username,
        password=self.config.get('ssh_password') or self.config.get('ipmi_password'),
        key_file=self.config.get('ssh_key_file'),
        port=self.config.get('ssh_port', 22)
    )

    return True
|
||||||
|
|
||||||
|
def start(self) -> bool:
|
||||||
"""Start the fan control service."""
|
"""Start the fan control service."""
|
||||||
if self.running:
|
if self.running:
|
||||||
return
|
return True
|
||||||
|
|
||||||
if not self._init_controller():
|
if not self._init_ipmi_controller():
|
||||||
logger.error("Cannot start service - IPMI connection failed")
|
logger.error("Cannot start service - IPMI connection failed")
|
||||||
return False
|
return False
|
||||||
|
|
||||||
|
if self.config.get('ssh_enabled'):
|
||||||
|
self._init_ssh_client()
|
||||||
|
|
||||||
self.running = True
|
self.running = True
|
||||||
self.thread = threading.Thread(target=self._control_loop, daemon=True)
|
self.thread = threading.Thread(target=self._control_loop, daemon=True)
|
||||||
self.thread.start()
|
self.thread.start()
|
||||||
|
|
@ -371,6 +543,9 @@ class FanControlService:
|
||||||
if self.controller:
|
if self.controller:
|
||||||
self.controller.disable_manual_fan_control()
|
self.controller.disable_manual_fan_control()
|
||||||
|
|
||||||
|
if self.ssh_client:
|
||||||
|
self.ssh_client.disconnect()
|
||||||
|
|
||||||
logger.info("Fan control service stopped")
|
logger.info("Fan control service stopped")
|
||||||
|
|
||||||
def _control_loop(self):
|
def _control_loop(self):
|
||||||
|
|
@ -385,16 +560,17 @@ class FanControlService:
|
||||||
time.sleep(1)
|
time.sleep(1)
|
||||||
continue
|
continue
|
||||||
|
|
||||||
|
# Ensure controllers are healthy
|
||||||
if not self.controller or not self.controller.is_healthy():
|
if not self.controller or not self.controller.is_healthy():
|
||||||
logger.warning("Controller unhealthy, attempting reconnect...")
|
logger.warning("IPMI controller unhealthy, attempting reconnect...")
|
||||||
if not self._init_controller():
|
if not self._init_ipmi_controller():
|
||||||
time.sleep(30)
|
time.sleep(30)
|
||||||
continue
|
continue
|
||||||
self.controller.enable_manual_fan_control()
|
self.controller.enable_manual_fan_control()
|
||||||
|
|
||||||
# Get sensor data
|
# Get temperature data
|
||||||
temps = self.controller.get_temperatures()
|
temps = self._get_temperatures()
|
||||||
fans = self.controller.get_fan_speeds()
|
fans = self.controller.get_fan_speeds() if self.controller else []
|
||||||
|
|
||||||
with self.lock:
|
with self.lock:
|
||||||
self.last_temps = temps
|
self.last_temps = temps
|
||||||
|
|
@ -406,7 +582,9 @@ class FanControlService:
|
||||||
continue
|
continue
|
||||||
|
|
||||||
# Check for panic temperature
|
# Check for panic temperature
|
||||||
max_temp = max((t.value for t in temps if t.location.startswith('cpu')), default=0)
|
cpu_temps = [t for t in temps if t.location.startswith('cpu')]
|
||||||
|
max_temp = max((t.value for t in cpu_temps), default=0)
|
||||||
|
|
||||||
if max_temp >= self.config.get('panic_temp', 85):
|
if max_temp >= self.config.get('panic_temp', 85):
|
||||||
self.target_speed = self.config.get('panic_speed', 100)
|
self.target_speed = self.config.get('panic_speed', 100)
|
||||||
logger.warning(f"PANIC MODE: CPU temp {max_temp}°C, setting fans to {self.target_speed}%")
|
logger.warning(f"PANIC MODE: CPU temp {max_temp}°C, setting fans to {self.target_speed}%")
|
||||||
|
|
@ -431,10 +609,27 @@ class FanControlService:
|
||||||
logger.error(f"Control loop error: {e}")
|
logger.error(f"Control loop error: {e}")
|
||||||
time.sleep(10)
|
time.sleep(10)
|
||||||
|
|
||||||
|
def _get_temperatures(self) -> List[TemperatureReading]:
    """Return temperature readings from IPMI, falling back to SSH lm-sensors.

    IPMI is queried first when a controller exists. SSH is queried only
    when it is enabled and a client exists, and then only if IPMI gave
    nothing or ``prefer_ssh_temps`` is set. A non-empty SSH result
    replaces the IPMI readings.
    """
    readings = self.controller.get_temperatures() if self.controller else []

    ssh_usable = self.config.get('ssh_enabled') and self.ssh_client
    if ssh_usable and (not readings or self.config.get('prefer_ssh_temps', False)):
        from_ssh = self.ssh_client.get_lm_sensors_data()
        if from_ssh:
            readings = from_ssh

    return readings
|
||||||
|
|
||||||
def get_status(self) -> Dict:
|
def get_status(self) -> Dict:
|
||||||
"""Get current status."""
|
"""Get current status."""
|
||||||
with self.lock:
|
with self.lock:
|
||||||
return {
|
status = {
|
||||||
"running": self.running,
|
"running": self.running,
|
||||||
"enabled": self.config.get('enabled', False),
|
"enabled": self.config.get('enabled', False),
|
||||||
"connected": self.controller is not None and self.controller.is_healthy(),
|
"connected": self.controller is not None and self.controller.is_healthy(),
|
||||||
|
|
@ -444,10 +639,25 @@ class FanControlService:
|
||||||
"temperatures": [asdict(t) for t in self.last_temps],
|
"temperatures": [asdict(t) for t in self.last_temps],
|
||||||
"fans": [asdict(f) for f in self.last_fans],
|
"fans": [asdict(f) for f in self.last_fans],
|
||||||
"config": {
|
"config": {
|
||||||
k: v for k, v in self.config.items()
|
# IPMI
|
||||||
if k != 'password' # Don't expose password
|
"ipmi_host": self.config.get('ipmi_host'),
|
||||||
|
"ipmi_port": self.config.get('ipmi_port'),
|
||||||
|
"ipmi_username": self.config.get('ipmi_username'),
|
||||||
|
# SSH
|
||||||
|
"ssh_enabled": self.config.get('ssh_enabled'),
|
||||||
|
"ssh_host": self.config.get('ssh_host'),
|
||||||
|
"ssh_port": self.config.get('ssh_port'),
|
||||||
|
"ssh_username": self.config.get('ssh_username'),
|
||||||
|
"ssh_use_key": self.config.get('ssh_use_key'),
|
||||||
|
# Settings
|
||||||
|
"min_speed": self.config.get('min_speed'),
|
||||||
|
"max_speed": self.config.get('max_speed'),
|
||||||
|
"panic_temp": self.config.get('panic_temp'),
|
||||||
|
"interval": self.config.get('interval'),
|
||||||
|
"fan_curve": self.config.get('fan_curve')
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
return status
|
||||||
|
|
||||||
def set_manual_speed(self, speed: int) -> bool:
|
def set_manual_speed(self, speed: int) -> bool:
|
||||||
"""Set manual fan speed."""
|
"""Set manual fan speed."""
|
||||||
|
|
@ -473,16 +683,15 @@ class FanControlService:
|
||||||
self.controller.disable_manual_fan_control()
|
self.controller.disable_manual_fan_control()
|
||||||
|
|
||||||
|
|
||||||
# Global service instance
|
# Global service instances
|
||||||
_service: Optional[FanControlService] = None
|
_service_instances: Dict[str, FanControlService] = {}
|
||||||
|
|
||||||
|
|
||||||
def get_service(config_path: str = "/etc/ipmi-fan-controller/config.json") -> FanControlService:
|
def get_service(config_path: str = "/etc/ipmi-fan-controller/config.json") -> FanControlService:
|
||||||
"""Get or create the global service instance."""
|
"""Get or create the service instance for a config path."""
|
||||||
global _service
|
if config_path not in _service_instances:
|
||||||
if _service is None:
|
_service_instances[config_path] = FanControlService(config_path)
|
||||||
_service = FanControlService(config_path)
|
return _service_instances[config_path]
|
||||||
return _service
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
|
|
|
||||||
|
|
@ -2,3 +2,5 @@ fastapi==0.109.0
|
||||||
uvicorn[standard]==0.27.0
|
uvicorn[standard]==0.27.0
|
||||||
pydantic==2.5.3
|
pydantic==2.5.3
|
||||||
pydantic-settings==2.1.0
|
pydantic-settings==2.1.0
|
||||||
|
python-multipart==0.0.6
|
||||||
|
paramiko==3.4.0
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,7 @@
|
||||||
|
/home/devmatrix/projects/fan-controller-v2/web_server.py:49: PydanticDeprecatedSince20: Pydantic V1 style `@validator` validators are deprecated. You should migrate to Pydantic V2 style `@field_validator` validators, see the migration guide for more details. Deprecated in Pydantic V2.0 to be removed in V3.0. See Pydantic V2 Migration Guide at https://errors.pydantic.dev/2.5/migration/
|
||||||
|
@validator('new_password')
|
||||||
|
INFO: Started server process [888347]
|
||||||
|
INFO: Waiting for application startup.
|
||||||
|
INFO: Application startup complete.
|
||||||
|
INFO: Uvicorn running on http://0.0.0.0:8000 (Press CTRL+C to quit)
|
||||||
|
INFO: 127.0.0.1:44244 - "GET /api/status HTTP/1.1" 401 Unauthorized
|
||||||
1147
web_server.py
1147
web_server.py
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue