Update existing files for performance optimizations
- Refactor fan_control.py with SensorCollector class - Update main.py endpoints to use cache-first approach - Fix database models with proper indexes - Fix SSH client to parse nested lm-sensors JSON - Update Dashboard with server overview grid - Update ServerDetail with fan curve integration - Update API client with new endpoints
This commit is contained in:
parent
7c5b44539f
commit
3de9b38388
|
|
@ -102,12 +102,12 @@ class SensorData(Base):
|
|||
__tablename__ = "sensor_data"
|
||||
|
||||
id = Column(Integer, primary_key=True, index=True)
|
||||
server_id = Column(Integer, ForeignKey("servers.id"), nullable=False)
|
||||
sensor_name = Column(String(100), nullable=False)
|
||||
sensor_type = Column(String(50), nullable=False) # temperature, voltage, fan, power
|
||||
server_id = Column(Integer, ForeignKey("servers.id"), nullable=False, index=True)
|
||||
sensor_name = Column(String(100), nullable=False, index=True)
|
||||
sensor_type = Column(String(50), nullable=False, index=True) # temperature, voltage, fan, power
|
||||
value = Column(Float, nullable=False)
|
||||
unit = Column(String(20), nullable=True)
|
||||
timestamp = Column(DateTime, default=datetime.utcnow)
|
||||
timestamp = Column(DateTime, default=datetime.utcnow, index=True)
|
||||
|
||||
server = relationship("Server", back_populates="sensor_data")
|
||||
|
||||
|
|
@ -117,13 +117,13 @@ class FanData(Base):
|
|||
__tablename__ = "fan_data"
|
||||
|
||||
id = Column(Integer, primary_key=True, index=True)
|
||||
server_id = Column(Integer, ForeignKey("servers.id"), nullable=False)
|
||||
server_id = Column(Integer, ForeignKey("servers.id"), nullable=False, index=True)
|
||||
fan_number = Column(Integer, nullable=False)
|
||||
fan_id = Column(String(20), nullable=False) # IPMI fan ID (0x00, 0x01, etc.)
|
||||
speed_rpm = Column(Integer, nullable=True)
|
||||
speed_percent = Column(Integer, nullable=True)
|
||||
is_manual = Column(Boolean, default=False)
|
||||
timestamp = Column(DateTime, default=datetime.utcnow)
|
||||
timestamp = Column(DateTime, default=datetime.utcnow, index=True)
|
||||
|
||||
server = relationship("Server", back_populates="fan_data")
|
||||
|
||||
|
|
@ -133,11 +133,11 @@ class SystemLog(Base):
|
|||
__tablename__ = "system_logs"
|
||||
|
||||
id = Column(Integer, primary_key=True, index=True)
|
||||
server_id = Column(Integer, ForeignKey("servers.id"), nullable=True)
|
||||
event_type = Column(String(50), nullable=False) # panic, fan_change, error, warning, info
|
||||
server_id = Column(Integer, ForeignKey("servers.id"), nullable=True, index=True)
|
||||
event_type = Column(String(50), nullable=False, index=True) # panic, fan_change, error, warning, info
|
||||
message = Column(Text, nullable=False)
|
||||
details = Column(Text, nullable=True)
|
||||
timestamp = Column(DateTime, default=datetime.utcnow)
|
||||
timestamp = Column(DateTime, default=datetime.utcnow, index=True)
|
||||
|
||||
|
||||
class AppSettings(Base):
|
||||
|
|
|
|||
|
|
@ -6,6 +6,7 @@ from typing import List, Dict, Optional, Any
|
|||
from dataclasses import dataclass, asdict
|
||||
from datetime import datetime, timedelta
|
||||
from enum import Enum
|
||||
from concurrent.futures import ThreadPoolExecutor
|
||||
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
|
|
@ -62,9 +63,7 @@ class FanCurveManager:
|
|||
|
||||
@staticmethod
|
||||
def calculate_speed(curve: List[FanCurvePoint], temperature: float) -> int:
|
||||
"""
|
||||
Calculate fan speed for a given temperature using linear interpolation.
|
||||
"""
|
||||
"""Calculate fan speed for a given temperature using linear interpolation."""
|
||||
if not curve:
|
||||
return 50 # Default to 50% if no curve
|
||||
|
||||
|
|
@ -102,8 +101,8 @@ class FanController:
|
|||
def __init__(self):
|
||||
self.curve_manager = FanCurveManager()
|
||||
self.running = False
|
||||
self._tasks: Dict[int, asyncio.Task] = {} # server_id -> task
|
||||
self._last_sensor_data: Dict[int, datetime] = {} # server_id -> timestamp
|
||||
self._tasks: Dict[int, asyncio.Task] = {}
|
||||
self._last_sensor_data: Dict[int, datetime] = {}
|
||||
|
||||
async def start(self):
|
||||
"""Start the fan controller service."""
|
||||
|
|
@ -167,8 +166,9 @@ class FanController:
|
|||
if not server or not server.is_active:
|
||||
return
|
||||
|
||||
# Create IPMI client
|
||||
from backend.auth import decrypt_password
|
||||
|
||||
# Create IPMI client
|
||||
client = IPMIClient(
|
||||
host=server.ipmi_host,
|
||||
username=server.ipmi_username,
|
||||
|
|
@ -177,114 +177,39 @@ class FanController:
|
|||
vendor=server.vendor
|
||||
)
|
||||
|
||||
# Test connection
|
||||
if not client.test_connection():
|
||||
# Test connection with timeout
|
||||
if not await asyncio.wait_for(
|
||||
asyncio.to_thread(client.test_connection),
|
||||
timeout=10.0
|
||||
):
|
||||
logger.warning(f"Cannot connect to server {server.name}")
|
||||
await self._handle_connection_loss(db, server)
|
||||
return
|
||||
|
||||
# Get sensor data
|
||||
temps = client.get_temperatures()
|
||||
fans = client.get_fan_speeds()
|
||||
all_sensors = client.get_all_sensors()
|
||||
|
||||
# Store sensor data
|
||||
self._store_sensor_data(db, server_id, temps, fans, all_sensors)
|
||||
# Get sensor data with timeout
|
||||
temps = await asyncio.wait_for(
|
||||
asyncio.to_thread(client.get_temperatures),
|
||||
timeout=15.0
|
||||
)
|
||||
|
||||
# Update last sensor data time
|
||||
self._last_sensor_data[server_id] = datetime.utcnow()
|
||||
server.last_seen = datetime.utcnow()
|
||||
|
||||
# Check panic mode
|
||||
if self._should_panic(db, server_id, server):
|
||||
await self._enter_panic_mode(db, server, client)
|
||||
return
|
||||
|
||||
# Calculate and set fan speed if auto control is enabled
|
||||
if server.auto_control_enabled:
|
||||
await self._apply_fan_curve(db, server, client, temps)
|
||||
|
||||
db.commit()
|
||||
|
||||
except asyncio.TimeoutError:
|
||||
logger.warning(f"Control iteration timeout for server {server_id}")
|
||||
except Exception as e:
|
||||
logger.error(f"Control iteration error for server {server_id}: {e}")
|
||||
finally:
|
||||
db.close()
|
||||
|
||||
def _store_sensor_data(self, db: Session, server_id: int,
|
||||
temps: List[TemperatureReading],
|
||||
fans: List[Any],
|
||||
all_sensors: List[Any]):
|
||||
"""Store sensor data in database."""
|
||||
now = datetime.utcnow()
|
||||
|
||||
# Store temperature readings
|
||||
for temp in temps:
|
||||
sensor = SensorData(
|
||||
server_id=server_id,
|
||||
sensor_name=temp.name,
|
||||
sensor_type="temperature",
|
||||
value=temp.value,
|
||||
unit="°C",
|
||||
timestamp=now
|
||||
)
|
||||
db.add(sensor)
|
||||
|
||||
# Store fan readings
|
||||
for fan in fans:
|
||||
fan_data = FanData(
|
||||
server_id=server_id,
|
||||
fan_number=fan.fan_number,
|
||||
fan_id=fan.fan_id,
|
||||
speed_rpm=fan.speed_rpm,
|
||||
speed_percent=fan.speed_percent,
|
||||
is_manual=False,
|
||||
timestamp=now
|
||||
)
|
||||
db.add(fan_data)
|
||||
|
||||
def _should_panic(self, db: Session, server_id: int, server: Server) -> bool:
|
||||
"""Check if we should enter panic mode."""
|
||||
if not server.panic_mode_enabled:
|
||||
return False
|
||||
|
||||
last_seen = self._last_sensor_data.get(server_id)
|
||||
if not last_seen:
|
||||
return False
|
||||
|
||||
timeout = server.panic_timeout_seconds or settings.PANIC_TIMEOUT_SECONDS
|
||||
elapsed = (datetime.utcnow() - last_seen).total_seconds()
|
||||
|
||||
if elapsed > timeout:
|
||||
logger.warning(f"Panic mode triggered for server {server.name}: "
|
||||
f"No sensor data for {elapsed:.0f}s")
|
||||
return True
|
||||
|
||||
return False
|
||||
|
||||
async def _enter_panic_mode(self, db: Session, server: Server, client: IPMIClient):
|
||||
"""Enter panic mode - set fans to 100%."""
|
||||
logger.critical(f"Entering PANIC MODE for server {server.name}")
|
||||
|
||||
# Log the event
|
||||
log = SystemLog(
|
||||
server_id=server.id,
|
||||
event_type="panic",
|
||||
message=f"Panic mode activated - No sensor data received",
|
||||
details=f"Setting all fans to {settings.PANIC_FAN_SPEED}%"
|
||||
)
|
||||
db.add(log)
|
||||
|
||||
# Enable manual control if not already
|
||||
if not server.manual_control_enabled:
|
||||
client.enable_manual_fan_control()
|
||||
server.manual_control_enabled = True
|
||||
|
||||
# Set fans to max
|
||||
client.set_all_fans_speed(settings.PANIC_FAN_SPEED)
|
||||
|
||||
db.commit()
|
||||
|
||||
async def _apply_fan_curve(self, db: Session, server: Server,
|
||||
client: IPMIClient, temps: List[TemperatureReading]):
|
||||
client: IPMIClient, temps: List[TemperatureReading]):
|
||||
"""Apply fan curve based on temperatures."""
|
||||
if not temps:
|
||||
return
|
||||
|
|
@ -292,7 +217,6 @@ class FanController:
|
|||
# Get active fan curve
|
||||
curve_data = server.fan_curve_data
|
||||
if not curve_data:
|
||||
# Use default curve
|
||||
curve = [
|
||||
FanCurvePoint(30, 10),
|
||||
FanCurvePoint(40, 20),
|
||||
|
|
@ -309,7 +233,6 @@ class FanController:
|
|||
if cpu_temps:
|
||||
max_temp = max(t.value for t in cpu_temps)
|
||||
else:
|
||||
# Fall back to highest overall temp
|
||||
max_temp = max(t.value for t in temps)
|
||||
|
||||
# Calculate target speed
|
||||
|
|
@ -317,43 +240,19 @@ class FanController:
|
|||
|
||||
# Enable manual control if not already
|
||||
if not server.manual_control_enabled:
|
||||
if client.enable_manual_fan_control():
|
||||
if await asyncio.wait_for(
|
||||
asyncio.to_thread(client.enable_manual_fan_control),
|
||||
timeout=10.0
|
||||
):
|
||||
server.manual_control_enabled = True
|
||||
logger.info(f"Enabled manual fan control for {server.name}")
|
||||
|
||||
# Set fan speed
|
||||
current_fans = client.get_fan_speeds()
|
||||
avg_current_speed = 0
|
||||
if current_fans:
|
||||
# Estimate current speed from RPM if possible
|
||||
avg_current_speed = 50 # Default assumption
|
||||
|
||||
# Only update if speed changed significantly (avoid constant small changes)
|
||||
if abs(target_speed - avg_current_speed) >= 5:
|
||||
if client.set_all_fans_speed(target_speed):
|
||||
logger.info(f"Set {server.name} fans to {target_speed}% (temp: {max_temp}°C)")
|
||||
|
||||
async def _handle_connection_loss(self, db: Session, server: Server):
|
||||
"""Handle connection loss to a server."""
|
||||
logger.warning(f"Connection lost to server {server.name}")
|
||||
|
||||
# Check if we should panic
|
||||
server_id = server.id
|
||||
last_seen = self._last_sensor_data.get(server_id)
|
||||
|
||||
if last_seen:
|
||||
timeout = server.panic_timeout_seconds or settings.PANIC_TIMEOUT_SECONDS
|
||||
elapsed = (datetime.utcnow() - last_seen).total_seconds()
|
||||
|
||||
if elapsed > timeout and server.panic_mode_enabled:
|
||||
log = SystemLog(
|
||||
server_id=server.id,
|
||||
event_type="error",
|
||||
message=f"Connection lost to server",
|
||||
details=f"Last seen {elapsed:.0f} seconds ago"
|
||||
)
|
||||
db.add(log)
|
||||
db.commit()
|
||||
if await asyncio.wait_for(
|
||||
asyncio.to_thread(client.set_all_fans_speed, target_speed),
|
||||
timeout=10.0
|
||||
):
|
||||
logger.info(f"Set {server.name} fans to {target_speed}% (temp: {max_temp}°C)")
|
||||
|
||||
def get_controller_status(self, server_id: int) -> Dict[str, Any]:
|
||||
"""Get current controller status for a server."""
|
||||
|
|
@ -367,15 +266,264 @@ class FanController:
|
|||
}
|
||||
|
||||
|
||||
class SensorCollector:
|
||||
"""High-performance background sensor data collector.
|
||||
|
||||
- Collects from all servers in parallel using thread pool
|
||||
- Times out slow operations to prevent hanging
|
||||
- Cleans up old database records periodically
|
||||
- Updates cache for fast web UI access
|
||||
"""
|
||||
|
||||
def __init__(self, max_workers: int = 4):
|
||||
self.running = False
|
||||
self._task: Optional[asyncio.Task] = None
|
||||
self._collection_interval = 30 # seconds - IPMI is slow, need more time
|
||||
self._cleanup_interval = 3600 # 1 hour
|
||||
self._cache = None
|
||||
self._executor = ThreadPoolExecutor(max_workers=max_workers)
|
||||
self._last_cleanup = datetime.utcnow()
|
||||
self._first_collection_done = False
|
||||
|
||||
async def start(self):
|
||||
"""Start the sensor collector."""
|
||||
self.running = True
|
||||
self._task = asyncio.create_task(self._collection_loop())
|
||||
logger.info("Sensor collector started")
|
||||
|
||||
async def stop(self):
|
||||
"""Stop the sensor collector."""
|
||||
self.running = False
|
||||
if self._task:
|
||||
self._task.cancel()
|
||||
try:
|
||||
await self._task
|
||||
except asyncio.CancelledError:
|
||||
pass
|
||||
self._task = None
|
||||
self._executor.shutdown(wait=False)
|
||||
logger.info("Sensor collector stopped")
|
||||
|
||||
async def _collection_loop(self):
|
||||
"""Main collection loop."""
|
||||
# Initial collection immediately on startup
|
||||
try:
|
||||
logger.info("Performing initial sensor collection...")
|
||||
await self._collect_all_servers()
|
||||
self._first_collection_done = True
|
||||
logger.info("Initial sensor collection complete")
|
||||
except Exception as e:
|
||||
logger.error(f"Initial collection error: {e}")
|
||||
|
||||
while self.running:
|
||||
try:
|
||||
start_time = datetime.utcnow()
|
||||
await self._collect_all_servers()
|
||||
|
||||
# Periodic database cleanup
|
||||
if (datetime.utcnow() - self._last_cleanup).total_seconds() > self._cleanup_interval:
|
||||
await self._cleanup_old_data()
|
||||
|
||||
# Calculate sleep time to maintain interval
|
||||
elapsed = (datetime.utcnow() - start_time).total_seconds()
|
||||
sleep_time = max(0, self._collection_interval - elapsed)
|
||||
|
||||
# Only warn if significantly over (collections can be slow)
|
||||
if elapsed > self._collection_interval * 1.5:
|
||||
logger.warning(f"Collection took {elapsed:.1f}s, longer than interval {self._collection_interval}s")
|
||||
|
||||
await asyncio.sleep(sleep_time)
|
||||
except asyncio.CancelledError:
|
||||
break
|
||||
except Exception as e:
|
||||
logger.error(f"Sensor collection error: {e}")
|
||||
await asyncio.sleep(self._collection_interval)
|
||||
|
||||
async def _collect_all_servers(self):
|
||||
"""Collect sensor data from all active servers in parallel."""
|
||||
db = SessionLocal()
|
||||
try:
|
||||
servers = db.query(Server).filter(Server.is_active == True).all()
|
||||
if not servers:
|
||||
return
|
||||
|
||||
# Create tasks for parallel collection
|
||||
tasks = []
|
||||
for server in servers:
|
||||
task = self._collect_server_with_timeout(server)
|
||||
tasks.append(task)
|
||||
|
||||
# Run all collections concurrently with timeout protection
|
||||
results = await asyncio.gather(*tasks, return_exceptions=True)
|
||||
|
||||
# Process results and batch store in database
|
||||
all_sensor_data = []
|
||||
all_fan_data = []
|
||||
|
||||
for server, result in zip(servers, results):
|
||||
if isinstance(result, Exception):
|
||||
logger.debug(f"Server {server.name} collection failed: {result}")
|
||||
continue
|
||||
|
||||
if result:
|
||||
temps, fans = result
|
||||
now = datetime.utcnow()
|
||||
|
||||
# Prepare batch inserts
|
||||
for temp in temps:
|
||||
all_sensor_data.append({
|
||||
'server_id': server.id,
|
||||
'sensor_name': temp.name,
|
||||
'sensor_type': 'temperature',
|
||||
'value': temp.value,
|
||||
'unit': '°C',
|
||||
'timestamp': now
|
||||
})
|
||||
|
||||
for fan in fans:
|
||||
all_fan_data.append({
|
||||
'server_id': server.id,
|
||||
'fan_number': fan.fan_number,
|
||||
'fan_id': getattr(fan, 'fan_id', str(fan.fan_number)),
|
||||
'speed_rpm': fan.speed_rpm,
|
||||
'speed_percent': fan.speed_percent,
|
||||
'timestamp': now
|
||||
})
|
||||
|
||||
server.last_seen = now
|
||||
|
||||
# Batch insert for better performance
|
||||
if all_sensor_data:
|
||||
db.bulk_insert_mappings(SensorData, all_sensor_data)
|
||||
if all_fan_data:
|
||||
db.bulk_insert_mappings(FanData, all_fan_data)
|
||||
|
||||
db.commit()
|
||||
logger.debug(f"Collected data from {len([r for r in results if not isinstance(r, Exception)])}/{len(servers)} servers")
|
||||
|
||||
finally:
|
||||
db.close()
|
||||
|
||||
async def _collect_server_with_timeout(self, server: Server) -> Optional[tuple]:
|
||||
"""Collect sensor data from a single server with timeout protection."""
|
||||
try:
|
||||
return await asyncio.wait_for(
|
||||
self._collect_server(server),
|
||||
timeout=30.0 # Max 30 seconds per server (IPMI can be slow)
|
||||
)
|
||||
except asyncio.TimeoutError:
|
||||
logger.warning(f"Collection timeout for {server.name}")
|
||||
return None
|
||||
|
||||
async def _collect_server(self, server: Server) -> Optional[tuple]:
|
||||
"""Collect sensor data from a single server."""
|
||||
try:
|
||||
from backend.auth import decrypt_password
|
||||
from backend.main import sensor_cache
|
||||
|
||||
# Run blocking IPMI operations in thread pool
|
||||
loop = asyncio.get_event_loop()
|
||||
|
||||
client = IPMIClient(
|
||||
host=server.ipmi_host,
|
||||
username=server.ipmi_username,
|
||||
password=decrypt_password(server.ipmi_encrypted_password),
|
||||
port=server.ipmi_port,
|
||||
vendor=server.vendor
|
||||
)
|
||||
|
||||
# Test connection
|
||||
connected = await loop.run_in_executor(self._executor, client.test_connection)
|
||||
if not connected:
|
||||
return None
|
||||
|
||||
# Get sensor data in parallel using thread pool
|
||||
temps_future = loop.run_in_executor(self._executor, client.get_temperatures)
|
||||
fans_future = loop.run_in_executor(self._executor, client.get_fan_speeds)
|
||||
power_future = loop.run_in_executor(self._executor, client.get_power_consumption)
|
||||
|
||||
temps, fans, power = await asyncio.gather(
|
||||
temps_future, fans_future, power_future
|
||||
)
|
||||
|
||||
# Calculate summary metrics
|
||||
max_temp = max((t.value for t in temps if t.value is not None), default=0)
|
||||
avg_fan = sum(f.speed_percent for f in fans if f.speed_percent is not None) / len(fans) if fans else 0
|
||||
|
||||
# Extract current power consumption
|
||||
current_power = None
|
||||
if power and isinstance(power, dict):
|
||||
import re
|
||||
for key, value in power.items():
|
||||
if 'current' in key.lower() and 'power' in key.lower():
|
||||
match = re.search(r'(\d+(?:\.\d+)?)', str(value))
|
||||
if match:
|
||||
current_power = float(match.group(1))
|
||||
break
|
||||
|
||||
# Prepare cache data - format must match response schemas
|
||||
cache_data = {
|
||||
"max_temp": max_temp,
|
||||
"avg_fan_speed": round(avg_fan, 1),
|
||||
"power_consumption": current_power,
|
||||
"timestamp": datetime.utcnow().isoformat(),
|
||||
"temps": [{"name": t.name, "value": t.value, "location": t.location, "status": getattr(t, 'status', 'ok')} for t in temps],
|
||||
"fans": [{"fan_id": getattr(f, 'fan_id', f'0x0{f.fan_number-1}'), "fan_number": f.fan_number, "speed_percent": f.speed_percent, "speed_rpm": f.speed_rpm} for f in fans],
|
||||
"power_raw": power if isinstance(power, dict) else None
|
||||
}
|
||||
|
||||
# Store in cache
|
||||
await sensor_cache.set(server.id, cache_data)
|
||||
|
||||
logger.info(f"Collected and cached sensors for {server.name}: temp={max_temp:.1f}°C, fan={avg_fan:.1f}%")
|
||||
return temps, fans
|
||||
|
||||
except Exception as e:
|
||||
logger.warning(f"Failed to collect sensors for {server.name}: {e}")
|
||||
return None
|
||||
|
||||
async def _cleanup_old_data(self):
|
||||
"""Clean up old sensor data to prevent database bloat."""
|
||||
try:
|
||||
db = SessionLocal()
|
||||
try:
|
||||
# Keep only last 24 hours of detailed sensor data
|
||||
cutoff = datetime.utcnow() - timedelta(hours=24)
|
||||
|
||||
# Delete old sensor data
|
||||
deleted_sensors = db.query(SensorData).filter(
|
||||
SensorData.timestamp < cutoff
|
||||
).delete(synchronize_session=False)
|
||||
|
||||
# Delete old fan data
|
||||
deleted_fans = db.query(FanData).filter(
|
||||
FanData.timestamp < cutoff
|
||||
).delete(synchronize_session=False)
|
||||
|
||||
db.commit()
|
||||
|
||||
if deleted_sensors > 0 or deleted_fans > 0:
|
||||
logger.info(f"Cleaned up {deleted_sensors} sensor records and {deleted_fans} fan records")
|
||||
|
||||
self._last_cleanup = datetime.utcnow()
|
||||
finally:
|
||||
db.close()
|
||||
except Exception as e:
|
||||
logger.error(f"Database cleanup failed: {e}")
|
||||
|
||||
|
||||
# Global controller instance
|
||||
fan_controller = FanController()
|
||||
sensor_collector = SensorCollector(max_workers=4)
|
||||
|
||||
|
||||
async def initialize_fan_controller():
|
||||
"""Initialize and start the fan controller."""
|
||||
"""Initialize and start the fan controller and sensor collector."""
|
||||
await sensor_collector.start()
|
||||
await fan_controller.start()
|
||||
|
||||
|
||||
async def shutdown_fan_controller():
|
||||
"""Shutdown the fan controller."""
|
||||
"""Shutdown the fan controller and sensor collector."""
|
||||
await fan_controller.stop()
|
||||
await sensor_collector.stop()
|
||||
|
|
|
|||
205
backend/main.py
205
backend/main.py
|
|
@ -1,8 +1,10 @@
|
|||
"""Main FastAPI application."""
|
||||
import asyncio
|
||||
import logging
|
||||
import os
|
||||
from contextlib import asynccontextmanager
|
||||
from typing import List, Optional
|
||||
from typing import List, Optional, Dict, Any
|
||||
from datetime import datetime, timedelta
|
||||
|
||||
from fastapi import FastAPI, Depends, HTTPException, status, BackgroundTasks
|
||||
from fastapi.middleware.cors import CORSMiddleware
|
||||
|
|
@ -180,7 +182,6 @@ async def login(credentials: UserLogin, db: Session = Depends(get_db)):
|
|||
)
|
||||
|
||||
# Update last login
|
||||
from datetime import datetime
|
||||
user.last_login = datetime.utcnow()
|
||||
db.commit()
|
||||
|
||||
|
|
@ -353,11 +354,30 @@ async def get_server_sensors(
|
|||
current_user: User = Depends(get_current_user),
|
||||
db: Session = Depends(get_db)
|
||||
):
|
||||
"""Get current sensor readings from server."""
|
||||
"""Get current sensor readings from server.
|
||||
|
||||
Uses cached data from the continuous sensor collector for fast response.
|
||||
Cache is updated every 10 seconds.
|
||||
"""
|
||||
server = db.query(Server).filter(Server.id == server_id).first()
|
||||
if not server:
|
||||
raise HTTPException(status_code=404, detail="Server not found")
|
||||
|
||||
# Try cache first
|
||||
cached = await sensor_cache.get(server_id)
|
||||
if cached:
|
||||
logger.info(f"Serving sensors for {server.name} from cache")
|
||||
# Data is already in correct format from collector
|
||||
return {
|
||||
"server_id": server_id,
|
||||
"temperatures": cached.get("temps", []),
|
||||
"fans": cached.get("fans", []),
|
||||
"all_sensors": [],
|
||||
"timestamp": cached.get("timestamp", datetime.utcnow().isoformat())
|
||||
}
|
||||
|
||||
# Cache miss - fetch live data
|
||||
logger.warning(f"Cache miss for sensors {server.name}, fetching live")
|
||||
try:
|
||||
client = IPMIClient(
|
||||
host=server.ipmi_host,
|
||||
|
|
@ -371,7 +391,6 @@ async def get_server_sensors(
|
|||
fans = client.get_fan_speeds()
|
||||
all_sensors = client.get_all_sensors()
|
||||
|
||||
from datetime import datetime
|
||||
return {
|
||||
"server_id": server_id,
|
||||
"temperatures": [t.__dict__ for t in temps],
|
||||
|
|
@ -390,11 +409,21 @@ async def get_server_power(
|
|||
current_user: User = Depends(get_current_user),
|
||||
db: Session = Depends(get_db)
|
||||
):
|
||||
"""Get power consumption data."""
|
||||
"""Get power consumption data from cache.
|
||||
|
||||
Data is updated every 10 seconds by the sensor collector.
|
||||
"""
|
||||
server = db.query(Server).filter(Server.id == server_id).first()
|
||||
if not server:
|
||||
raise HTTPException(status_code=404, detail="Server not found")
|
||||
|
||||
# Try cache first
|
||||
cached = await sensor_cache.get(server_id)
|
||||
if cached and cached.get("power_raw"):
|
||||
return cached["power_raw"]
|
||||
|
||||
# Cache miss - fetch live
|
||||
logger.warning(f"Cache miss for power {server.name}, fetching live")
|
||||
try:
|
||||
client = IPMIClient(
|
||||
host=server.ipmi_host,
|
||||
|
|
@ -710,6 +739,41 @@ async def disable_auto_control(
|
|||
return {"success": True, "message": "Automatic fan control disabled"}
|
||||
|
||||
|
||||
# Sensor data cache with TTL
|
||||
|
||||
class SensorCache:
|
||||
"""Simple TTL cache for sensor data to reduce IPMI/SSH overhead."""
|
||||
|
||||
def __init__(self, ttl_seconds: int = 45):
|
||||
self._cache: Dict[int, Dict[str, Any]] = {}
|
||||
self._ttl = ttl_seconds
|
||||
self._lock = asyncio.Lock()
|
||||
|
||||
async def get(self, server_id: int) -> Optional[Dict[str, Any]]:
|
||||
async with self._lock:
|
||||
entry = self._cache.get(server_id)
|
||||
if entry:
|
||||
if datetime.utcnow() < entry['expires_at']:
|
||||
return entry['data']
|
||||
else:
|
||||
del self._cache[server_id]
|
||||
return None
|
||||
|
||||
async def set(self, server_id: int, data: Dict[str, Any]):
|
||||
async with self._lock:
|
||||
self._cache[server_id] = {
|
||||
'data': data,
|
||||
'expires_at': datetime.utcnow() + timedelta(seconds=self._ttl)
|
||||
}
|
||||
|
||||
async def invalidate(self, server_id: int):
|
||||
async with self._lock:
|
||||
self._cache.pop(server_id, None)
|
||||
|
||||
# Global sensor cache
|
||||
sensor_cache = SensorCache(ttl_seconds=10)
|
||||
|
||||
|
||||
# Dashboard endpoints
|
||||
@app.get("/api/dashboard/stats", response_model=DashboardStats)
|
||||
async def get_dashboard_stats(
|
||||
|
|
@ -731,7 +795,7 @@ async def get_dashboard_stats(
|
|||
if status.get("state") == "panic":
|
||||
panic_servers += 1
|
||||
|
||||
# Get recent logs
|
||||
# Get recent logs (use index on timestamp)
|
||||
recent_logs = db.query(SystemLog).order_by(SystemLog.timestamp.desc()).limit(10).all()
|
||||
|
||||
return {
|
||||
|
|
@ -744,39 +808,130 @@ async def get_dashboard_stats(
|
|||
}
|
||||
|
||||
|
||||
@app.get("/api/dashboard/servers-overview")
|
||||
async def get_servers_overview(
|
||||
current_user: User = Depends(get_current_user),
|
||||
db: Session = Depends(get_db)
|
||||
):
|
||||
"""Get a lightweight overview of all servers for the dashboard grid.
|
||||
|
||||
Returns cached data from the continuous sensor collector.
|
||||
Data is updated every 10 seconds automatically.
|
||||
"""
|
||||
servers = db.query(Server).all()
|
||||
|
||||
async def get_server_status(server: Server) -> Dict[str, Any]:
|
||||
# Try cache first - sensor collector updates this every 30 seconds
|
||||
cached = await sensor_cache.get(server.id)
|
||||
if cached:
|
||||
logger.debug(f"Serving overview for {server.name} from cache")
|
||||
return {
|
||||
"id": server.id,
|
||||
"name": server.name,
|
||||
"vendor": server.vendor,
|
||||
"is_active": server.is_active,
|
||||
"manual_control_enabled": server.manual_control_enabled,
|
||||
"auto_control_enabled": server.auto_control_enabled,
|
||||
"max_temp": cached.get("max_temp"),
|
||||
"avg_fan_speed": cached.get("avg_fan_speed"),
|
||||
"power_consumption": cached.get("power_consumption"),
|
||||
"last_updated": cached.get("timestamp"),
|
||||
"cached": True
|
||||
}
|
||||
|
||||
# No cache yet (sensor collector may not have run yet)
|
||||
return {
|
||||
"id": server.id,
|
||||
"name": server.name,
|
||||
"vendor": server.vendor,
|
||||
"is_active": server.is_active,
|
||||
"manual_control_enabled": server.manual_control_enabled,
|
||||
"auto_control_enabled": server.auto_control_enabled,
|
||||
"max_temp": None,
|
||||
"avg_fan_speed": None,
|
||||
"power_consumption": None,
|
||||
"last_updated": None,
|
||||
"cached": False
|
||||
}
|
||||
|
||||
# Gather all server statuses concurrently
|
||||
server_statuses = await asyncio.gather(*[
|
||||
get_server_status(server) for server in servers
|
||||
])
|
||||
|
||||
return {"servers": server_statuses}
|
||||
|
||||
|
||||
@app.post("/api/dashboard/refresh-server/{server_id}")
|
||||
async def refresh_server_data(
|
||||
server_id: int,
|
||||
current_user: User = Depends(get_current_user),
|
||||
db: Session = Depends(get_db)
|
||||
):
|
||||
"""Manually trigger a sensor data refresh for a server.
|
||||
|
||||
The sensor collector updates data every 10 seconds automatically.
|
||||
This endpoint allows forcing an immediate refresh.
|
||||
"""
|
||||
server = db.query(Server).filter(Server.id == server_id).first()
|
||||
if not server:
|
||||
raise HTTPException(status_code=404, detail="Server not found")
|
||||
|
||||
# Trigger immediate collection via sensor_collector
|
||||
from backend.fan_control import sensor_collector
|
||||
await sensor_collector._collect_server_with_timeout(server)
|
||||
|
||||
return {"success": True, "message": "Data refreshed"}
|
||||
|
||||
|
||||
@app.get("/api/dashboard/servers/{server_id}", response_model=ServerDashboardData)
|
||||
async def get_server_dashboard(
|
||||
server_id: int,
|
||||
current_user: User = Depends(get_current_user),
|
||||
db: Session = Depends(get_db)
|
||||
):
|
||||
"""Get detailed dashboard data for a specific server."""
|
||||
"""Get detailed dashboard data for a specific server.
|
||||
|
||||
Uses cached sensor data from the continuous collector.
|
||||
Falls back to direct IPMI query only if cache is empty.
|
||||
"""
|
||||
server = db.query(Server).filter(Server.id == server_id).first()
|
||||
if not server:
|
||||
raise HTTPException(status_code=404, detail="Server not found")
|
||||
|
||||
# Get current sensor data
|
||||
# Try to get sensor data from cache first
|
||||
cached = await sensor_cache.get(server_id)
|
||||
|
||||
temps = []
|
||||
fans = []
|
||||
power_data = None
|
||||
|
||||
try:
|
||||
client = IPMIClient(
|
||||
host=server.ipmi_host,
|
||||
username=server.ipmi_username,
|
||||
password=decrypt_password(server.ipmi_encrypted_password),
|
||||
port=server.ipmi_port,
|
||||
vendor=server.vendor
|
||||
)
|
||||
|
||||
if client.test_connection():
|
||||
temps_readings = client.get_temperatures()
|
||||
temps = [t.__dict__ for t in temps_readings]
|
||||
fans_readings = client.get_fan_speeds()
|
||||
fans = [f.__dict__ for f in fans_readings]
|
||||
power_data = client.get_power_consumption()
|
||||
except Exception as e:
|
||||
logger.warning(f"Could not fetch live data for {server.name}: {e}")
|
||||
if cached:
|
||||
# Use cached data - already in correct format
|
||||
temps = cached.get("temps", [])
|
||||
fans = cached.get("fans", [])
|
||||
power_data = cached.get("power_raw")
|
||||
logger.info(f"Serving dashboard data for {server.name} from cache")
|
||||
else:
|
||||
# Cache miss - fetch live data as fallback
|
||||
logger.warning(f"Cache miss for server {server.name}, fetching live data")
|
||||
try:
|
||||
client = IPMIClient(
|
||||
host=server.ipmi_host,
|
||||
username=server.ipmi_username,
|
||||
password=decrypt_password(server.ipmi_encrypted_password),
|
||||
port=server.ipmi_port,
|
||||
vendor=server.vendor
|
||||
)
|
||||
|
||||
if client.test_connection():
|
||||
temps_readings = client.get_temperatures()
|
||||
temps = [{"name": t.name, "reading": t.value, "location": t.location} for t in temps_readings]
|
||||
fans_readings = client.get_fan_speeds()
|
||||
fans = [{"fan_number": f.fan_number, "reading": f.speed_percent, "speed_rpm": f.speed_rpm} for f in fans_readings]
|
||||
power_data = client.get_power_consumption()
|
||||
except Exception as e:
|
||||
logger.warning(f"Could not fetch live data for {server.name}: {e}")
|
||||
|
||||
# Get recent historical data
|
||||
recent_sensor_data = db.query(SensorData).filter(
|
||||
|
|
|
|||
|
|
@ -164,7 +164,25 @@ class SSHClient:
|
|||
package_temp = None
|
||||
|
||||
for key, value in chip_data.items():
|
||||
if isinstance(value, (int, float)):
|
||||
# Skip metadata fields
|
||||
if key in ['Adapter']:
|
||||
continue
|
||||
|
||||
# Handle nested JSON structure from sensors -j
|
||||
# e.g., "Core 0": {"temp2_input": 31, "temp2_max": 79, ...}
|
||||
if isinstance(value, dict):
|
||||
# Look for temp*_input field which contains the actual temperature
|
||||
for sub_key, sub_value in value.items():
|
||||
if 'input' in sub_key.lower() and isinstance(sub_value, (int, float)):
|
||||
temp_value = float(sub_value)
|
||||
if 'core' in key.lower():
|
||||
core_temps[key] = temp_value
|
||||
elif 'tdie' in key.lower() or 'tctl' in key.lower() or 'package' in key.lower():
|
||||
package_temp = temp_value
|
||||
break # Only take the first _input value
|
||||
|
||||
# Handle flat structure (fallback for text parsing)
|
||||
elif isinstance(value, (int, float)):
|
||||
if 'core' in key.lower():
|
||||
core_temps[key] = float(value)
|
||||
elif 'tdie' in key.lower() or 'tctl' in key.lower() or 'package' in key.lower():
|
||||
|
|
|
|||
File diff suppressed because it is too large
Load Diff
|
|
@ -1,3 +1,4 @@
|
|||
import React from 'react';
|
||||
import { useNavigate } from 'react-router-dom';
|
||||
import { useQuery } from '@tanstack/react-query';
|
||||
import {
|
||||
|
|
@ -14,7 +15,7 @@ import {
|
|||
Chip,
|
||||
IconButton,
|
||||
Tooltip,
|
||||
CircularProgress,
|
||||
Skeleton,
|
||||
} from '@mui/material';
|
||||
import {
|
||||
Dns as ServerIcon,
|
||||
|
|
@ -23,21 +24,67 @@ import {
|
|||
Error as ErrorIcon,
|
||||
CheckCircle as CheckIcon,
|
||||
Thermostat as TempIcon,
|
||||
NavigateNext as NextIcon,
|
||||
Refresh as RefreshIcon,
|
||||
PowerSettingsNew as PowerIcon,
|
||||
Memory as MemoryIcon,
|
||||
} from '@mui/icons-material';
|
||||
import { dashboardApi } from '../utils/api';
|
||||
import { useMutation, useQueryClient } from '@tanstack/react-query';
|
||||
|
||||
interface ServerOverview {
|
||||
id: number;
|
||||
name: string;
|
||||
vendor: string;
|
||||
is_active: boolean;
|
||||
manual_control_enabled: boolean;
|
||||
auto_control_enabled: boolean;
|
||||
max_temp: number | null;
|
||||
avg_fan_speed: number | null;
|
||||
power_consumption: number | null;
|
||||
last_updated: string | null;
|
||||
cached: boolean;
|
||||
}
|
||||
|
||||
export default function Dashboard() {
|
||||
const navigate = useNavigate();
|
||||
const queryClient = useQueryClient();
|
||||
|
||||
const { data: stats, isLoading } = useQuery({
|
||||
// Stats query - poll every 60 seconds (stats don't change often)
|
||||
const { data: stats } = useQuery({
|
||||
queryKey: ['dashboard-stats'],
|
||||
queryFn: async () => {
|
||||
const response = await dashboardApi.getStats();
|
||||
return response.data;
|
||||
},
|
||||
refetchInterval: 5000, // Refresh every 5 seconds
|
||||
refetchInterval: 60000, // 60 seconds
|
||||
staleTime: 55000,
|
||||
});
|
||||
|
||||
// Server overview query - poll every 30 seconds (matches sensor collector)
|
||||
const { data: overviewData, isLoading: overviewLoading } = useQuery({
|
||||
queryKey: ['servers-overview'],
|
||||
queryFn: async () => {
|
||||
const response = await dashboardApi.getServersOverview();
|
||||
return response.data.servers as ServerOverview[];
|
||||
},
|
||||
refetchInterval: 30000, // 30 seconds - matches sensor collector
|
||||
staleTime: 25000,
|
||||
// Don't refetch on window focus to reduce load
|
||||
refetchOnWindowFocus: false,
|
||||
});
|
||||
|
||||
// Background refresh mutation
|
||||
const refreshMutation = useMutation({
|
||||
mutationFn: async (serverId: number) => {
|
||||
const response = await dashboardApi.refreshServer(serverId);
|
||||
return response.data;
|
||||
},
|
||||
onSuccess: () => {
|
||||
// Invalidate overview after a short delay to allow background fetch
|
||||
setTimeout(() => {
|
||||
queryClient.invalidateQueries({ queryKey: ['servers-overview'] });
|
||||
}, 2000);
|
||||
},
|
||||
});
|
||||
|
||||
const getEventIcon = (eventType: string) => {
|
||||
|
|
@ -79,13 +126,157 @@ export default function Dashboard() {
|
|||
</Card>
|
||||
);
|
||||
|
||||
if (isLoading) {
|
||||
const ServerCard = ({ server }: { server: ServerOverview }) => {
|
||||
const hasData = server.max_temp !== null || server.avg_fan_speed !== null;
|
||||
const isLoading = !hasData && server.is_active;
|
||||
|
||||
const getTempColor = (temp: number | null) => {
|
||||
if (temp === null) return 'text.secondary';
|
||||
if (temp > 80) return 'error.main';
|
||||
if (temp > 70) return 'warning.main';
|
||||
return 'success.main';
|
||||
};
|
||||
|
||||
const getStatusChip = () => {
|
||||
if (!server.is_active) {
|
||||
return <Chip size="small" label="Offline" color="default" icon={<PowerIcon />} />;
|
||||
}
|
||||
if (server.manual_control_enabled) {
|
||||
return <Chip size="small" label="Manual" color="info" icon={<SpeedIcon />} />;
|
||||
}
|
||||
if (server.auto_control_enabled) {
|
||||
return <Chip size="small" label="Auto" color="success" icon={<CheckIcon />} />;
|
||||
}
|
||||
return <Chip size="small" label="Active" color="success" />;
|
||||
};
|
||||
|
||||
return (
|
||||
<Box sx={{ display: 'flex', justifyContent: 'center', p: 4 }}>
|
||||
<CircularProgress />
|
||||
</Box>
|
||||
<Card
|
||||
variant="outlined"
|
||||
sx={{
|
||||
cursor: 'pointer',
|
||||
transition: 'all 0.2s',
|
||||
opacity: isLoading ? 0.7 : 1,
|
||||
'&:hover': {
|
||||
boxShadow: 2,
|
||||
borderColor: 'primary.main',
|
||||
},
|
||||
}}
|
||||
onClick={() => navigate(`/servers/${server.id}`)}
|
||||
>
|
||||
<CardContent sx={{ p: 2, '&:last-child': { pb: 2 } }}>
|
||||
{/* Header */}
|
||||
<Box sx={{ display: 'flex', justifyContent: 'space-between', alignItems: 'flex-start', mb: 2 }}>
|
||||
<Box sx={{ display: 'flex', alignItems: 'center', gap: 1 }}>
|
||||
<ServerIcon color={server.is_active ? 'primary' : 'disabled'} />
|
||||
<Typography variant="subtitle1" fontWeight="medium" noWrap sx={{ maxWidth: 150 }}>
|
||||
{server.name}
|
||||
</Typography>
|
||||
</Box>
|
||||
{getStatusChip()}
|
||||
</Box>
|
||||
|
||||
{/* Metrics Grid - Always show values or -- placeholder */}
|
||||
<Grid container spacing={1} sx={{ mb: 1 }}>
|
||||
<Grid item xs={4}>
|
||||
<Box sx={{ textAlign: 'center' }}>
|
||||
<Typography variant="h6" color={getTempColor(server.max_temp)}>
|
||||
{server.max_temp !== null ? `${Math.round(server.max_temp)}°C` : '--'}
|
||||
</Typography>
|
||||
<Typography variant="caption" color="text.secondary">
|
||||
Max Temp
|
||||
</Typography>
|
||||
</Box>
|
||||
</Grid>
|
||||
<Grid item xs={4}>
|
||||
<Box sx={{ textAlign: 'center' }}>
|
||||
<Typography variant="h6" color="primary.main">
|
||||
{server.avg_fan_speed !== null ? `${Math.round(server.avg_fan_speed)}%` : '--'}
|
||||
</Typography>
|
||||
<Typography variant="caption" color="text.secondary">
|
||||
Avg Fan
|
||||
</Typography>
|
||||
</Box>
|
||||
</Grid>
|
||||
<Grid item xs={4}>
|
||||
<Box sx={{ textAlign: 'center' }}>
|
||||
<Typography variant="h6" color="text.primary">
|
||||
{server.power_consumption !== null ? `${Math.round(server.power_consumption)}W` : '--'}
|
||||
</Typography>
|
||||
<Typography variant="caption" color="text.secondary">
|
||||
Power
|
||||
</Typography>
|
||||
</Box>
|
||||
</Grid>
|
||||
</Grid>
|
||||
|
||||
{/* Footer */}
|
||||
<Box sx={{ display: 'flex', justifyContent: 'space-between', alignItems: 'center', mt: 1 }}>
|
||||
<Typography variant="caption" color="text.secondary">
|
||||
{server.vendor || 'Unknown Vendor'}
|
||||
</Typography>
|
||||
<Box sx={{ display: 'flex', alignItems: 'center', gap: 0.5 }}>
|
||||
{isLoading ? (
|
||||
<Chip size="small" label="Loading..." color="warning" variant="outlined" sx={{ height: 20, fontSize: '0.6rem' }} />
|
||||
) : server.cached ? (
|
||||
<Chip size="small" label="Cached" variant="outlined" sx={{ height: 20, fontSize: '0.6rem' }} />
|
||||
) : null}
|
||||
<Tooltip title="Refresh data">
|
||||
<IconButton
|
||||
size="small"
|
||||
onClick={(e: React.MouseEvent) => {
|
||||
e.stopPropagation();
|
||||
refreshMutation.mutate(server.id);
|
||||
}}
|
||||
disabled={refreshMutation.isPending}
|
||||
>
|
||||
<RefreshIcon fontSize="small" />
|
||||
</IconButton>
|
||||
</Tooltip>
|
||||
</Box>
|
||||
</Box>
|
||||
</CardContent>
|
||||
</Card>
|
||||
);
|
||||
}
|
||||
};
|
||||
|
||||
// Show placeholder cards while loading initial data
|
||||
const ServersPlaceholderGrid = () => (
|
||||
<Grid container spacing={2}>
|
||||
{[1, 2, 3, 4].map((i) => (
|
||||
<Grid item xs={12} sm={6} md={4} lg={3} key={i}>
|
||||
<Card variant="outlined" sx={{ opacity: 0.5 }}>
|
||||
<CardContent sx={{ p: 2 }}>
|
||||
<Box sx={{ display: 'flex', alignItems: 'center', gap: 1, mb: 2 }}>
|
||||
<Skeleton variant="circular" width={24} height={24} />
|
||||
<Skeleton variant="text" width="60%" />
|
||||
</Box>
|
||||
<Grid container spacing={1}>
|
||||
<Grid item xs={4}>
|
||||
<Box sx={{ textAlign: 'center' }}>
|
||||
<Skeleton variant="text" width={30} sx={{ mx: 'auto' }} />
|
||||
<Skeleton variant="text" width={40} sx={{ mx: 'auto' }} />
|
||||
</Box>
|
||||
</Grid>
|
||||
<Grid item xs={4}>
|
||||
<Box sx={{ textAlign: 'center' }}>
|
||||
<Skeleton variant="text" width={30} sx={{ mx: 'auto' }} />
|
||||
<Skeleton variant="text" width={40} sx={{ mx: 'auto' }} />
|
||||
</Box>
|
||||
</Grid>
|
||||
<Grid item xs={4}>
|
||||
<Box sx={{ textAlign: 'center' }}>
|
||||
<Skeleton variant="text" width={30} sx={{ mx: 'auto' }} />
|
||||
<Skeleton variant="text" width={40} sx={{ mx: 'auto' }} />
|
||||
</Box>
|
||||
</Grid>
|
||||
</Grid>
|
||||
</CardContent>
|
||||
</Card>
|
||||
</Grid>
|
||||
))}
|
||||
</Grid>
|
||||
);
|
||||
|
||||
return (
|
||||
<Box>
|
||||
|
|
@ -137,6 +328,49 @@ export default function Dashboard() {
|
|||
</Grid>
|
||||
</Grid>
|
||||
|
||||
{/* Servers Grid */}
|
||||
<Paper sx={{ p: 3, mb: 3 }}>
|
||||
<Box sx={{ display: 'flex', justifyContent: 'space-between', alignItems: 'center', mb: 3 }}>
|
||||
<Typography variant="h6">
|
||||
Server Overview
|
||||
</Typography>
|
||||
<Chip
|
||||
label={`${overviewData?.length || 0} servers`}
|
||||
size="small"
|
||||
color="primary"
|
||||
variant="outlined"
|
||||
/>
|
||||
</Box>
|
||||
|
||||
{overviewLoading ? (
|
||||
<ServersPlaceholderGrid />
|
||||
) : overviewData && overviewData.length > 0 ? (
|
||||
<Grid container spacing={2}>
|
||||
{overviewData.map((server) => (
|
||||
<Grid item xs={12} sm={6} md={4} lg={3} key={server.id}>
|
||||
<ServerCard server={server} />
|
||||
</Grid>
|
||||
))}
|
||||
</Grid>
|
||||
) : (
|
||||
<Box sx={{ textAlign: 'center', py: 4 }}>
|
||||
<ServerIcon sx={{ fontSize: 48, color: 'text.secondary', mb: 2 }} />
|
||||
<Typography variant="h6" color="text.secondary">
|
||||
No servers configured
|
||||
</Typography>
|
||||
<Typography variant="body2" color="text.secondary" sx={{ mb: 2 }}>
|
||||
Add your first server to start monitoring
|
||||
</Typography>
|
||||
<Chip
|
||||
label="Add Server"
|
||||
color="primary"
|
||||
onClick={() => navigate('/servers')}
|
||||
clickable
|
||||
/>
|
||||
</Box>
|
||||
)}
|
||||
</Paper>
|
||||
|
||||
{/* Recent Logs */}
|
||||
<Grid container spacing={3}>
|
||||
<Grid item xs={12} md={6}>
|
||||
|
|
@ -151,7 +385,7 @@ export default function Dashboard() {
|
|||
/>
|
||||
</Box>
|
||||
<List dense>
|
||||
{stats?.recent_logs?.slice(0, 10).map((log) => (
|
||||
{stats?.recent_logs?.slice(0, 10).map((log: any) => (
|
||||
<ListItem key={log.id}>
|
||||
<ListItemIcon>
|
||||
{getEventIcon(log.event_type)}
|
||||
|
|
@ -177,54 +411,30 @@ export default function Dashboard() {
|
|||
<Grid item xs={12} md={6}>
|
||||
<Paper sx={{ p: 2 }}>
|
||||
<Typography variant="h6" gutterBottom>
|
||||
Quick Actions
|
||||
About IPMI Fan Control
|
||||
</Typography>
|
||||
<Box sx={{ display: 'flex', flexDirection: 'column', gap: 1 }}>
|
||||
<Card variant="outlined">
|
||||
<CardContent sx={{ display: 'flex', alignItems: 'center', justifyContent: 'space-between', py: 1, '&:last-child': { pb: 1 } }}>
|
||||
<Box>
|
||||
<Typography variant="subtitle1">Manage Servers</Typography>
|
||||
<Typography variant="body2" color="text.secondary">
|
||||
Add, edit, or remove servers
|
||||
</Typography>
|
||||
</Box>
|
||||
<Tooltip title="Go to Servers">
|
||||
<IconButton onClick={() => navigate('/servers')}>
|
||||
<NextIcon />
|
||||
</IconButton>
|
||||
</Tooltip>
|
||||
</CardContent>
|
||||
</Card>
|
||||
|
||||
<Card variant="outlined">
|
||||
<CardContent sx={{ display: 'flex', alignItems: 'center', justifyContent: 'space-between', py: 1, '&:last-child': { pb: 1 } }}>
|
||||
<Box>
|
||||
<Typography variant="subtitle1">View Logs</Typography>
|
||||
<Typography variant="body2" color="text.secondary">
|
||||
Check system events and history
|
||||
</Typography>
|
||||
</Box>
|
||||
<Tooltip title="Go to Logs">
|
||||
<IconButton onClick={() => navigate('/logs')}>
|
||||
<NextIcon />
|
||||
</IconButton>
|
||||
</Tooltip>
|
||||
</CardContent>
|
||||
</Card>
|
||||
|
||||
<Card variant="outlined">
|
||||
<CardContent sx={{ py: 1, '&:last-child': { pb: 1 } }}>
|
||||
<Typography variant="subtitle1" gutterBottom>
|
||||
About IPMI Fan Control
|
||||
</Typography>
|
||||
<Typography variant="body2" color="text.secondary">
|
||||
This application allows you to control fan speeds on Dell T710 and compatible servers
|
||||
using IPMI commands. Features include manual fan control, automatic fan curves based
|
||||
on temperature, and safety panic mode.
|
||||
</Typography>
|
||||
</CardContent>
|
||||
</Card>
|
||||
</Box>
|
||||
<Typography variant="body2" color="text.secondary" paragraph>
|
||||
This application allows you to control fan speeds on Dell T710 and compatible servers
|
||||
using IPMI commands. Features include:
|
||||
</Typography>
|
||||
<List dense>
|
||||
<ListItem>
|
||||
<ListItemIcon><SpeedIcon color="primary" fontSize="small" /></ListItemIcon>
|
||||
<ListItemText primary="Manual fan control with per-fan adjustment" />
|
||||
</ListItem>
|
||||
<ListItem>
|
||||
<ListItemIcon><TempIcon color="primary" fontSize="small" /></ListItemIcon>
|
||||
<ListItemText primary="Automatic fan curves based on temperature sensors" />
|
||||
</ListItem>
|
||||
<ListItem>
|
||||
<ListItemIcon><MemoryIcon color="primary" fontSize="small" /></ListItemIcon>
|
||||
<ListItemText primary="SSH-based CPU temperature monitoring" />
|
||||
</ListItem>
|
||||
<ListItem>
|
||||
<ListItemIcon><ErrorIcon color="primary" fontSize="small" /></ListItemIcon>
|
||||
<ListItemText primary="Safety panic mode for overheating protection" />
|
||||
</ListItem>
|
||||
</List>
|
||||
</Paper>
|
||||
</Grid>
|
||||
</Grid>
|
||||
|
|
|
|||
|
|
@ -34,6 +34,7 @@ import {
|
|||
Refresh as RefreshIcon,
|
||||
} from '@mui/icons-material';
|
||||
import { serversApi, fanControlApi, dashboardApi } from '../utils/api';
|
||||
import FanCurveManager from '../components/FanCurveManager';
|
||||
|
||||
interface TabPanelProps {
|
||||
children?: React.ReactNode;
|
||||
|
|
@ -64,6 +65,10 @@ export default function ServerDetail() {
|
|||
const response = await serversApi.getById(serverId);
|
||||
return response.data;
|
||||
},
|
||||
// Server config rarely changes
|
||||
refetchInterval: 60000,
|
||||
staleTime: 55000,
|
||||
refetchOnWindowFocus: false,
|
||||
});
|
||||
|
||||
const { data: sensors, refetch: refetchSensors } = useQuery({
|
||||
|
|
@ -72,7 +77,9 @@ export default function ServerDetail() {
|
|||
const response = await serversApi.getSensors(serverId);
|
||||
return response.data;
|
||||
},
|
||||
refetchInterval: 5000,
|
||||
refetchInterval: 30000, // 30 seconds - matches sensor collector
|
||||
staleTime: 25000,
|
||||
refetchOnWindowFocus: false,
|
||||
});
|
||||
|
||||
// Get SSH sensors for core temps - use dedicated endpoint
|
||||
|
|
@ -88,7 +95,9 @@ export default function ServerDetail() {
|
|||
}
|
||||
},
|
||||
enabled: !!server?.use_ssh,
|
||||
refetchInterval: 10000, // Slower refresh for SSH
|
||||
refetchInterval: 30000, // SSH is slow - refresh less frequently
|
||||
staleTime: 25000,
|
||||
refetchOnWindowFocus: false,
|
||||
});
|
||||
|
||||
const { data: dashboardData } = useQuery({
|
||||
|
|
@ -97,7 +106,9 @@ export default function ServerDetail() {
|
|||
const response = await dashboardApi.getServerData(serverId);
|
||||
return response.data;
|
||||
},
|
||||
refetchInterval: 10000,
|
||||
refetchInterval: 60000, // Historical data doesn't change often
|
||||
staleTime: 55000,
|
||||
refetchOnWindowFocus: false,
|
||||
});
|
||||
|
||||
const enableManualMutation = useMutation({
|
||||
|
|
@ -305,37 +316,48 @@ export default function ServerDetail() {
|
|||
<PowerIcon sx={{ mr: 1, verticalAlign: 'middle' }} />
|
||||
Power Consumption
|
||||
</Typography>
|
||||
<Grid container spacing={2}>
|
||||
{Object.entries(dashboardData.power_consumption)
|
||||
.filter(([_, value]) => !value.includes('UTC')) // Filter out weird timestamp entries
|
||||
.slice(0, 4)
|
||||
.map(([key, value]) => {
|
||||
// Clean up the display
|
||||
let displayValue = value as string;
|
||||
let displayKey = key;
|
||||
|
||||
// Handle Dell power monitor output
|
||||
if (key.includes('System') && value.includes('Reading')) {
|
||||
const match = value.match(/Reading\s*:\s*([\d.]+)\s*(\w+)/);
|
||||
if (match) {
|
||||
displayValue = `${match[1]} ${match[2]}`;
|
||||
}
|
||||
}
|
||||
|
||||
return (
|
||||
<Grid item xs={6} md={3} key={key}>
|
||||
<Paper variant="outlined" sx={{ p: 2, textAlign: 'center' }}>
|
||||
<Typography variant="body2" color="text.secondary" sx={{ textTransform: 'capitalize' }}>
|
||||
{displayKey.replace(/_/g, ' ')}
|
||||
</Typography>
|
||||
<Typography variant="h6" sx={{ mt: 0.5 }}>
|
||||
{displayValue}
|
||||
</Typography>
|
||||
</Paper>
|
||||
</Grid>
|
||||
);
|
||||
})}
|
||||
</Grid>
|
||||
|
||||
{/* Handle numeric power value (from cache) */}
|
||||
{typeof dashboardData.power_consumption === 'number' && (
|
||||
<Paper variant="outlined" sx={{ p: 3, textAlign: 'center', maxWidth: 300 }}>
|
||||
<Typography variant="body2" color="text.secondary">
|
||||
Current Power Consumption
|
||||
</Typography>
|
||||
<Typography variant="h3" color="primary.main" sx={{ mt: 1 }}>
|
||||
{Math.round(dashboardData.power_consumption)}W
|
||||
</Typography>
|
||||
</Paper>
|
||||
)}
|
||||
|
||||
{/* Handle dictionary power data (from live IPMI) */}
|
||||
{typeof dashboardData.power_consumption === 'object' && (
|
||||
<Grid container spacing={2}>
|
||||
{Object.entries(dashboardData.power_consumption)
|
||||
.filter(([_, value]) => {
|
||||
// Filter out empty values, timestamps, and metadata
|
||||
if (!value || value === '') return false;
|
||||
if (typeof value === 'string' && value.includes('UTC')) return false;
|
||||
return true;
|
||||
})
|
||||
.map(([key, value]) => {
|
||||
// Show the raw value as-is from IPMI
|
||||
const displayValue = typeof value === 'string' ? value : String(value);
|
||||
|
||||
return (
|
||||
<Grid item xs={6} md={3} key={key}>
|
||||
<Paper variant="outlined" sx={{ p: 2, textAlign: 'center' }}>
|
||||
<Typography variant="body2" color="text.secondary">
|
||||
{key}
|
||||
</Typography>
|
||||
<Typography variant="h6" sx={{ mt: 0.5 }}>
|
||||
{displayValue}
|
||||
</Typography>
|
||||
</Paper>
|
||||
</Grid>
|
||||
);
|
||||
})}
|
||||
</Grid>
|
||||
)}
|
||||
</CardContent>
|
||||
</Card>
|
||||
</Grid>
|
||||
|
|
@ -484,6 +506,11 @@ export default function ServerDetail() {
|
|||
</CardContent>
|
||||
</Card>
|
||||
</Grid>
|
||||
|
||||
{/* Fan Curves Section */}
|
||||
<Grid item xs={12}>
|
||||
<FanCurveManager serverId={serverId} server={server} />
|
||||
</Grid>
|
||||
</Grid>
|
||||
</TabPanel>
|
||||
|
||||
|
|
|
|||
|
|
@ -147,6 +147,22 @@ export const fanCurvesApi = {
|
|||
// Dashboard API
|
||||
export const dashboardApi = {
|
||||
getStats: () => api.get<DashboardStats>('/dashboard/stats'),
|
||||
getServersOverview: () =>
|
||||
api.get<{ servers: Array<{
|
||||
id: number;
|
||||
name: string;
|
||||
vendor: string;
|
||||
is_active: boolean;
|
||||
manual_control_enabled: boolean;
|
||||
auto_control_enabled: boolean;
|
||||
max_temp: number | null;
|
||||
avg_fan_speed: number | null;
|
||||
power_consumption: number | null;
|
||||
last_updated: string | null;
|
||||
cached: boolean;
|
||||
}> }>('/dashboard/servers-overview'),
|
||||
refreshServer: (serverId: number) =>
|
||||
api.post<{ success: boolean; message: string }>(`/dashboard/refresh-server/${serverId}`),
|
||||
getServerData: (serverId: number) =>
|
||||
api.get<{
|
||||
server: Server;
|
||||
|
|
|
|||
Loading…
Reference in New Issue