fix: spurious health alerts, show rotated logs, clear history button
app.py:
- Alert logic now checks status.running (container up/down) instead of healthy (which requires connectivity tests), so services are only alerted on when they are actually down.
- Add POST /api/health/history/clear endpoint to reset the health history and the alert counters.

log_manager.py:
- get_all_log_file_infos: include rotated backup files (*.log.1, *.log.2, ...) in the listing, marked with backup=true so the UI can dim them and hide the rotate button.

api.js:
- Add monitoringAPI.clearHealthHistory.

Logs page:
- Health History: add a Clear button with confirmation.
- File panel: show the full filename (including .log.1 backups), explain the host path and naming, and hide the rotate button for backup files.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
+16
-18
@@ -234,36 +234,26 @@ def perform_health_check():
|
|||||||
except Exception as e:
|
except Exception as e:
|
||||||
result[service_name] = {'error': str(e), 'status': 'offline'}
|
result[service_name] = {'error': str(e), 'status': 'offline'}
|
||||||
|
|
||||||
# Health alerting logic - improved to be more robust
|
# Health alerting logic — alert only when a service container is not running
|
||||||
global service_alert_counters
|
global service_alert_counters
|
||||||
for service_name in service_bus.list_services():
|
for service_name in service_bus.list_services():
|
||||||
if service_name in result:
|
if service_name in result:
|
||||||
status = result[service_name]
|
status = result[service_name]
|
||||||
healthy = True
|
healthy = True
|
||||||
|
|
||||||
# Improved health determination logic
|
|
||||||
if isinstance(status, dict):
|
if isinstance(status, dict):
|
||||||
# Check for explicit healthy field first
|
# Prefer status.running (container actually up) over healthy (connectivity tests)
|
||||||
if 'healthy' in status:
|
inner = status.get('status', {})
|
||||||
healthy = status['healthy']
|
if isinstance(inner, dict):
|
||||||
# Check for running status
|
if 'running' in inner:
|
||||||
|
healthy = inner['running']
|
||||||
|
elif 'status' in inner:
|
||||||
|
healthy = str(inner['status']).lower() in ('ok', 'healthy', 'online', 'active')
|
||||||
elif 'running' in status:
|
elif 'running' in status:
|
||||||
healthy = status['running']
|
healthy = status['running']
|
||||||
# Check for status field with various healthy values
|
|
||||||
elif 'status' in status:
|
|
||||||
status_value = status['status']
|
|
||||||
if isinstance(status_value, str):
|
|
||||||
healthy = status_value.lower() in ('ok', 'healthy', 'online', 'active')
|
|
||||||
else:
|
|
||||||
healthy = bool(status_value)
|
|
||||||
# Check for error field
|
|
||||||
elif 'error' in status:
|
elif 'error' in status:
|
||||||
healthy = False
|
healthy = False
|
||||||
# If no health indicators, assume healthy if service exists
|
|
||||||
else:
|
|
||||||
healthy = True
|
|
||||||
else:
|
else:
|
||||||
# If status is not a dict, assume it's a boolean
|
|
||||||
healthy = bool(status)
|
healthy = bool(status)
|
||||||
|
|
||||||
# Only count as unhealthy if we're certain it's down
|
# Only count as unhealthy if we're certain it's down
|
||||||
@@ -1985,6 +1975,14 @@ def get_health_history():
|
|||||||
"""Get recent unified health check results."""
|
"""Get recent unified health check results."""
|
||||||
return jsonify(list(health_history))
|
return jsonify(list(health_history))
|
||||||
|
|
||||||
|
@app.route('/api/health/history/clear', methods=['POST'])
|
||||||
|
def clear_health_history():
|
||||||
|
"""Clear health history and reset alert counters."""
|
||||||
|
global service_alert_counters
|
||||||
|
health_history.clear()
|
||||||
|
service_alert_counters = {}
|
||||||
|
return jsonify({'message': 'Health history cleared'})
|
||||||
|
|
||||||
@app.route('/api/logs', methods=['GET'])
|
@app.route('/api/logs', methods=['GET'])
|
||||||
def get_backend_logs():
|
def get_backend_logs():
|
||||||
"""Get backend log file contents (last N lines)."""
|
"""Get backend log file contents (last N lines)."""
|
||||||
|
|||||||
+22
-12
@@ -520,19 +520,29 @@ class LogManager:
|
|||||||
}
|
}
|
||||||
|
|
||||||
def get_all_log_file_infos(self) -> List[Dict[str, Any]]:
|
def get_all_log_file_infos(self) -> List[Dict[str, Any]]:
|
||||||
"""Return size/mtime info for all service log files."""
|
"""Return size/mtime info for active and rotated service log files."""
|
||||||
results = []
|
results = []
|
||||||
for log_file in sorted(self.log_dir.glob('*.log')):
|
# Active logs (*.log) then rotated backups (*.log.1, *.log.2, ...)
|
||||||
try:
|
patterns = ['*.log', '*.log.*']
|
||||||
stat = log_file.stat()
|
seen = set()
|
||||||
results.append({
|
for pattern in patterns:
|
||||||
'name': log_file.stem,
|
for log_file in sorted(self.log_dir.glob(pattern)):
|
||||||
'file': log_file.name,
|
if log_file in seen or log_file.suffix == '.gz':
|
||||||
'size': stat.st_size,
|
continue
|
||||||
'modified': datetime.fromtimestamp(stat.st_mtime).isoformat(),
|
seen.add(log_file)
|
||||||
})
|
try:
|
||||||
except Exception:
|
stat = log_file.stat()
|
||||||
pass
|
name = log_file.name
|
||||||
|
is_backup = not name.endswith('.log')
|
||||||
|
results.append({
|
||||||
|
'name': log_file.stem.split('.')[0], # service name
|
||||||
|
'file': name,
|
||||||
|
'size': stat.st_size,
|
||||||
|
'modified': datetime.fromtimestamp(stat.st_mtime).isoformat(),
|
||||||
|
'backup': is_backup,
|
||||||
|
})
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
return results
|
return results
|
||||||
|
|
||||||
def compress_old_logs(self):
|
def compress_old_logs(self):
|
||||||
|
|||||||
@@ -134,24 +134,29 @@ function ApiServiceLogsTab() {
|
|||||||
{/* File info panel */}
|
{/* File info panel */}
|
||||||
{showFiles && (
|
{showFiles && (
|
||||||
<div className="border rounded bg-gray-50 p-3">
|
<div className="border rounded bg-gray-50 p-3">
|
||||||
<div className="flex justify-between items-center mb-2">
|
<div className="flex justify-between items-center mb-1">
|
||||||
<span className="text-sm font-medium text-gray-700">Log Files (persisted to ./data/logs/)</span>
|
<div>
|
||||||
|
<span className="text-sm font-medium text-gray-700">Log Files</span>
|
||||||
|
<span className="ml-2 text-xs text-gray-400">host path: <code>./data/logs/</code> — rotated backups saved as <code>wireguard.log.1</code>, <code>wireguard.log.2</code> …</span>
|
||||||
|
</div>
|
||||||
<button className="btn btn-secondary text-xs px-2 py-0.5" onClick={() => rotate(null)} disabled={rotating === 'all'}>
|
<button className="btn btn-secondary text-xs px-2 py-0.5" onClick={() => rotate(null)} disabled={rotating === 'all'}>
|
||||||
<RotateCcw className={`h-3 w-3 inline mr-1 ${rotating === 'all' ? 'animate-spin' : ''}`} />Rotate All
|
<RotateCcw className={`h-3 w-3 inline mr-1 ${rotating === 'all' ? 'animate-spin' : ''}`} />Rotate All
|
||||||
</button>
|
</button>
|
||||||
</div>
|
</div>
|
||||||
<table className="w-full text-xs">
|
<table className="w-full text-xs">
|
||||||
<thead><tr className="text-gray-500"><th className="text-left py-1">Service</th><th className="text-right py-1">Size</th><th className="text-left py-1 pl-3">Modified</th><th className="text-center py-1"></th></tr></thead>
|
<thead><tr className="text-gray-500"><th className="text-left py-1">File</th><th className="text-right py-1">Size</th><th className="text-left py-1 pl-3">Modified</th><th className="text-center py-1"></th></tr></thead>
|
||||||
<tbody>
|
<tbody>
|
||||||
{fileInfos.map(f => (
|
{fileInfos.map(f => (
|
||||||
<tr key={f.name} className="border-t">
|
<tr key={f.file} className={`border-t ${f.backup ? 'text-gray-400' : ''}`}>
|
||||||
<td className="py-1 font-mono">{f.name}</td>
|
<td className="py-1 font-mono">{f.file}</td>
|
||||||
<td className="py-1 text-right font-mono">{fmtSize(f.size)}</td>
|
<td className="py-1 text-right font-mono">{fmtSize(f.size)}</td>
|
||||||
<td className="py-1 pl-3 text-gray-500">{f.modified?.slice(0, 19)}</td>
|
<td className="py-1 pl-3 text-gray-500">{f.modified?.slice(0, 19)}</td>
|
||||||
<td className="py-1 text-center">
|
<td className="py-1 text-center">
|
||||||
<button className="btn btn-secondary px-1.5 py-0.5 text-xs" onClick={() => rotate(f.name)} disabled={rotating === f.name}>
|
{!f.backup && (
|
||||||
<RotateCcw className={`h-3 w-3 ${rotating === f.name ? 'animate-spin' : ''}`} />
|
<button className="btn btn-secondary px-1.5 py-0.5 text-xs" onClick={() => rotate(f.name)} disabled={rotating === f.name}>
|
||||||
</button>
|
<RotateCcw className={`h-3 w-3 ${rotating === f.name ? 'animate-spin' : ''}`} />
|
||||||
|
</button>
|
||||||
|
)}
|
||||||
</td>
|
</td>
|
||||||
</tr>
|
</tr>
|
||||||
))}
|
))}
|
||||||
@@ -373,7 +378,14 @@ function HealthHistoryTab() {
|
|||||||
<div className="space-y-4">
|
<div className="space-y-4">
|
||||||
<div className="flex justify-between items-center">
|
<div className="flex justify-between items-center">
|
||||||
<h3 className="text-lg font-medium text-gray-900">Health History</h3>
|
<h3 className="text-lg font-medium text-gray-900">Health History</h3>
|
||||||
<button className="btn btn-secondary text-sm" onClick={load}><RefreshCw className="h-4 w-4 mr-1 inline" />Refresh</button>
|
<div className="flex gap-2">
|
||||||
|
<button className="btn btn-secondary text-sm" onClick={load}><RefreshCw className="h-4 w-4 mr-1 inline" />Refresh</button>
|
||||||
|
<button className="btn btn-secondary text-sm text-red-600" onClick={async () => {
|
||||||
|
if (!window.confirm('Clear all health history and reset alert counters?')) return;
|
||||||
|
await monitoringAPI.clearHealthHistory();
|
||||||
|
await load();
|
||||||
|
}}>Clear</button>
|
||||||
|
</div>
|
||||||
</div>
|
</div>
|
||||||
{loading ? <div className="text-gray-500 text-sm">Loading…</div> : (
|
{loading ? <div className="text-gray-500 text-sm">Loading…</div> : (
|
||||||
<div className="overflow-x-auto">
|
<div className="overflow-x-auto">
|
||||||
|
|||||||
@@ -184,6 +184,7 @@ export const healthAPI = {
|
|||||||
export const monitoringAPI = {
|
export const monitoringAPI = {
|
||||||
getBackendLogs: (lines = 100) => api.get('/api/logs', { params: { lines } }),
|
getBackendLogs: (lines = 100) => api.get('/api/logs', { params: { lines } }),
|
||||||
getHealthHistory: () => api.get('/api/health/history'),
|
getHealthHistory: () => api.get('/api/health/history'),
|
||||||
|
clearHealthHistory: () => api.post('/api/health/history/clear'),
|
||||||
};
|
};
|
||||||
|
|
||||||
// Logs API
|
// Logs API
|
||||||
|
|||||||
Reference in New Issue
Block a user