fix: spurious health alerts, show rotated logs, clear history button
app.py:
- Alert logic now checks status.running (container up/down) instead of healthy (which requires connectivity tests), so alerts are raised only for services that are actually down.
- Add a POST /api/health/history/clear endpoint to reset the health history and the alert counters.

log_manager.py:
- get_all_log_file_infos: include rotated backup files (*.log.1, *.log.2, ...) in the listing, marked with backup=true so the UI can dim them and hide the rotate button.

api.js:
- Add monitoringAPI.clearHealthHistory.

Logs page:
- Health History: add a Clear button with confirmation.
- File panel: show the full filename (including .log.1 backups), explain the host path and naming, and hide the rotate button for backup files.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
+17
-19
@@ -234,36 +234,26 @@ def perform_health_check():
|
||||
except Exception as e:
|
||||
result[service_name] = {'error': str(e), 'status': 'offline'}
|
||||
|
||||
# Health alerting logic - improved to be more robust
|
||||
# Health alerting logic — alert only when a service container is not running
|
||||
global service_alert_counters
|
||||
for service_name in service_bus.list_services():
|
||||
if service_name in result:
|
||||
status = result[service_name]
|
||||
healthy = True
|
||||
|
||||
# Improved health determination logic
|
||||
|
||||
if isinstance(status, dict):
|
||||
# Check for explicit healthy field first
|
||||
if 'healthy' in status:
|
||||
healthy = status['healthy']
|
||||
# Check for running status
|
||||
# Prefer status.running (container actually up) over healthy (connectivity tests)
|
||||
inner = status.get('status', {})
|
||||
if isinstance(inner, dict):
|
||||
if 'running' in inner:
|
||||
healthy = inner['running']
|
||||
elif 'status' in inner:
|
||||
healthy = str(inner['status']).lower() in ('ok', 'healthy', 'online', 'active')
|
||||
elif 'running' in status:
|
||||
healthy = status['running']
|
||||
# Check for status field with various healthy values
|
||||
elif 'status' in status:
|
||||
status_value = status['status']
|
||||
if isinstance(status_value, str):
|
||||
healthy = status_value.lower() in ('ok', 'healthy', 'online', 'active')
|
||||
else:
|
||||
healthy = bool(status_value)
|
||||
# Check for error field
|
||||
elif 'error' in status:
|
||||
healthy = False
|
||||
# If no health indicators, assume healthy if service exists
|
||||
else:
|
||||
healthy = True
|
||||
else:
|
||||
# If status is not a dict, assume it's a boolean
|
||||
healthy = bool(status)
|
||||
|
||||
# Only count as unhealthy if we're certain it's down
|
||||
@@ -1985,6 +1975,14 @@ def get_health_history():
|
||||
"""Get recent unified health check results."""
|
||||
return jsonify(list(health_history))
|
||||
|
||||
@app.route('/api/health/history/clear', methods=['POST'])
def clear_health_history():
    """Wipe the recorded health-check history and restart alert counting.

    Empties ``health_history`` in place and rebinds the module-level
    ``service_alert_counters`` to a fresh empty dict.

    Returns:
        A JSON response carrying a confirmation message.
    """
    global service_alert_counters
    # The two resets are independent of each other; counters go first here.
    service_alert_counters = {}
    health_history.clear()
    confirmation = {'message': 'Health history cleared'}
    return jsonify(confirmation)
|
||||
|
||||
@app.route('/api/logs', methods=['GET'])
|
||||
def get_backend_logs():
|
||||
"""Get backend log file contents (last N lines)."""
|
||||
|
||||
+22
-12
@@ -520,19 +520,29 @@ class LogManager:
|
||||
}
|
||||
|
||||
def get_all_log_file_infos(self) -> List[Dict[str, Any]]:
    """Return size/mtime info for active and rotated service log files.

    Scans ``self.log_dir`` for active logs (``*.log``) and rotated backups
    (``*.log.1``, ``*.log.2``, ...). Files whose last suffix is ``.gz`` are
    skipped.

    Returns:
        One dict per file, active logs first and rotated backups after them,
        with the keys ``name`` (the filename up to its ``.log`` part),
        ``file`` (full filename), ``size`` (bytes), ``modified`` (ISO-format
        string built from the file's mtime) and ``backup`` (True for rotated
        files, False for active ones).
    """
    log_ext = '.log'
    rotated_marker = '.log.'

    def rotation_key(path):
        # Sort backups per service by numeric rotation index so that
        # "app.log.2" is listed before "app.log.10"; backups with a
        # non-numeric suffix follow the numbered ones, ordered by suffix.
        base, _, suffix = path.name.rpartition(rotated_marker)
        if suffix.isdigit():
            return (base, 0, int(suffix), '')
        return (base, 1, 0, suffix)

    active = sorted(self.log_dir.glob('*' + log_ext))
    already_listed = set(active)
    # Anything ending in ".log" was already picked up as an active log, so
    # only the remaining matches are rotated backups.
    rotated = sorted(
        (
            path for path in self.log_dir.glob('*.log.*')
            if path not in already_listed and path.suffix != '.gz'
        ),
        key=rotation_key,
    )

    results = []
    for log_file in active + rotated:
        filename = log_file.name
        is_backup = not filename.endswith(log_ext)
        # Strip only the log suffix so a dotted service name such as
        # "my.service.log" keeps its full name instead of collapsing to "my".
        if is_backup:
            service = filename.rpartition(rotated_marker)[0]
        else:
            service = filename[:-len(log_ext)]
        try:
            stat = log_file.stat()
            modified = datetime.fromtimestamp(stat.st_mtime).isoformat()
        except (OSError, OverflowError, ValueError):
            # Best effort: a file can vanish between glob() and stat()
            # (e.g. during rotation) or carry an unrepresentable mtime;
            # leave it out of the listing rather than fail the whole call.
            continue
        results.append({
            'name': service,
            'file': filename,
            'size': stat.st_size,
            'modified': modified,
            'backup': is_backup,
        })
    return results
|
||||
|
||||
def compress_old_logs(self):
|
||||
|
||||
Reference in New Issue
Block a user