fix: resolve all Cell Identity banner and cert issues
Unit Tests / test (push) Successful in 7m17s

Four bugs fixed:

1. Banner delay (up to 5 s): DraftConfigContext now exposes isDirty as
   reactive useState so App.jsx re-renders immediately when any section
   marks itself dirty, instead of waiting for the next checkPending() poll.

2. Banner re-triggers after Apply (race): For non-'*' container restarts
   (e.g., cell_name → DNS restart) the background thread took ~300 ms to
   clear _pending_restart. A concurrent checkPending() poll could see
   needs_restart=True and overwrite the frontend's optimistic clear.
   Fix: set needs_restart=False and applying=True synchronously before
   spawning the thread.

3. Banner stayed visible while applyPending() ran when hasDirty()==false:
   setApplyStatus('saving') was skipped for the auto-save-then-apply
   path, leaving applyStatus=null while applyPending() ran and the
   banner stayed visible. Always set 'saving' before applyPending().

4. Cert status always 'unknown' in pic_ngo mode: _check_cert_via_ssl
   connected to cell-caddy:443 but sent SNI='cell-caddy'. Caddy finds no
   matching cert and returns nothing. Fix: pass the effective public
   domain (e.g. pic1.pic.ngo) as SNI so Caddy returns the right cert.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-06-10 04:17:56 -04:00
parent ec8995d41e
commit 649378b59b
7 changed files with 171 additions and 19 deletions
+39
View File
@@ -412,6 +412,45 @@ class TestRefreshCertStatus(unittest.TestCase):
# Should have been persisted to identity
mgr.config_manager.set_identity_field.assert_called_with('tls', expected)
def test_refresh_cert_status_uses_effective_domain_as_sni(self):
    """refresh_cert_status passes the effective domain as SNI, not the container hostname.

    Without this, Caddy receives SNI='cell-caddy' which matches no certificate
    and the SSL handshake returns nothing, leaving cert status as 'unknown'.
    """
    mgr = _mgr(identity={'cell_name': 'pic1', 'domain_mode': 'pic_ngo'})
    mgr.config_manager.get_effective_domain.return_value = 'pic1.pic.ngo'
    expected = {'status': 'valid', 'expiry': '2026-12-01T00:00:00+00:00', 'days_remaining': 179}
    with patch.object(CaddyManager, '_check_cert_via_ssl', return_value=expected) as mock_ssl:
        mgr.refresh_cert_status()
    # Fail with a clear message if the SSL check was never attempted; otherwise
    # call_args would be None and the lookups below would raise an opaque
    # AttributeError instead of a meaningful assertion failure.
    mock_ssl.assert_called()
    # The SNI must be the effective domain, not the container name. Accept it
    # either as the 'sni' keyword or as the third positional argument.
    ssl_call = mock_ssl.call_args
    sni_passed = ssl_call.kwargs.get('sni') or (
        ssl_call.args[2] if len(ssl_call.args) > 2 else None
    )
    self.assertEqual(sni_passed, 'pic1.pic.ngo',
                     f'Expected SNI=pic1.pic.ngo but got {sni_passed!r}')
def test_check_cert_via_ssl_passes_sni_to_wrap_socket(self):
    """The sni argument of _check_cert_via_ssl is used as server_hostname in the SSL handshake."""
    # Stand-in for the raw TCP socket; entering it as a context manager yields itself.
    tcp_sock = MagicMock()
    tcp_sock.__enter__.return_value = tcp_sock
    tcp_sock.__exit__.return_value = False

    # Stand-in for the wrapped TLS socket, which hands back a DER certificate.
    tls_sock = MagicMock()
    tls_sock.__enter__.return_value = tls_sock
    tls_sock.__exit__.return_value = False
    tls_sock.getpeercert.return_value = self._make_der_cert(60)

    with patch('caddy_manager._socket.create_connection', return_value=tcp_sock) as mock_conn, \
            patch('caddy_manager._ssl.create_default_context') as mock_ctx:
        ssl_context = mock_ctx.return_value
        ssl_context.wrap_socket.return_value = tls_sock
        CaddyManager._check_cert_via_ssl('cell-caddy', 443, sni='pic1.pic.ngo')

    # TCP connects to container hostname, SSL handshake uses the public domain
    mock_conn.assert_called_with(('cell-caddy', 443), timeout=5)
    ssl_context.wrap_socket.assert_called_with(
        tcp_sock, server_hostname='pic1.pic.ngo'
    )
def test_refresh_cert_status_ssl_failure_returns_unknown(self):
"""When SSL check returns None, status is 'unknown'."""
mgr = _mgr(identity={'cell_name': 'alpha', 'domain_mode': 'pic_ngo'})
+39
View File
@@ -179,6 +179,45 @@ class TestConfigApplyRoute(unittest.TestCase):
self.assertIn('-d', cmd)
self.assertIn('dns', cmd)
# ── Race-condition fix: needs_restart cleared synchronously ────────────
# For non-'*' container restarts the background thread takes ~300 ms.
# The frontend polls /api/config/pending every 5 s; if needs_restart is
# still True when that poll fires, the banner re-appears after Apply.
# Fix: set needs_restart=False and applying=True before spawning the thread.
@patch('threading.Thread')
@patch('docker.from_env')
def test_specific_containers_clears_needs_restart_synchronously(self, mock_docker, mock_thread):
    """Applying a non-'*' restart must update the pending flags before the thread runs."""
    # Arrange: Thread is mocked, so the background worker never executes and
    # any flag change observed below was made synchronously by the route.
    mock_thread.return_value = MagicMock()
    mock_docker.side_effect = Exception('no docker in test')
    _set_pending_restart(['cell_name changed to pic2'], ['dns'])

    # Act
    self.client.post('/api/config/apply')

    # Assert
    state = config_manager.configs.get('_pending_restart', {})
    needs_restart = state.get('needs_restart', True)
    self.assertFalse(needs_restart,
                     'needs_restart must be False immediately after apply for non-* restarts')
    applying = state.get('applying', False)
    self.assertTrue(applying,
                    'applying must be True while the background thread runs')
@patch('threading.Thread')
@patch('docker.from_env')
def test_wildcard_containers_sets_applying_but_not_clears_needs_restart(
        self, mock_docker, mock_thread):
    """For '*' restarts the helper container clears the flag; API must not."""
    mock_docker.side_effect = Exception('no docker in test')
    mock_thread.return_value = MagicMock()  # thread is mocked, so it never runs
    _set_pending_restart(['ip_range changed'], ['*'])
    self.client.post('/api/config/apply')
    pending = config_manager.configs.get('_pending_restart', {})
    # Wildcard restart: API sets applying=True but leaves needs_restart=True
    # so the helper container can clear it on success.
    self.assertTrue(pending.get('applying', False))
    # Pin the second half of the contract named in the test title, which was
    # previously described but never asserted.
    # NOTE(review): assumes _set_pending_restart sets needs_restart=True — confirm.
    self.assertTrue(pending.get('needs_restart', False),
                    "needs_restart must stay True after apply for '*' restarts")
# ── Exception in route body returns 500 ───────────────────────────────
@patch('app.config_manager')