fix: spawn helper container for all-services restart so API survives

When containers=['*'] (ip_range change or full restart), the previous
code ran `docker compose down` / `docker compose up` in a background
thread inside the cell-api container. `docker compose down` stopped
cell-api itself, which killed that thread before `docker compose up`
could run, leaving all containers stopped.

Fix: spawn an independent helper container via `docker run --rm` (image
pic_api:latest) with the Docker socket and the project directory mounted.
The helper keeps running after cell-api is stopped and completes
`docker compose up -d` on its own.

For specific-container restarts (port changes), keep the direct approach
(`docker compose up -d --no-deps <containers>` run from a background
thread in cell-api), since the API container is not in the affected set.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-04-22 15:02:26 -04:00
parent 5f4f635719
commit c9ed28f258
+54 -30
View File
@@ -692,40 +692,64 @@ def apply_pending_config():
# existing network's subnet in-place, so we need `down` + `up` in that case.
needs_network_recreate = pending.get('network_recreate', False)
host_env = os.path.join(project_dir, '.env')
host_compose = os.path.join(project_dir, 'docker-compose.yml')
if '*' in containers:
# All-services restart: `docker compose down` or `up -d` may stop/recreate the
# API container itself, killing this background thread mid-operation.
# Spawn an independent helper container using pic_api:latest that has docker CLI
# and survives cell-api being stopped/recreated.
if needs_network_recreate:
# down removes containers AND the bridge network; up recreates everything
compose_down_args = ['down']
compose_up_args = ['up', '-d']
helper_script = (
f'sleep 2'
f' && docker compose --project-directory {project_dir}'
f' -f {host_compose} --env-file {host_env} down'
f' && docker compose --project-directory {project_dir}'
f' -f {host_compose} --env-file {host_env} up -d'
)
else:
compose_down_args = None
compose_up_args = ['up', '-d']
helper_script = (
f'sleep 2'
f' && docker compose --project-directory {project_dir}'
f' -f {host_compose} --env-file {host_env} up -d'
)
def _do_apply():
import subprocess as _subprocess
_subprocess.Popen(
['docker', 'run', '--rm',
'-v', '/var/run/docker.sock:/var/run/docker.sock',
'-v', f'{project_dir}:{project_dir}',
'--entrypoint', 'sh',
'pic_api:latest',
'-c', helper_script],
close_fds=True,
stdout=_subprocess.DEVNULL,
stderr=_subprocess.DEVNULL,
)
logger.info(
'spawned helper container for all-services restart'
+ (' (network_recreate)' if needs_network_recreate else '')
)
else:
compose_down_args = None
compose_up_args = ['up', '-d', '--no-deps'] + containers
base_cmd = ['docker', 'compose',
'--project-directory', project_dir,
'-f', '/app/docker-compose.yml',
'--env-file', '/app/.env.compose']
# Run in a background thread; 0.3 s delay lets Flask send this response first.
def _do_apply():
import time as _time
import subprocess as _subprocess
_time.sleep(0.3)
if compose_down_args:
r = _subprocess.run(base_cmd + compose_down_args,
capture_output=True, text=True, timeout=60)
if r.returncode != 0:
logger.error(f"docker compose down failed: {r.stderr.strip()}")
return
result = _subprocess.run(base_cmd + compose_up_args,
capture_output=True, text=True, timeout=120)
if result.returncode != 0:
logger.error(f"docker compose up failed: {result.stderr.strip()}")
else:
logger.info(f'docker compose {" ".join(compose_up_args)} completed successfully')
# Specific containers only — API is not affected, run directly from here.
def _do_apply():
import time as _time
import subprocess as _subprocess
_time.sleep(0.3)
result = _subprocess.run(
['docker', 'compose',
'--project-directory', project_dir,
'-f', '/app/docker-compose.yml',
'--env-file', '/app/.env.compose',
'up', '-d', '--no-deps'] + containers,
capture_output=True, text=True, timeout=120,
)
if result.returncode != 0:
logger.error(f"docker compose up failed: {result.stderr.strip()}")
else:
logger.info(f'docker compose up completed for: {containers}')
threading.Thread(target=_do_apply, daemon=False).start()