fix: harden gateway process shutdown lifecycle

Co-authored-by: Haze <hazeone@users.noreply.github.com>
This commit is contained in:
Cursor Agent
2026-03-18 12:09:25 +00:00
committed by Haze
parent 6b0400e3c3
commit 4d6a60fa77
9 changed files with 285 additions and 13 deletions

View File

@@ -251,6 +251,15 @@ ClawXは、**デュアルプロセス + Host API 統一アクセス**構成を
- **セキュアストレージ**: APIキーや機密データは、OSのネイティブセキュアストレージ機構を活用します
- **CORSセーフ設計**: ローカルHTTPはMainプロキシ経由とし、Renderer側CORS問題を回避します
### プロセスモデルと Gateway トラブルシューティング
- ClawX は Electron アプリのため、**1つのアプリインスタンスでも複数プロセス（main/renderer/zygote/utility）が表示される**のが正常です。
- ただし OpenClaw Gateway の待受は常に**単一**であるべきです。`127.0.0.1:18789` を Listen しているプロセスは1つだけです。
- Listen プロセスの確認例:
- macOS/Linux: `lsof -nP -iTCP:18789 -sTCP:LISTEN`
- Windows (PowerShell): `Get-NetTCPConnection -LocalPort 18789 -State Listen`
- ウィンドウの閉じるボタン(`X`)は既定でトレイへ最小化する動作で、完全終了ではありません。完全終了する場合はトレイメニューの **Quit ClawX** を使用してください。
---
## ユースケース

View File

@@ -255,6 +255,15 @@ ClawX employs a **dual-process architecture** with a unified host API layer. The
- **Secure Storage**: API keys and sensitive data leverage the operating system's native secure storage mechanisms
- **CORS-Safe by Design**: Local HTTP access is proxied by Main, preventing renderer-side CORS issues
### Process Model & Gateway Troubleshooting
- ClawX is an Electron app, so **one app instance normally appears as multiple OS processes** (main/renderer/zygote/utility). This is expected.
- The OpenClaw Gateway listener should still be **single-owner**: only one process should listen on `127.0.0.1:18789`.
- To verify the active listener:
- macOS/Linux: `lsof -nP -iTCP:18789 -sTCP:LISTEN`
- Windows (PowerShell): `Get-NetTCPConnection -LocalPort 18789 -State Listen`
- Clicking the window close button (`X`) hides ClawX to the tray; it does **not** fully quit the app. Use the tray menu's **Quit ClawX** item for a complete shutdown.
---
## Use Cases

View File

@@ -255,6 +255,15 @@ ClawX 采用 **双进程 + Host API 统一接入架构**。渲染进程只调用
- **安全存储**：API 密钥和敏感数据利用操作系统原生的安全存储机制
- **CORS 安全**:本地 HTTP 请求由主进程代理,避免渲染进程跨域问题
### 进程模型与 Gateway 排障
- ClawX 基于 Electron，**单个应用实例出现多个系统进程是正常现象**（main/renderer/zygote/utility）。
- 但 OpenClaw Gateway 监听应始终保持**单实例**：`127.0.0.1:18789` 只能有一个监听者。
- 可用以下命令确认监听进程:
- macOS/Linux：`lsof -nP -iTCP:18789 -sTCP:LISTEN`
- Windows（PowerShell）：`Get-NetTCPConnection -LocalPort 18789 -State Listen`
- 点击窗口关闭按钮(`X`)默认只是最小化到托盘,并不会完全退出应用。请在托盘菜单中选择 **Quit ClawX** 执行完整退出。
---
## 使用场景

View File

@@ -241,6 +241,7 @@ export class GatewayManager extends EventEmitter {
onConnectedToExistingGateway: () => {
this.ownsProcess = false;
this.setStatus({ pid: undefined });
logger.info(`Gateway manager attached to external process on port ${this.status.port} (ownsProcess=false)`);
this.startHealthCheck();
},
waitForPortFree: async (port) => {
@@ -714,6 +715,7 @@ export class GatewayManager extends EventEmitter {
this.process = child;
this.ownsProcess = true;
logger.debug(`Gateway manager now owns process pid=${child.pid ?? 'unknown'}`);
this.lastSpawnSummary = lastSpawnSummary;
}

View File

@@ -24,6 +24,13 @@ export function warmupManagedPythonReadiness(): void {
export async function terminateOwnedGatewayProcess(child: Electron.UtilityProcess): Promise<void> {
let exited = false;
const terminateWindowsProcessTree = async (pid: number): Promise<void> => {
const cp = await import('child_process');
await new Promise<void>((resolve) => {
cp.exec(`taskkill /F /PID ${pid} /T`, { timeout: 5000, windowsHide: true }, () => resolve());
});
};
await new Promise<void>((resolve) => {
child.once('exit', () => {
exited = true;
@@ -32,20 +39,33 @@ export async function terminateOwnedGatewayProcess(child: Electron.UtilityProces
const pid = child.pid;
logger.info(`Sending kill to Gateway process (pid=${pid ?? 'unknown'})`);
try {
child.kill();
} catch {
// ignore if already exited
if (process.platform === 'win32' && pid) {
void terminateWindowsProcessTree(pid).catch((err) => {
logger.warn(`Windows process-tree kill failed for Gateway pid=${pid}:`, err);
});
} else {
try {
child.kill();
} catch {
// ignore if already exited
}
}
const timeout = setTimeout(() => {
if (!exited) {
logger.warn(`Gateway did not exit in time, force-killing (pid=${pid ?? 'unknown'})`);
if (pid) {
try {
process.kill(pid, 'SIGKILL');
} catch {
// ignore
if (process.platform === 'win32') {
void terminateWindowsProcessTree(pid).catch((err) => {
logger.warn(`Forced Windows process-tree kill failed for Gateway pid=${pid}:`, err);
});
} else {
try {
process.kill(pid, 'SIGKILL');
} catch {
// ignore
}
}
}
}
@@ -226,6 +246,9 @@ export async function findExistingGatewayProcess(options: {
const pids = await getListeningProcessIds(port);
if (pids.length > 0 && (!ownedPid || !pids.includes(String(ownedPid)))) {
await terminateOrphanedProcessIds(port, pids);
if (process.platform === 'win32') {
await waitForPortFree(port, 10000);
}
return null;
}
} catch (err) {

View File

@@ -27,6 +27,11 @@ import {
createMainWindowFocusState,
requestSecondInstanceFocus,
} from './main-window-focus';
import {
createQuitLifecycleState,
markQuitCleanupCompleted,
requestQuitLifecycleAction,
} from './quit-lifecycle';
import { getSetting } from '../utils/store';
import { ensureBuiltinSkillsInstalled, ensurePreinstalledSkillsInstalled } from '../utils/skill-config';
import { ensureAllBundledPluginsInstalled } from '../utils/plugin-install';
@@ -70,6 +75,7 @@ if (process.platform === 'linux') {
// The losing process must exit immediately so it never reaches Gateway startup.
const gotTheLock = app.requestSingleInstanceLock();
if (!gotTheLock) {
console.info('[ClawX] Another instance already holds the single-instance lock; exiting duplicate process');
app.exit(0);
}
@@ -80,6 +86,7 @@ let clawHubService!: ClawHubService;
let hostEventBus!: HostEventBus;
let hostApiServer: Server | null = null;
const mainWindowFocusState = createMainWindowFocusState();
const quitLifecycleState = createQuitLifecycleState();
/**
* Resolve the icons directory path (works in both dev and packaged mode)
@@ -216,7 +223,7 @@ async function initialize(): Promise<void> {
logger.init();
logger.info('=== ClawX Application Starting ===');
logger.debug(
`Runtime: platform=${process.platform}/${process.arch}, electron=${process.versions.electron}, node=${process.versions.node}, packaged=${app.isPackaged}`
`Runtime: platform=${process.platform}/${process.arch}, electron=${process.versions.electron}, node=${process.versions.node}, packaged=${app.isPackaged}, pid=${process.pid}, ppid=${process.ppid}`
);
// Warm up network optimization (non-blocking)
@@ -461,15 +468,38 @@ if (gotTheLock) {
}
});
app.on('before-quit', () => {
app.on('before-quit', (event) => {
setQuitting();
const action = requestQuitLifecycleAction(quitLifecycleState);
if (action === 'allow-quit') {
return;
}
event.preventDefault();
if (action === 'cleanup-in-progress') {
logger.debug('Quit requested while cleanup already in progress; waiting for shutdown task to finish');
return;
}
hostEventBus.closeAll();
hostApiServer?.close();
// Fire-and-forget: do not await gatewayManager.stop() here.
// Awaiting inside before-quit can stall Electron's quit sequence.
void gatewayManager.stop().catch((err) => {
const stopPromise = gatewayManager.stop().catch((err) => {
logger.warn('gatewayManager.stop() error during quit:', err);
});
const timeoutPromise = new Promise<'timeout'>((resolve) => {
setTimeout(() => resolve('timeout'), 5000);
});
void Promise.race([stopPromise.then(() => 'stopped' as const), timeoutPromise]).then((result) => {
if (result === 'timeout') {
logger.warn('Gateway shutdown timed out during app quit; proceeding with forced quit');
}
markQuitCleanupCompleted(quitLifecycleState);
app.quit();
});
});
}

View File

@@ -0,0 +1,30 @@
/**
 * Mutable bookkeeping for a quit sequence that performs cleanup before the
 * application is allowed to exit.
 */
export interface QuitLifecycleState {
  /** True once a quit request has been answered with `'start-cleanup'`. */
  cleanupStarted: boolean;
  /** True once `markQuitCleanupCompleted` has been called. */
  cleanupCompleted: boolean;
}

/**
 * Decision handed back to the caller for a single quit request:
 * - `'start-cleanup'`: this request is the first one and should run the cleanup.
 * - `'cleanup-in-progress'`: cleanup was already started and is not yet complete.
 * - `'allow-quit'`: cleanup has been marked complete.
 */
export type QuitLifecycleAction = 'start-cleanup' | 'cleanup-in-progress' | 'allow-quit';

/**
 * Builds a fresh lifecycle state with both flags cleared.
 *
 * @returns A new, independent state object.
 */
export function createQuitLifecycleState(): QuitLifecycleState {
  const initialState: QuitLifecycleState = {
    cleanupStarted: false,
    cleanupCompleted: false,
  };
  return initialState;
}

/**
 * Decides how a quit request should be handled and records that cleanup has
 * been claimed when this is the first request.
 *
 * @param state - Lifecycle state to inspect; `cleanupStarted` is set to `true`
 *   when `'start-cleanup'` is returned.
 * @returns The action the caller should take for this request.
 */
export function requestQuitLifecycleAction(state: QuitLifecycleState): QuitLifecycleAction {
  const isFirstRequest = !state.cleanupCompleted && !state.cleanupStarted;
  if (isFirstRequest) {
    // Claim the cleanup so later requests see it as already running.
    state.cleanupStarted = true;
    return 'start-cleanup';
  }
  // Completion takes precedence over the "started" flag.
  return state.cleanupCompleted ? 'allow-quit' : 'cleanup-in-progress';
}

/**
 * Records that cleanup is done, so subsequent requests get `'allow-quit'`.
 *
 * @param state - Lifecycle state to update in place.
 */
export function markQuitCleanupCompleted(state: QuitLifecycleState): void {
  state.cleanupCompleted = true;
}

View File

@@ -0,0 +1,137 @@
import { EventEmitter } from 'node:events';
import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest';
// Remember the real platform so it can be restored after each test overrides it.
const originalPlatform = process.platform;
// Create the shared mock functions via vi.hoisted so they already exist when
// the vi.mock factories below run.
const {
mockExec,
mockCreateServer,
} = vi.hoisted(() => ({
mockExec: vi.fn(),
mockCreateServer: vi.fn(),
}));
// Minimal Electron stub: only the members listed here are provided.
vi.mock('electron', () => ({
app: {
isPackaged: false,
getPath: () => '/tmp',
},
utilityProcess: {},
}));
// Replace child_process so shell commands are captured by mockExec; the same
// mock is exposed on both the named and the default export.
vi.mock('child_process', () => ({
exec: mockExec,
execSync: vi.fn(),
spawn: vi.fn(),
default: {
exec: mockExec,
execSync: vi.fn(),
spawn: vi.fn(),
},
}));
// Replace net.createServer so the tests control the server object it returns.
vi.mock('net', () => ({
createServer: mockCreateServer,
}));
/**
 * Lightweight stand-in for the utility-process child handed to the supervisor
 * in these tests: an event emitter carrying an optional `pid` and a spy
 * `kill()` so calls can be asserted.
 */
class MockUtilityChild extends EventEmitter {
  /** Process id reported by the fake child; may be left undefined. */
  pid?: number;
  /** Spy standing in for the child's kill method. */
  kill = vi.fn();

  /**
   * @param processId - Value to expose as `pid`.
   */
  constructor(processId?: number) {
    super();
    this.pid = processId;
  }
}
/**
 * Overrides `process.platform` for the current test by redefining the
 * property with the given value.
 *
 * @param platform - Platform identifier to report (e.g. `'win32'`, `'linux'`).
 */
function setPlatform(platform: string): void {
  const platformDescriptor: PropertyDescriptor = { value: platform, writable: true };
  Object.defineProperty(process, 'platform', platformDescriptor);
}
describe('gateway supervisor process cleanup', () => {
  beforeEach(() => {
    // Reset the module registry and recorded mock calls so each test's
    // dynamic import evaluates the supervisor module afresh.
    vi.resetModules();
    vi.clearAllMocks();

    // Default exec stub: report success with empty stdout right away.
    mockExec.mockImplementation(
      (_cmd: string, _opts: object, done: (err: Error | null, stdout: string) => void) => {
        done(null, '');
        return {} as never;
      },
    );

    // Fake server: remembers `once` handlers, fires 'listening' on the next
    // microtask after listen(), and completes close() synchronously.
    mockCreateServer.mockImplementation(() => {
      const registered = new Map<string, (...args: unknown[]) => void>();
      return {
        once(event: string, callback: (...args: unknown[]) => void) {
          registered.set(event, callback);
          return this;
        },
        listen() {
          queueMicrotask(() => registered.get('listening')?.());
          return this;
        },
        close(callback?: () => void) {
          callback?.();
        },
      };
    });
  });

  afterEach(() => {
    // Put the real platform back so later suites are unaffected.
    setPlatform(originalPlatform);
  });

  it('uses taskkill tree strategy for owned process on Windows', async () => {
    setPlatform('win32');
    const gatewayChild = new MockUtilityChild(4321);
    const { terminateOwnedGatewayProcess } = await import('@electron/gateway/supervisor');

    const termination = terminateOwnedGatewayProcess(gatewayChild as unknown as Electron.UtilityProcess);
    // Simulate the child exiting so the termination promise can settle.
    gatewayChild.emit('exit', 0);
    await termination;

    await vi.waitFor(() => {
      expect(mockExec).toHaveBeenCalledWith(
        'taskkill /F /PID 4321 /T',
        expect.objectContaining({ timeout: 5000, windowsHide: true }),
        expect.any(Function),
      );
    });
    expect(gatewayChild.kill).not.toHaveBeenCalled();
  });

  it('uses direct child.kill for owned process on non-Windows', async () => {
    setPlatform('linux');
    const gatewayChild = new MockUtilityChild(9876);
    const { terminateOwnedGatewayProcess } = await import('@electron/gateway/supervisor');

    const termination = terminateOwnedGatewayProcess(gatewayChild as unknown as Electron.UtilityProcess);
    // Simulate the child exiting so the termination promise can settle.
    gatewayChild.emit('exit', 0);
    await termination;

    expect(gatewayChild.kill).toHaveBeenCalledTimes(1);
  });

  it('waits for port release after orphan cleanup on Windows', async () => {
    setPlatform('win32');
    const { findExistingGatewayProcess } = await import('@electron/gateway/supervisor');

    // Answer the netstat query with one listener (pid 4321) on the gateway
    // port; every other command returns empty output.
    mockExec.mockImplementation(
      (cmd: string, _opts: object, done: (err: Error | null, stdout: string) => void) => {
        const stdout = cmd.includes('netstat -ano')
          ? ' TCP 127.0.0.1:18789 0.0.0.0:0 LISTENING 4321\n'
          : '';
        done(null, stdout);
        return {} as never;
      },
    );

    const existing = await findExistingGatewayProcess({ port: 18789 });

    expect(existing).toBeNull();
    expect(mockExec).toHaveBeenCalledWith(
      expect.stringContaining('taskkill /F /PID 4321 /T'),
      expect.objectContaining({ timeout: 5000, windowsHide: true }),
      expect.any(Function),
    );
    // The port probe creates a server, so this shows the wait step ran.
    expect(mockCreateServer).toHaveBeenCalled();
  });
});

View File

@@ -0,0 +1,23 @@
import { describe, expect, it } from 'vitest';
import {
createQuitLifecycleState,
markQuitCleanupCompleted,
requestQuitLifecycleAction,
} from '@electron/main/quit-lifecycle';
describe('main quit lifecycle coordination', () => {
  it('starts cleanup only once', () => {
    const lifecycle = createQuitLifecycleState();

    // The first request claims the cleanup.
    const firstAction = requestQuitLifecycleAction(lifecycle);
    expect(firstAction).toBe('start-cleanup');

    // A repeat request before completion must not start it again.
    const repeatAction = requestQuitLifecycleAction(lifecycle);
    expect(repeatAction).toBe('cleanup-in-progress');
  });

  it('allows quit after cleanup is marked complete', () => {
    const lifecycle = createQuitLifecycleState();

    const firstAction = requestQuitLifecycleAction(lifecycle);
    expect(firstAction).toBe('start-cleanup');

    markQuitCleanupCompleted(lifecycle);

    // Once completed, further requests are let through.
    const finalAction = requestQuitLifecycleAction(lifecycle);
    expect(finalAction).toBe('allow-quit');
  });
});