fix(gateway): prevent reconnect race and hide windows subprocess consoles
Co-authored-by: Haze <hazeone@users.noreply.github.com>
This commit is contained in:
@@ -35,6 +35,12 @@ import { syncGatewayTokenToConfig, syncBrowserConfigToOpenClaw, sanitizeOpenClaw
|
||||
import { buildProxyEnv, resolveProxySettings } from '../utils/proxy';
|
||||
import { syncProxyConfigToOpenClaw } from '../utils/openclaw-proxy';
|
||||
import { shouldAttemptConfigAutoRepair } from './startup-recovery';
|
||||
import {
|
||||
getReconnectSkipReason,
|
||||
isLifecycleSuperseded,
|
||||
nextLifecycleEpoch,
|
||||
shouldHideConsoleWindow,
|
||||
} from './process-policy';
|
||||
|
||||
/**
|
||||
* Gateway connection status
|
||||
@@ -162,6 +168,13 @@ function ensureGatewayFetchPreload(): string {
|
||||
return dest;
|
||||
}
|
||||
|
||||
/**
 * Error raised when an in-flight Gateway lifecycle operation detects that the
 * lifecycle epoch it started under is no longer the current one.
 *
 * It is identified with `instanceof`, so the prototype chain must stay intact.
 */
class LifecycleSupersededError extends Error {
  /**
   * @param message Description of the superseded phase and the epochs involved.
   */
  constructor(message: string) {
    super(message);
    // Subclasses of built-ins can lose their prototype when compiled to older
    // targets; restoring it keeps `instanceof LifecycleSupersededError` reliable.
    Object.setPrototypeOf(this, new.target.prototype);
    this.name = 'LifecycleSupersededError';
  }
}
|
||||
|
||||
/**
|
||||
* Gateway Manager
|
||||
* Handles starting, stopping, and communicating with the OpenClaw Gateway
|
||||
@@ -187,6 +200,7 @@ export class GatewayManager extends EventEmitter {
|
||||
}> = new Map();
|
||||
private deviceIdentity: DeviceIdentity | null = null;
|
||||
private restartDebounceTimer: NodeJS.Timeout | null = null;
|
||||
private lifecycleEpoch = 0;
|
||||
|
||||
constructor(config?: Partial<ReconnectConfig>) {
|
||||
super();
|
||||
@@ -247,6 +261,20 @@ export class GatewayManager extends EventEmitter {
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Advance the lifecycle epoch and return the new value.
 *
 * @param reason Short label included in the debug log entry.
 * @returns The epoch value after advancing.
 */
private bumpLifecycleEpoch(reason: string): number {
  const advancedEpoch = nextLifecycleEpoch(this.lifecycleEpoch);
  this.lifecycleEpoch = advancedEpoch;
  logger.debug(`Gateway lifecycle epoch advanced to ${advancedEpoch} (${reason})`);
  return advancedEpoch;
}
|
||||
|
||||
/**
 * Throw if the lifecycle epoch has changed since `expectedEpoch` was captured.
 *
 * @param expectedEpoch Epoch value the caller captured when its operation began.
 * @param phase Label for the step being checked; embedded in the error message.
 * @throws LifecycleSupersededError when `isLifecycleSuperseded` reports that the
 *   current epoch no longer matches `expectedEpoch`.
 */
private assertLifecycleEpoch(expectedEpoch: number, phase: string): void {
  if (!isLifecycleSuperseded(expectedEpoch, this.lifecycleEpoch)) {
    return;
  }
  throw new LifecycleSupersededError(
    `Gateway ${phase} superseded (expectedEpoch=${expectedEpoch}, currentEpoch=${this.lifecycleEpoch})`
  );
}
|
||||
|
||||
/**
|
||||
* Get current Gateway status
|
||||
*/
|
||||
@@ -276,6 +304,7 @@ export class GatewayManager extends EventEmitter {
|
||||
}
|
||||
|
||||
this.startLock = true;
|
||||
const startEpoch = this.bumpLifecycleEpoch('start');
|
||||
logger.info(`Gateway start requested (port=${this.status.port})`);
|
||||
this.lastSpawnSummary = null;
|
||||
this.shouldReconnect = true;
|
||||
@@ -310,14 +339,17 @@ export class GatewayManager extends EventEmitter {
|
||||
|
||||
try {
|
||||
while (true) {
|
||||
this.assertLifecycleEpoch(startEpoch, 'start');
|
||||
this.recentStartupStderrLines = [];
|
||||
try {
|
||||
// Check if Gateway is already running
|
||||
logger.debug('Checking for existing Gateway...');
|
||||
const existing = await this.findExistingGateway();
|
||||
this.assertLifecycleEpoch(startEpoch, 'start/find-existing');
|
||||
if (existing) {
|
||||
logger.debug(`Found existing Gateway on port ${existing.port}`);
|
||||
await this.connect(existing.port, existing.externalToken);
|
||||
this.assertLifecycleEpoch(startEpoch, 'start/connect-existing');
|
||||
this.ownsProcess = false;
|
||||
this.setStatus({ pid: undefined });
|
||||
this.startHealthCheck();
|
||||
@@ -328,18 +360,24 @@ export class GatewayManager extends EventEmitter {
|
||||
|
||||
// Start new Gateway process
|
||||
await this.startProcess();
|
||||
this.assertLifecycleEpoch(startEpoch, 'start/start-process');
|
||||
|
||||
// Wait for Gateway to be ready
|
||||
await this.waitForReady();
|
||||
this.assertLifecycleEpoch(startEpoch, 'start/wait-ready');
|
||||
|
||||
// Connect WebSocket
|
||||
await this.connect(this.status.port);
|
||||
this.assertLifecycleEpoch(startEpoch, 'start/connect');
|
||||
|
||||
// Start health monitoring
|
||||
this.startHealthCheck();
|
||||
logger.debug('Gateway started successfully');
|
||||
return;
|
||||
} catch (error) {
|
||||
if (error instanceof LifecycleSupersededError) {
|
||||
throw error;
|
||||
}
|
||||
if (shouldAttemptConfigAutoRepair(error, this.recentStartupStderrLines, configRepairAttempted)) {
|
||||
configRepairAttempted = true;
|
||||
logger.warn(
|
||||
@@ -358,6 +396,10 @@ export class GatewayManager extends EventEmitter {
|
||||
}
|
||||
|
||||
} catch (error) {
|
||||
if (error instanceof LifecycleSupersededError) {
|
||||
logger.debug(error.message);
|
||||
return;
|
||||
}
|
||||
logger.error(
|
||||
`Gateway start failed (port=${this.status.port}, reconnectAttempts=${this.reconnectAttempts}, spawn=${this.lastSpawnSummary ?? 'n/a'})`,
|
||||
error
|
||||
@@ -374,6 +416,7 @@ export class GatewayManager extends EventEmitter {
|
||||
*/
|
||||
async stop(): Promise<void> {
|
||||
logger.info('Gateway stop requested');
|
||||
this.bumpLifecycleEpoch('stop');
|
||||
// Disable auto-reconnect
|
||||
this.shouldReconnect = false;
|
||||
|
||||
@@ -666,7 +709,7 @@ export class GatewayManager extends EventEmitter {
|
||||
|
||||
const { stdout } = await new Promise<{ stdout: string }>((resolve, reject) => {
|
||||
import('child_process').then(cp => {
|
||||
cp.exec(cmd, { timeout: 5000 }, (err, stdout) => {
|
||||
cp.exec(cmd, { timeout: 5000, windowsHide: shouldHideConsoleWindow() }, (err, stdout) => {
|
||||
if (err) resolve({ stdout: '' });
|
||||
else resolve({ stdout });
|
||||
});
|
||||
@@ -694,7 +737,11 @@ export class GatewayManager extends EventEmitter {
|
||||
if (process.platform === 'win32') {
|
||||
// On Windows, use taskkill for reliable process group termination
|
||||
import('child_process').then(cp => {
|
||||
cp.exec(`taskkill /PID ${pid} /T /F`, { timeout: 5000 }, () => { });
|
||||
cp.exec(
|
||||
`taskkill /PID ${pid} /T /F`,
|
||||
{ timeout: 5000, windowsHide: shouldHideConsoleWindow() },
|
||||
() => { }
|
||||
);
|
||||
}).catch(() => { });
|
||||
} else {
|
||||
// SIGTERM first so the gateway can clean up its lock file.
|
||||
@@ -797,6 +844,7 @@ export class GatewayManager extends EventEmitter {
|
||||
stdio: ['ignore', 'pipe', 'pipe'],
|
||||
detached: false,
|
||||
shell: false,
|
||||
windowsHide: shouldHideConsoleWindow(),
|
||||
env: spawnEnv,
|
||||
});
|
||||
|
||||
@@ -1050,6 +1098,7 @@ export class GatewayManager extends EventEmitter {
|
||||
stdio: ['ignore', 'pipe', 'pipe'],
|
||||
detached: false,
|
||||
shell: useShell,
|
||||
windowsHide: shouldHideConsoleWindow(),
|
||||
env: spawnEnv,
|
||||
});
|
||||
const child = this.process;
|
||||
@@ -1557,27 +1606,24 @@ export class GatewayManager extends EventEmitter {
|
||||
state: 'reconnecting',
|
||||
reconnectAttempts: this.reconnectAttempts
|
||||
});
|
||||
const scheduledEpoch = this.lifecycleEpoch;
|
||||
|
||||
this.reconnectTimer = setTimeout(async () => {
|
||||
this.reconnectTimer = null;
|
||||
const skipReason = getReconnectSkipReason({
|
||||
scheduledEpoch,
|
||||
currentEpoch: this.lifecycleEpoch,
|
||||
shouldReconnect: this.shouldReconnect,
|
||||
});
|
||||
if (skipReason) {
|
||||
logger.debug(`Skipping reconnect attempt: ${skipReason}`);
|
||||
return;
|
||||
}
|
||||
try {
|
||||
// Try to find existing Gateway first
|
||||
const existing = await this.findExistingGateway();
|
||||
if (existing) {
|
||||
await this.connect(existing.port, existing.externalToken);
|
||||
this.ownsProcess = false;
|
||||
this.setStatus({ pid: undefined });
|
||||
this.reconnectAttempts = 0;
|
||||
this.startHealthCheck();
|
||||
return;
|
||||
}
|
||||
|
||||
// Otherwise restart the process
|
||||
await this.startProcess();
|
||||
await this.waitForReady();
|
||||
await this.connect(this.status.port);
|
||||
// Use the guarded start() flow so reconnect attempts cannot bypass
|
||||
// lifecycle locking and accidentally start duplicate Gateway processes.
|
||||
await this.start();
|
||||
this.reconnectAttempts = 0;
|
||||
this.startHealthCheck();
|
||||
} catch (error) {
|
||||
logger.error('Gateway reconnection attempt failed:', error);
|
||||
this.scheduleReconnect();
|
||||
|
||||
27
electron/gateway/process-policy.ts
Normal file
27
electron/gateway/process-policy.ts
Normal file
@@ -0,0 +1,27 @@
|
||||
/**
 * Decide whether subprocesses should be started with their console window hidden.
 *
 * @param platform Platform identifier to evaluate; defaults to `process.platform`.
 * @returns `true` only when `platform` is `'win32'`.
 */
export function shouldHideConsoleWindow(platform: NodeJS.Platform = process.platform): boolean {
  return platform === 'win32';
}
|
||||
|
||||
/**
 * Compute the lifecycle epoch that follows `currentEpoch`.
 *
 * @param currentEpoch The epoch value in effect now.
 * @returns `currentEpoch` incremented by one.
 */
export function nextLifecycleEpoch(currentEpoch: number): number {
  return currentEpoch + 1;
}
|
||||
|
||||
/**
 * Report whether an operation's captured epoch no longer matches the current one.
 *
 * @param expectedEpoch Epoch captured when the operation was started or scheduled.
 * @param currentEpoch Epoch in effect at the time of the check.
 * @returns `true` when the two values differ (strict inequality), else `false`.
 */
export function isLifecycleSuperseded(expectedEpoch: number, currentEpoch: number): boolean {
  return expectedEpoch !== currentEpoch;
}
|
||||
|
||||
/**
 * Inputs used to decide whether a scheduled reconnect attempt should still run.
 */
export interface ReconnectAttemptContext {
  /** Lifecycle epoch captured when the reconnect attempt was scheduled. */
  scheduledEpoch: number;
  /** Lifecycle epoch in effect when the attempt is about to run. */
  currentEpoch: number;
  /** Whether auto-reconnect is enabled at the time of the check. */
  shouldReconnect: boolean;
}
|
||||
|
||||
/**
 * Explain why a scheduled reconnect attempt should be skipped, if it should.
 *
 * The disabled-reconnect check runs first, so it takes precedence when the
 * epoch is also stale.
 *
 * @param context Epochs and reconnect flag describing the pending attempt.
 * @returns A human-readable skip reason, or `null` when the attempt may proceed.
 */
export function getReconnectSkipReason(context: ReconnectAttemptContext): string | null {
  const { scheduledEpoch, currentEpoch, shouldReconnect } = context;

  if (!shouldReconnect) {
    return 'auto-reconnect disabled';
  }

  if (isLifecycleSuperseded(scheduledEpoch, currentEpoch)) {
    return `stale reconnect callback (scheduledEpoch=${scheduledEpoch}, currentEpoch=${currentEpoch})`;
  }

  return null;
}
|
||||
64
tests/unit/gateway-process-policy.test.ts
Normal file
64
tests/unit/gateway-process-policy.test.ts
Normal file
@@ -0,0 +1,64 @@
|
||||
import { describe, expect, it } from 'vitest';
|
||||
import {
|
||||
getReconnectSkipReason,
|
||||
isLifecycleSuperseded,
|
||||
nextLifecycleEpoch,
|
||||
shouldHideConsoleWindow,
|
||||
} from '@electron/gateway/process-policy';
|
||||
|
||||
// Unit tests for the pure helpers in electron/gateway/process-policy.
describe('gateway process policy helpers', () => {
  describe('shouldHideConsoleWindow', () => {
    it('returns true on Windows', () => {
      expect(shouldHideConsoleWindow('win32')).toBe(true);
    });

    it('returns false on non-Windows platforms', () => {
      expect(shouldHideConsoleWindow('darwin')).toBe(false);
      expect(shouldHideConsoleWindow('linux')).toBe(false);
    });
  });

  describe('lifecycle epoch helpers', () => {
    it('increments lifecycle epoch by one', () => {
      expect(nextLifecycleEpoch(0)).toBe(1);
      expect(nextLifecycleEpoch(5)).toBe(6);
    });

    it('detects superseded lifecycle epochs', () => {
      expect(isLifecycleSuperseded(3, 4)).toBe(true);
      expect(isLifecycleSuperseded(8, 8)).toBe(false);
    });
  });

  describe('getReconnectSkipReason', () => {
    it('skips reconnect when auto-reconnect is disabled', () => {
      expect(
        getReconnectSkipReason({
          scheduledEpoch: 10,
          currentEpoch: 10,
          shouldReconnect: false,
        })
      ).toBe('auto-reconnect disabled');
    });

    it('skips stale reconnect callbacks when lifecycle epoch changed', () => {
      expect(
        getReconnectSkipReason({
          scheduledEpoch: 11,
          currentEpoch: 12,
          shouldReconnect: true,
        })
      ).toContain('stale reconnect callback');
    });

    // Pins the check order: the disabled flag wins even when the epoch is stale.
    it('reports auto-reconnect disabled even when the epoch is also stale', () => {
      expect(
        getReconnectSkipReason({
          scheduledEpoch: 11,
          currentEpoch: 12,
          shouldReconnect: false,
        })
      ).toBe('auto-reconnect disabled');
    });

    it('allows reconnect when callback is current and reconnect enabled', () => {
      expect(
        getReconnectSkipReason({
          scheduledEpoch: 7,
          currentEpoch: 7,
          shouldReconnect: true,
        })
      ).toBeNull();
    });
  });
});
|
||||
Reference in New Issue
Block a user