fix(gateway): prevent reconnect race and hide windows subprocess consoles
Co-authored-by: Haze <hazeone@users.noreply.github.com>
This commit is contained in:
@@ -35,6 +35,12 @@ import { syncGatewayTokenToConfig, syncBrowserConfigToOpenClaw, sanitizeOpenClaw
|
|||||||
import { buildProxyEnv, resolveProxySettings } from '../utils/proxy';
|
import { buildProxyEnv, resolveProxySettings } from '../utils/proxy';
|
||||||
import { syncProxyConfigToOpenClaw } from '../utils/openclaw-proxy';
|
import { syncProxyConfigToOpenClaw } from '../utils/openclaw-proxy';
|
||||||
import { shouldAttemptConfigAutoRepair } from './startup-recovery';
|
import { shouldAttemptConfigAutoRepair } from './startup-recovery';
|
||||||
|
import {
|
||||||
|
getReconnectSkipReason,
|
||||||
|
isLifecycleSuperseded,
|
||||||
|
nextLifecycleEpoch,
|
||||||
|
shouldHideConsoleWindow,
|
||||||
|
} from './process-policy';
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Gateway connection status
|
* Gateway connection status
|
||||||
@@ -162,6 +168,13 @@ function ensureGatewayFetchPreload(): string {
|
|||||||
return dest;
|
return dest;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Error raised when a gateway lifecycle operation finds that the epoch it
 * captured no longer matches the manager's current epoch.
 *
 * Callers distinguish it from ordinary failures with `instanceof`.
 */
class LifecycleSupersededError extends Error {
  constructor(message: string) {
    super(message);
    this.name = 'LifecycleSupersededError';
    // Restore the prototype chain: when compiled to ES5, subclasses of Error
    // otherwise lose their prototype and `instanceof` checks return false.
    Object.setPrototypeOf(this, new.target.prototype);
  }
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Gateway Manager
|
* Gateway Manager
|
||||||
* Handles starting, stopping, and communicating with the OpenClaw Gateway
|
* Handles starting, stopping, and communicating with the OpenClaw Gateway
|
||||||
@@ -187,6 +200,7 @@ export class GatewayManager extends EventEmitter {
|
|||||||
}> = new Map();
|
}> = new Map();
|
||||||
private deviceIdentity: DeviceIdentity | null = null;
|
private deviceIdentity: DeviceIdentity | null = null;
|
||||||
private restartDebounceTimer: NodeJS.Timeout | null = null;
|
private restartDebounceTimer: NodeJS.Timeout | null = null;
|
||||||
|
private lifecycleEpoch = 0;
|
||||||
|
|
||||||
constructor(config?: Partial<ReconnectConfig>) {
|
constructor(config?: Partial<ReconnectConfig>) {
|
||||||
super();
|
super();
|
||||||
@@ -247,6 +261,20 @@ export class GatewayManager extends EventEmitter {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Advance the lifecycle epoch and return the new value.
 *
 * @param reason Short label included in the debug log line.
 * @returns The epoch value after advancing.
 */
private bumpLifecycleEpoch(reason: string): number {
  this.lifecycleEpoch = nextLifecycleEpoch(this.lifecycleEpoch);
  logger.debug(`Gateway lifecycle epoch advanced to ${this.lifecycleEpoch} (${reason})`);
  return this.lifecycleEpoch;
}
|
||||||
|
|
||||||
|
/**
 * Throw if the lifecycle epoch has changed since `expectedEpoch` was captured.
 *
 * @param expectedEpoch Epoch value the caller captured earlier.
 * @param phase Label for the step being guarded; included in the error message.
 * @throws LifecycleSupersededError when the current epoch differs from `expectedEpoch`.
 */
private assertLifecycleEpoch(expectedEpoch: number, phase: string): void {
  if (isLifecycleSuperseded(expectedEpoch, this.lifecycleEpoch)) {
    throw new LifecycleSupersededError(
      `Gateway ${phase} superseded (expectedEpoch=${expectedEpoch}, currentEpoch=${this.lifecycleEpoch})`
    );
  }
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Get current Gateway status
|
* Get current Gateway status
|
||||||
*/
|
*/
|
||||||
@@ -276,6 +304,7 @@ export class GatewayManager extends EventEmitter {
|
|||||||
}
|
}
|
||||||
|
|
||||||
this.startLock = true;
|
this.startLock = true;
|
||||||
|
const startEpoch = this.bumpLifecycleEpoch('start');
|
||||||
logger.info(`Gateway start requested (port=${this.status.port})`);
|
logger.info(`Gateway start requested (port=${this.status.port})`);
|
||||||
this.lastSpawnSummary = null;
|
this.lastSpawnSummary = null;
|
||||||
this.shouldReconnect = true;
|
this.shouldReconnect = true;
|
||||||
@@ -310,14 +339,17 @@ export class GatewayManager extends EventEmitter {
|
|||||||
|
|
||||||
try {
|
try {
|
||||||
while (true) {
|
while (true) {
|
||||||
|
this.assertLifecycleEpoch(startEpoch, 'start');
|
||||||
this.recentStartupStderrLines = [];
|
this.recentStartupStderrLines = [];
|
||||||
try {
|
try {
|
||||||
// Check if Gateway is already running
|
// Check if Gateway is already running
|
||||||
logger.debug('Checking for existing Gateway...');
|
logger.debug('Checking for existing Gateway...');
|
||||||
const existing = await this.findExistingGateway();
|
const existing = await this.findExistingGateway();
|
||||||
|
this.assertLifecycleEpoch(startEpoch, 'start/find-existing');
|
||||||
if (existing) {
|
if (existing) {
|
||||||
logger.debug(`Found existing Gateway on port ${existing.port}`);
|
logger.debug(`Found existing Gateway on port ${existing.port}`);
|
||||||
await this.connect(existing.port, existing.externalToken);
|
await this.connect(existing.port, existing.externalToken);
|
||||||
|
this.assertLifecycleEpoch(startEpoch, 'start/connect-existing');
|
||||||
this.ownsProcess = false;
|
this.ownsProcess = false;
|
||||||
this.setStatus({ pid: undefined });
|
this.setStatus({ pid: undefined });
|
||||||
this.startHealthCheck();
|
this.startHealthCheck();
|
||||||
@@ -328,18 +360,24 @@ export class GatewayManager extends EventEmitter {
|
|||||||
|
|
||||||
// Start new Gateway process
|
// Start new Gateway process
|
||||||
await this.startProcess();
|
await this.startProcess();
|
||||||
|
this.assertLifecycleEpoch(startEpoch, 'start/start-process');
|
||||||
|
|
||||||
// Wait for Gateway to be ready
|
// Wait for Gateway to be ready
|
||||||
await this.waitForReady();
|
await this.waitForReady();
|
||||||
|
this.assertLifecycleEpoch(startEpoch, 'start/wait-ready');
|
||||||
|
|
||||||
// Connect WebSocket
|
// Connect WebSocket
|
||||||
await this.connect(this.status.port);
|
await this.connect(this.status.port);
|
||||||
|
this.assertLifecycleEpoch(startEpoch, 'start/connect');
|
||||||
|
|
||||||
// Start health monitoring
|
// Start health monitoring
|
||||||
this.startHealthCheck();
|
this.startHealthCheck();
|
||||||
logger.debug('Gateway started successfully');
|
logger.debug('Gateway started successfully');
|
||||||
return;
|
return;
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
|
if (error instanceof LifecycleSupersededError) {
|
||||||
|
throw error;
|
||||||
|
}
|
||||||
if (shouldAttemptConfigAutoRepair(error, this.recentStartupStderrLines, configRepairAttempted)) {
|
if (shouldAttemptConfigAutoRepair(error, this.recentStartupStderrLines, configRepairAttempted)) {
|
||||||
configRepairAttempted = true;
|
configRepairAttempted = true;
|
||||||
logger.warn(
|
logger.warn(
|
||||||
@@ -358,6 +396,10 @@ export class GatewayManager extends EventEmitter {
|
|||||||
}
|
}
|
||||||
|
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
|
if (error instanceof LifecycleSupersededError) {
|
||||||
|
logger.debug(error.message);
|
||||||
|
return;
|
||||||
|
}
|
||||||
logger.error(
|
logger.error(
|
||||||
`Gateway start failed (port=${this.status.port}, reconnectAttempts=${this.reconnectAttempts}, spawn=${this.lastSpawnSummary ?? 'n/a'})`,
|
`Gateway start failed (port=${this.status.port}, reconnectAttempts=${this.reconnectAttempts}, spawn=${this.lastSpawnSummary ?? 'n/a'})`,
|
||||||
error
|
error
|
||||||
@@ -374,6 +416,7 @@ export class GatewayManager extends EventEmitter {
|
|||||||
*/
|
*/
|
||||||
async stop(): Promise<void> {
|
async stop(): Promise<void> {
|
||||||
logger.info('Gateway stop requested');
|
logger.info('Gateway stop requested');
|
||||||
|
this.bumpLifecycleEpoch('stop');
|
||||||
// Disable auto-reconnect
|
// Disable auto-reconnect
|
||||||
this.shouldReconnect = false;
|
this.shouldReconnect = false;
|
||||||
|
|
||||||
@@ -666,7 +709,7 @@ export class GatewayManager extends EventEmitter {
|
|||||||
|
|
||||||
const { stdout } = await new Promise<{ stdout: string }>((resolve, reject) => {
|
const { stdout } = await new Promise<{ stdout: string }>((resolve, reject) => {
|
||||||
import('child_process').then(cp => {
|
import('child_process').then(cp => {
|
||||||
cp.exec(cmd, { timeout: 5000 }, (err, stdout) => {
|
cp.exec(cmd, { timeout: 5000, windowsHide: shouldHideConsoleWindow() }, (err, stdout) => {
|
||||||
if (err) resolve({ stdout: '' });
|
if (err) resolve({ stdout: '' });
|
||||||
else resolve({ stdout });
|
else resolve({ stdout });
|
||||||
});
|
});
|
||||||
@@ -694,7 +737,11 @@ export class GatewayManager extends EventEmitter {
|
|||||||
if (process.platform === 'win32') {
|
if (process.platform === 'win32') {
|
||||||
// On Windows, use taskkill for reliable process group termination
|
// On Windows, use taskkill for reliable process group termination
|
||||||
import('child_process').then(cp => {
|
import('child_process').then(cp => {
|
||||||
cp.exec(`taskkill /PID ${pid} /T /F`, { timeout: 5000 }, () => { });
|
cp.exec(
|
||||||
|
`taskkill /PID ${pid} /T /F`,
|
||||||
|
{ timeout: 5000, windowsHide: shouldHideConsoleWindow() },
|
||||||
|
() => { }
|
||||||
|
);
|
||||||
}).catch(() => { });
|
}).catch(() => { });
|
||||||
} else {
|
} else {
|
||||||
// SIGTERM first so the gateway can clean up its lock file.
|
// SIGTERM first so the gateway can clean up its lock file.
|
||||||
@@ -797,6 +844,7 @@ export class GatewayManager extends EventEmitter {
|
|||||||
stdio: ['ignore', 'pipe', 'pipe'],
|
stdio: ['ignore', 'pipe', 'pipe'],
|
||||||
detached: false,
|
detached: false,
|
||||||
shell: false,
|
shell: false,
|
||||||
|
windowsHide: shouldHideConsoleWindow(),
|
||||||
env: spawnEnv,
|
env: spawnEnv,
|
||||||
});
|
});
|
||||||
|
|
||||||
@@ -1050,6 +1098,7 @@ export class GatewayManager extends EventEmitter {
|
|||||||
stdio: ['ignore', 'pipe', 'pipe'],
|
stdio: ['ignore', 'pipe', 'pipe'],
|
||||||
detached: false,
|
detached: false,
|
||||||
shell: useShell,
|
shell: useShell,
|
||||||
|
windowsHide: shouldHideConsoleWindow(),
|
||||||
env: spawnEnv,
|
env: spawnEnv,
|
||||||
});
|
});
|
||||||
const child = this.process;
|
const child = this.process;
|
||||||
@@ -1557,27 +1606,24 @@ export class GatewayManager extends EventEmitter {
|
|||||||
state: 'reconnecting',
|
state: 'reconnecting',
|
||||||
reconnectAttempts: this.reconnectAttempts
|
reconnectAttempts: this.reconnectAttempts
|
||||||
});
|
});
|
||||||
|
const scheduledEpoch = this.lifecycleEpoch;
|
||||||
|
|
||||||
this.reconnectTimer = setTimeout(async () => {
|
this.reconnectTimer = setTimeout(async () => {
|
||||||
this.reconnectTimer = null;
|
this.reconnectTimer = null;
|
||||||
|
const skipReason = getReconnectSkipReason({
|
||||||
|
scheduledEpoch,
|
||||||
|
currentEpoch: this.lifecycleEpoch,
|
||||||
|
shouldReconnect: this.shouldReconnect,
|
||||||
|
});
|
||||||
|
if (skipReason) {
|
||||||
|
logger.debug(`Skipping reconnect attempt: ${skipReason}`);
|
||||||
|
return;
|
||||||
|
}
|
||||||
try {
|
try {
|
||||||
// Try to find existing Gateway first
|
// Use the guarded start() flow so reconnect attempts cannot bypass
|
||||||
const existing = await this.findExistingGateway();
|
// lifecycle locking and accidentally start duplicate Gateway processes.
|
||||||
if (existing) {
|
await this.start();
|
||||||
await this.connect(existing.port, existing.externalToken);
|
|
||||||
this.ownsProcess = false;
|
|
||||||
this.setStatus({ pid: undefined });
|
|
||||||
this.reconnectAttempts = 0;
|
|
||||||
this.startHealthCheck();
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Otherwise restart the process
|
|
||||||
await this.startProcess();
|
|
||||||
await this.waitForReady();
|
|
||||||
await this.connect(this.status.port);
|
|
||||||
this.reconnectAttempts = 0;
|
this.reconnectAttempts = 0;
|
||||||
this.startHealthCheck();
|
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
logger.error('Gateway reconnection attempt failed:', error);
|
logger.error('Gateway reconnection attempt failed:', error);
|
||||||
this.scheduleReconnect();
|
this.scheduleReconnect();
|
||||||
|
|||||||
27
electron/gateway/process-policy.ts
Normal file
27
electron/gateway/process-policy.ts
Normal file
@@ -0,0 +1,27 @@
|
|||||||
|
/**
 * Decide whether child processes should be started with `windowsHide`.
 *
 * @param platform Platform identifier to evaluate; defaults to `process.platform`.
 * @returns `true` only when the platform is `'win32'`.
 */
export function shouldHideConsoleWindow(platform: NodeJS.Platform = process.platform): boolean {
  return platform === 'win32';
}
|
||||||
|
|
||||||
|
/**
 * Compute the epoch value that follows `currentEpoch`.
 *
 * @param currentEpoch The epoch currently in effect.
 * @returns `currentEpoch + 1`, or `0` once the safe-integer range is exhausted.
 */
export function nextLifecycleEpoch(currentEpoch: number): number {
  // Above Number.MAX_SAFE_INTEGER, `n + 1` can equal `n`, so the epoch would
  // stop changing and stale operations would no longer be detected. Wrap instead.
  if (currentEpoch >= Number.MAX_SAFE_INTEGER) {
    return 0;
  }
  return currentEpoch + 1;
}
|
||||||
|
|
||||||
|
/**
 * Report whether an epoch captured earlier differs from the current epoch.
 *
 * @param expectedEpoch Epoch captured when the operation began or was scheduled.
 * @param currentEpoch Epoch in effect now.
 * @returns `true` when the two values differ.
 */
export function isLifecycleSuperseded(expectedEpoch: number, currentEpoch: number): boolean {
  return expectedEpoch !== currentEpoch;
}

/** Inputs used to decide whether a scheduled reconnect callback should run. */
export interface ReconnectAttemptContext {
  /** Epoch captured when the reconnect was scheduled. */
  scheduledEpoch: number;
  /** Epoch in effect when the callback fires. */
  currentEpoch: number;
  /** Whether auto-reconnect is enabled when the callback fires. */
  shouldReconnect: boolean;
}

/**
 * Decide whether a reconnect attempt should be skipped.
 *
 * The disabled-reconnect check runs first, so it takes precedence when both
 * conditions hold.
 *
 * @param context Snapshot of the scheduling epoch, current epoch and reconnect flag.
 * @returns A human-readable skip reason, or `null` when the attempt may proceed.
 */
export function getReconnectSkipReason(context: ReconnectAttemptContext): string | null {
  if (!context.shouldReconnect) {
    return 'auto-reconnect disabled';
  }
  if (isLifecycleSuperseded(context.scheduledEpoch, context.currentEpoch)) {
    return `stale reconnect callback (scheduledEpoch=${context.scheduledEpoch}, currentEpoch=${context.currentEpoch})`;
  }
  return null;
}
|
||||||
64
tests/unit/gateway-process-policy.test.ts
Normal file
64
tests/unit/gateway-process-policy.test.ts
Normal file
@@ -0,0 +1,64 @@
|
|||||||
|
import { describe, expect, it } from 'vitest';
|
||||||
|
import {
|
||||||
|
getReconnectSkipReason,
|
||||||
|
isLifecycleSuperseded,
|
||||||
|
nextLifecycleEpoch,
|
||||||
|
shouldHideConsoleWindow,
|
||||||
|
} from '@electron/gateway/process-policy';
|
||||||
|
|
||||||
|
// Unit tests for the pure helpers exported by the gateway process-policy module.
describe('gateway process policy helpers', () => {
  describe('shouldHideConsoleWindow', () => {
    it('returns true on Windows', () => {
      expect(shouldHideConsoleWindow('win32')).toBe(true);
    });

    it('returns false on non-Windows platforms', () => {
      expect(shouldHideConsoleWindow('darwin')).toBe(false);
      expect(shouldHideConsoleWindow('linux')).toBe(false);
    });
  });

  describe('lifecycle epoch helpers', () => {
    it('increments lifecycle epoch by one', () => {
      expect(nextLifecycleEpoch(0)).toBe(1);
      expect(nextLifecycleEpoch(5)).toBe(6);
    });

    it('detects superseded lifecycle epochs', () => {
      expect(isLifecycleSuperseded(3, 4)).toBe(true);
      expect(isLifecycleSuperseded(8, 8)).toBe(false);
    });
  });

  describe('getReconnectSkipReason', () => {
    it('skips reconnect when auto-reconnect is disabled', () => {
      expect(
        getReconnectSkipReason({
          scheduledEpoch: 10,
          currentEpoch: 10,
          shouldReconnect: false,
        })
      ).toBe('auto-reconnect disabled');
    });

    it('skips stale reconnect callbacks when lifecycle epoch changed', () => {
      expect(
        getReconnectSkipReason({
          scheduledEpoch: 11,
          currentEpoch: 12,
          shouldReconnect: true,
        })
      ).toContain('stale reconnect callback');
    });

    it('allows reconnect when callback is current and reconnect enabled', () => {
      expect(
        getReconnectSkipReason({
          scheduledEpoch: 7,
          currentEpoch: 7,
          shouldReconnect: true,
        })
      ).toBeNull();
    });
  });
});
|
||||||
Reference in New Issue
Block a user